d/2024-10-03.html


<!DOCTYPE html>
<html>
<head>
    <script async src="https://www.googletagmanager.com/gtag/js?id=G-C1CRWDNJ1J"></script>
    <script>
        window.dataLayer = window.dataLayer || [];
        function gtag(){dataLayer.push(arguments);}
        gtag('js', new Date());
        gtag('config', 'G-C1CRWDNJ1J');
    </script>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0"><title>HF. 24 papers. October 3.</title>
<link rel="icon" href="favicon.svg" sizes="any" type="image/svg+xml">
    <link href="https://fonts.googleapis.com/css2?family=Roboto:wght@300;400;700&display=swap" rel="stylesheet">
    <link href="https://fonts.googleapis.com/css2?family=Roboto+Slab:wght@100..900&family=Tiny5&display=swap" rel="stylesheet">
    <style>
        :root {
            --primary-color: cornflowerblue;
            --primary-color-dark: #fffd87cf;
            --secondary-color: #fff;
            --background-color: #eee;
            --text-color: #333333;
            --header-color: cornflowerblue;
            --body-color: #eee;
            --menu-color: #002370;
        }
        .background-digit {
            position: absolute;
            font-family: 'Tiny5';
            bottom: -20px;
            right: -10px;
            font-size: 8em;
            font-weight: 400;
            color: #0989ea22;
            z-index: 2;
            line-height: 1;
        }
        .dark-theme .background-digit {
            color: #e9e78f3d;
        }
        body {
            font-family: 'Roboto Slab', sans-serif;
            line-height: 1.6;
            color: var(--text-color);
            margin: 0;
            padding: 0;
            min-height: 100vh;
            display: flex;
            flex-direction: column;
        }
        .container {
            max-width: 1500px;
            margin: 0 auto;
            padding: 0 20px;
            flex: 1 0 auto;
        }
        .a-clean {
            color: var(--secondary-color);
            text-decoration: none;
        }
        .a-clean:hover {
            color: #fff;
        }
        header {
            padding: 3.6em 0 2.4em 0;
            text-align: center;
        }
        footer {
            background-color: var(--primary-color);
            color: white;
            text-align: center;
            margin-top: 2em;
            flex-shrink: 0;
            padding: 20px;
        }
        h1 {
            font-size: 2.4em;
            margin: 0;
            font-weight: 700;
        }
        .article-title-cont {
            margin: -21px -21px 0px -21px;
            padding: 10px 20px;
            background: cornflowerblue;
            display: table;
            min-height: 5.9em;
        }
        .dark-theme .article-title-cont {
            background: #444444;
        }
        .article-title {
            color: white;           
        }
        .article-title h2 {
            margin: 0px;
            padding: 0px;
            font-weight: 400;
            text-align:center;
        }
        h2 {
            # color: var(--primary-color);
            font-size: 1.2em;
            margin-top: 0;
            margin-bottom: 0.5em;
        }
        header p {
            font-size: 1.2em;
            margin-top: 0.5em;
            font-weight: 300;
        }
        main {
            display: grid;
            grid-template-columns: repeat(auto-fit, minmax(400px, 1fr));
            gap: 1.5em;
            padding: 10px 0 20px 0;
        }
        body.dark-tmeme>header {
            background-color: background-color: #333333;
            color: white;
        }
        body.dark-theme>div>main>article>div.article-content>p.meta {
            color: #fff;
        }
        body.light-theme>div>main>article>div.article-content>p.meta {
            color: #555;
        }
        body.dark-theme>div>main>article>div.article-content>p.pub-date {
            color: #ccc;
        }
        body.light-theme>div>main>article>div.article-content>p.pub-date {
            color: #555;
        }
        body.dark-theme>div>main>article>div.article-content>div.tags {
            color: #ccc;
        }
        body.light-theme>div>main>article>div.article-content>div.tags {
            color: #fff;
        }
        body.light-theme>header {
            background-color: var(--header-color);
            color: white;
        }
        article {
            border-radius: 5px;
            border: 1px solid #ddd;
            overflow: hidden;
            transition: background-color 0.2s ease;
            display: flex;
            flex-direction: column;
            position: relative;
        }
        .article-content {
            padding: 1.3em;
            flex-grow: 1;
            display: flex;
            flex-direction: column;
            position: relative;
            z-index: 1;
            cursor: pointer;
        }
        body.dark-theme>div>main>article {
            background-color: #444;
            border: none;
        }
        body.light-theme>div>main>article {
            background-color: #fff;
        }
        body.dark-theme>div>main>article:hover {
            background-color: #414141;
        }
        body.light-theme>div>main>article:hover {
            background-color: #fafafa;
        }
        .meta {
            font-size: 0.9em;
            margin-bottom: 0em;
            font-weight: 500;
            margin: 20px 0 0px 0;
            padding-bottom: 20px;
            border-bottom: 1px solid #ddd;
        }
        .pub-date {
            font-size: 0.8em;
            margin-bottom: 0.8em;
            font-weight: 400;
            text-align: right;
            font-family: Roboto;
        }
        .tags {
            font-size: 0.9em;
            margin-bottom: 0;
            position: absolute;
            bottom: 0px;
            font-weight: 300;
            font-family: 'Roboto Slab';
            background: #555;
            left: 0;
            width: 100%;
            padding: 10px 20px;
        }
        .abstract {
            position: relative;
            max-height: 170px;
            overflow: hidden;
            transition: max-height 0.3s ease;
            cursor: pointer;
        }
        .abstract.expanded {
            max-height: 1000px;
        }
        .abstract-toggle {
            position: absolute;
            bottom: 4px;
            right: 0;
            cursor: pointer;
            color: var(--primary-color);
            float: right;
            font-weight: 400;
        }
        .explanation {
            background-color: #e8f5e9;
            border-left: 4px solid var(--secondary-color);
            padding: 1em;
            margin-top: 1.5em;
        }
        .links {
            margin-top: 1.5em;
            margin-bottom: 20px;
        }
        .affiliations {
            margin-bottom: 50px;
            padding:10px;
            font-size: 0.9em;
            text-align: center
        }
        a {
            color: var(--primary-color);
            text-decoration: none;
            font-weight: 500;
            transition: color 0.3s ease;
        }
        .dark-theme a {
            color: var(--primary-color-dark);
        }
        a:hover {
            color: #e73838;
        }
        .light-theme {
            background-color: var(--body-color);
            color: #333333;
        }
        .dark-theme {
            background-color: #333333;
            color: #ffffff;
        }
        .theme-switch {
            position: absolute;
            top: 20px;
            right: 20px;
            display: flex;
            align-items: center;
        }
        .switch {
            position: relative;
            display: inline-block;
            width: 50px;
            height: 30px;
        }
        .switch input {
            opacity: 0;
            width: 0;
            height: 0;
        }
        .slider {
            position: absolute;
            cursor: pointer;
            top: 0;
            left: 0;
            right: 0;
            bottom: 0;
            background-color: #ccc;
            transition: .4s;
            border-radius: 30px;
        }
        .slider:before {
            position: absolute;
            content: "";
            height: 24px;
            width: 24px;
            left: 3px;
            bottom: 3px;
            background-color: white;
            transition: .4s;
            border-radius: 50%;
        }
        input:checked + .slider {
            background-color: var(--primary-color);
        }
        input:checked + .slider:before {
            transform: translateX(20px);
        }
        .switch-label {
            margin-right: 10px;
        }

        .sub-header-container {
            display: flex;
            justify-content: space-between;
            align-items: center;
            flex-wrap: wrap;
            gap: 15px;
            margin-top: 7px;
        }
        .sub-header-container-2 {
            display: flex;
            justify-content: left;
            align-items: center;
            flex-wrap: wrap;
            gap: 15px;
            margin: 0 auto;
        }
        .update-info-container {
            margin-top: 15px;
            margin-bottom: 0px;
            text-align: left;
            flex: 1;
        }
        .sort-container {
            margin-top: 15px;
            margin-bottom: 0px;
            text-align: right;
            flex: 2;
        }
        
        .category-toggle-container {
            display: inline-block;
            margin-top: 15px;
            margin-bottom: 10px;
            cursor: pointer;
        }
        .category-option-container {
            margin-top: 15px;
            margin-bottom: 10px;
            display: none;
            margin-left: auto;
        }
        .category-option-container.expanded {
            display: block;
        }

        .sort-dropdown {
            padding: 5px 10px;
            font-size: 16px;
            border-radius: 5px;
            border: 1px solid #ccc;
            background-color: white;
            color: var(--text-color);
            font-family: 'Roboto Slab', sans-serif;
        }
        .sort-label {
            margin-right: 10px;
            font-size: 1.0em !important;
        }        
        .dark-theme .sort-dropdown {
            background-color: #444;
            color: white;
            border-color: var(--text-color);
        }
        .title-sign {
            display: inline-block;
            transition: all 0.5s ease;            
        }
        .rotate {
            transform: rotate(45deg) translateY(-6px);
            transform-origin: center;
        }
        .title-text {
            display: inline;
            padding-left: 10px;
        }
        .category-filters {
            margin-top: 20px;
            margin-bottom: 20px;
            text-align: center;
            display: none;
        }
        .category-filters.expanded {
            display: block;
            margin-top: 10px;
        }
        .category-button {
            display: inline-block;
            margin: 5px;
            padding: 5px 10px;
            border-radius: 15px;
            background-color: #f0f0f0;
            color: #333;
            cursor: pointer;
            transition: background-color 0.3s ease;
        }
        .category-button.active {
            background-color: var(--primary-color);
            color: white;
        }
        .category-button.inactive:not(.active) {
            color: #ccc;
        }
        .dark-theme .category-button {
            background-color: #555;
            color: #fff;
        }
        .dark-theme .category-button.active {
            background-color: var(--primary-color);
        }
        .dark-theme .category-button.inactive:not(.active) {
            color: #888;
        }
        .clear-categories {
            display: inline-block;
            margin: 5px;
            padding: 5px 10px;
            border-radius: 15px;
            background-color: #f0f0f0;
            color: #333;
            cursor: pointer;
            transition: background-color 0.3s ease;
        }
        .clear-categories:hover {
            background-color: #bbb;
        }
        .svg-container {
            display: inline-block;
            position: relative;
            overflow: hidden;
        }
        .svg-container span {
            position: relative;
            z-index: 1;
        }
        .svg-container svg {
            position: absolute;
            bottom: 0;
            left: 0;
            z-index: 0;
        }

        .nav-menu {
            background-color: var(--menu-color);
            padding: 2px 0 2px 0;
            display: inline-block;
            position: relative;
            overflow: hidden;
            width: 100%;
        }        
        .nav-container {
            max-width: 1500px;
            margin: 0 auto;
            padding: 0 20px;
            display: flex;
            justify-content: left;
            gap: 3em;
        }
        .nav-container span a {
            color: white;
        }        
        .nav-item {
            color: white;
            padding: 3px 0px;
            cursor: pointer;
            font-weight: 400;
        }        
        .nav-item:hover {
            background-color: rgba(255, 255, 255, 0.1);
            border-color: rgba(255, 255, 255, 0.3);
        }        
        .language-flags {
            display: flex;
            gap: 7px;
            padding: 5px 0px;
            margin-left: auto;
        }
        .flag-svg {
            width: 22px;
            height: 22px;
            cursor: pointer;
            opacity: 0.4;
            transition: opacity 0.3s ease;
            border-radius: 2px;
        }
        .flag-svg.active {
            opacity: 1;
        }
        .flag-svg:hover {
            opacity: 0.8;
        }
        
        .dark-theme .nav-menu {
            background-color: #333;
        }
        .dark-theme .nav-item {
            color: white;
        }
        
        .dark-theme .nav-item:hover {
            background-color: rgba(255, 255, 255, 0.05);
        }

        .pointer { cursor: pointer; }

        .article-pdf-title-img {
            max-width: 100%;
            max-height: 400px;
            display: inline-block;
            margin-top: 10px;
            margin-bottom: 10px;
            border-radius: 5px;
        }
        .article-pdf-title-img-cont {
            text-align: center;
        }
        .dark-theme .article-pdf-title-img {
            opacity: 0.8;
            filter: grayscale(1);
        }

        @media (max-width: 600px) {
            .nav-container {
                flex-direction: row;
                gap: 1.5em;
            }            
            .nav-item {
                padding: 3px 0px;
            }
        }
        
        @media (max-width: 768px) {
            .category-filters {
                display: none;
            }
            .category-toggle {
                display: inline-block;
                width: 100%;
                text-align: left;
            }
            .category-filters.expanded {
                display: block;
                margin-top: 10px;
            }
        }
        @media (max-width: 600px) {
            .sub-header-container {
                flex-direction: column;
                align-items: flex-start;
            }
            .sort-container {
                width: 100%;
                display: flex;
                justify-content: left;
                margin: 0 auto;
            }
            .sort-dropdown {
                margin-left: auto;
            }
            .sort-label {
                margin-top: 5px;
                float: left;
            }

            .sub-header-container-2 {
                flex-direction: row;
                align-items: flex-start;
            }
            .update-info-container {
                text-align: left;
                width: 100%;
                margin-bottom: 0px;
            }
            .category-toggle-container {
                margin-top: 15px;
                text-align: left;
                margin-bottom: 10px;
            }
            .category-option-container {
                margin-top: 15px;
                text-align: center;
                margin-bottom: 10px;
            }            
            main {
                grid-template-columns: repeat(auto-fit);
                gap: 0em;
                padding: 10px 0 20px 0;
                margin: 0 -20px;
            }
            footer {
                margin-top: -20px;
            }
            article {
                border-radius: 0px;
            }
        }
    </style>
    <script>
    function toggleAbstract(id) {
        var abstract = document.getElementById('abstract-' + id);
        var toggle = document.getElementById('toggle-' + id);
        if (abstract.classList.contains('expanded')) {
            abstract.classList.remove('expanded');
            toggle.textContent = '...';
        } else {
            abstract.classList.add('expanded');
            toggle.textContent = '';
        }
    }
    function getTimeDiff(dateString, lang='ru') {
        const timeUnits = {
            ru: {
                minute: ["минуту", "минуты", "минут"],
                hour: ["час", "часа", "часов"],
                day: ["день", "дня", "дней"],
                justNow: "только что",
                ago: "назад"
            },
            en: {
                minute: ["minute", "minutes", "minutes"],
                hour: ["hour", "hours", "hours"],
                day: ["day", "days", "days"],
                justNow: "just now",
                ago: "ago"
            },
            zh: {
                minute: ["分钟", "分钟", "分钟"],
                hour: ["小时", "小时", "小时"],
                day: ["天", "天", "天"],
                justNow: "刚刚",
                ago: "前"
            }
        };

        function getPlural(number, words, lang) {
            if (lang === 'ru') {
                if (number % 10 === 1 && number % 100 !== 11) {
                    return words[0];
                } else if (number % 10 >= 2 && number % 10 <= 4 && (number % 100 < 10 || number % 100 >= 20)) {
                    return words[1];
                } else {
                    return words[2];
                }
            } else if (lang === 'en') {
                return number === 1 ? words[0] : words[1];
            } else {
                // Chinese doesn't need plural forms
                return words[0];
            }
        }

        function formatTimeDiff(number, unit, lang) {
            const unitWord = getPlural(number, timeUnits[lang][unit], lang);
            
            if (lang === 'zh') {
                return `${number}${unitWord}${timeUnits[lang].ago}`;
            } else {
                return `${number} ${unitWord} ${timeUnits[lang].ago}`;
            }
        }

        if (!['ru', 'en', 'zh'].includes(lang)) {
            throw new Error('Unsupported language. Supported languages are: ru, en, zh');
        }

        const pastDate = new Date(dateString.replace(" ", "T") + ":00Z");
        const currentDate = new Date();
        const diffInSeconds = Math.floor((currentDate - pastDate) / 1000);
        
        const minutes = Math.floor(diffInSeconds / 60);
        const hours = Math.floor(diffInSeconds / 3600);
        const days = Math.floor(diffInSeconds / 86400);

        if (minutes === 0) {
            return timeUnits[lang].justNow;
        } else if (minutes < 60) {
            return formatTimeDiff(minutes, 'minute', lang);
        } else if (hours < 24) {
            return formatTimeDiff(hours, 'hour', lang);
        } else {
            return formatTimeDiff(days, 'day', lang);
        }
    }
    function isToday(dateString) {
        const inputDate = new Date(dateString);
        const today = new Date();
        return (
            inputDate.getFullYear() === today.getFullYear() &&
            inputDate.getMonth() === today.getMonth() &&
            inputDate.getDate() === today.getDate()
        );
    }
    function isCurrentMonth(dateString) {
        const inputDate = new Date(dateString);
        const today = new Date();
        return (
            inputDate.getFullYear() === today.getFullYear() &&
            inputDate.getMonth() === today.getMonth()
        );
    }
    function formatArticlesTitle(number, lang='ru') {
        const lastDigit = number % 10;
        const lastTwoDigits = number % 100;
        let word;

        if (!['ru', 'en', 'zh'].includes(lang)) {
            throw new Error('Unsupported language. Supported languages are: ru, en, zh');
        }

        if (lang === 'ru') {
            if (lastTwoDigits >= 11 && lastTwoDigits <= 14) {
                word = "статей";
            } else if (lastDigit === 1) {
                word = "статья";
            } else if (lastDigit >= 2 && lastDigit <= 4) {
                word = "статьи";
            } else {
                word = "статей";
            }
        } else if (lang === 'en') {
            if (number === 1) {
                word = 'paper'
            } else {
                word = 'papers'
            }
        } else if (lang === 'zh') {
            word = "篇论文"
        }

        if (lang === 'zh') {
            return `${number}${word}`;
        } else {
            return `${number} ${word}`;
        }
    }
    </script>
</head>
<body class="light-theme">
    <header>
        <div class="container">            
            <a href="https://hfday.ru" class="a-clean"><h1 class="title-sign" id="doomgrad-icon">🔺</h1><h1 class="title-text" id="doomgrad">hf daily</h1></a>
            <p><span id="title-date">3 октября</span> | <span id="title-articles-count">24 papers</span></p>
        </div>
        <div class="theme-switch">
            <label class="switch">
                <input type="checkbox" id="theme-toggle">
                <span class="slider"></span>
            </label>
        </div>
    </header>
    <div class="nav-menu">
        <div class="nav-container">
            <span class="nav-item" id="nav-prev"><a href="/d/2024-10-02.html">⬅️ <span id="prev-date">02.10</span></a></span>
            <span class="nav-item" id="nav-next"><a href="/d/2024-10-04.html">➡️ <span id="next-date">04.10</span></a></span>
            <span class="nav-item" id="nav-monthly"><a href="/m/2024-10.html">📈 <span id='top-month-label'>Месяц</span></a></span>
            <div class="language-flags">
                <svg class="flag-svg" data-lang="ru" xmlns="http://www.w3.org/2000/svg" width="32" height="32" viewBox="0 0 32 32"><path fill="#1435a1" d="M1 11H31V21H1z"></path><path d="M5,4H27c2.208,0,4,1.792,4,4v4H1v-4c0-2.208,1.792-4,4-4Z" fill="#fff"></path><path d="M5,20H27c2.208,0,4,1.792,4,4v4H1v-4c0-2.208,1.792-4,4-4Z" transform="rotate(180 16 24)" fill="#c53a28"></path><path d="M27,4H5c-2.209,0-4,1.791-4,4V24c0,2.209,1.791,4,4,4H27c2.209,0,4-1.791,4-4V8c0-2.209-1.791-4-4-4Zm3,20c0,1.654-1.346,3-3,3H5c-1.654,0-3-1.346-3-3V8c0-1.654,1.346-3,3-3H27c1.654,0,3,1.346,3,3V24Z" opacity=".15"></path><path d="M27,5H5c-1.657,0-3,1.343-3,3v1c0-1.657,1.343-3,3-3H27c1.657,0,3,1.343,3,3v-1c0-1.657-1.343-3-3-3Z" fill="#fff" opacity=".2"></path></svg>
                <svg class="flag-svg" data-lang="zh" xmlns="http://www.w3.org/2000/svg" width="32" height="32" viewBox="0 0 32 32"><rect x="1" y="4" width="30" height="24" rx="4" ry="4" fill="#db362f"></rect><path d="M27,4H5c-2.209,0-4,1.791-4,4V24c0,2.209,1.791,4,4,4H27c2.209,0,4-1.791,4-4V8c0-2.209-1.791-4-4-4Zm3,20c0,1.654-1.346,3-3,3H5c-1.654,0-3-1.346-3-3V8c0-1.654,1.346-3,3-3H27c1.654,0,3,1.346,3,3V24Z" opacity=".15"></path><path fill="#ff0" d="M7.958 10.152L7.19 7.786 6.421 10.152 3.934 10.152 5.946 11.614 5.177 13.979 7.19 12.517 9.202 13.979 8.433 11.614 10.446 10.152 7.958 10.152z"></path><path fill="#ff0" d="M12.725 8.187L13.152 8.898 13.224 8.072 14.032 7.886 13.269 7.562 13.342 6.736 12.798 7.361 12.035 7.037 12.461 7.748 11.917 8.373 12.725 8.187z"></path><path fill="#ff0" d="M14.865 10.372L14.982 11.193 15.37 10.46 16.187 10.602 15.61 10.007 15.997 9.274 15.253 9.639 14.675 9.044 14.793 9.865 14.048 10.23 14.865 10.372z"></path><path fill="#ff0" d="M15.597 13.612L16.25 13.101 15.421 13.13 15.137 12.352 14.909 13.149 14.081 13.179 14.769 13.642 14.541 14.439 15.194 13.928 15.881 14.391 15.597 13.612z"></path><path fill="#ff0" d="M13.26 15.535L13.298 14.707 12.78 15.354 12.005 15.062 12.46 15.754 11.942 16.402 12.742 16.182 13.198 16.875 13.236 16.047 14.036 15.827 13.26 15.535z"></path><path d="M27,5H5c-1.657,0-3,1.343-3,3v1c0-1.657,1.343-3,3-3H27c1.657,0,3,1.343,3,3v-1c0-1.657-1.343-3-3-3Z" fill="#fff" opacity=".2"></path></svg>
                <svg class="flag-svg" data-lang="en" xmlns="http://www.w3.org/2000/svg" width="32" height="32" viewBox="0 0 32 32"><rect x="1" y="4" width="30" height="24" rx="4" ry="4" fill="#fff"></rect><path d="M1.638,5.846H30.362c-.711-1.108-1.947-1.846-3.362-1.846H5c-1.414,0-2.65,.738-3.362,1.846Z" fill="#a62842"></path><path d="M2.03,7.692c-.008,.103-.03,.202-.03,.308v1.539H31v-1.539c0-.105-.022-.204-.03-.308H2.03Z" fill="#a62842"></path><path fill="#a62842" d="M2 11.385H31V13.231H2z"></path><path fill="#a62842" d="M2 15.077H31V16.923000000000002H2z"></path><path fill="#a62842" d="M1 18.769H31V20.615H1z"></path><path d="M1,24c0,.105,.023,.204,.031,.308H30.969c.008-.103,.031-.202,.031-.308v-1.539H1v1.539Z" fill="#a62842"></path><path d="M30.362,26.154H1.638c.711,1.108,1.947,1.846,3.362,1.846H27c1.414,0,2.65-.738,3.362-1.846Z" fill="#a62842"></path><path d="M5,4h11v12.923H1V8c0-2.208,1.792-4,4-4Z" fill="#102d5e"></path><path d="M27,4H5c-2.209,0-4,1.791-4,4V24c0,2.209,1.791,4,4,4H27c2.209,0,4-1.791,4-4V8c0-2.209-1.791-4-4-4Zm3,20c0,1.654-1.346,3-3,3H5c-1.654,0-3-1.346-3-3V8c0-1.654,1.346-3,3-3H27c1.654,0,3,1.346,3,3V24Z" opacity=".15"></path><path d="M27,5H5c-1.657,0-3,1.343-3,3v1c0-1.657,1.343-3,3-3H27c1.657,0,3,1.343,3,3v-1c0-1.657-1.343-3-3-3Z" fill="#fff" opacity=".2"></path><path fill="#fff" d="M4.601 7.463L5.193 7.033 4.462 7.033 4.236 6.338 4.01 7.033 3.279 7.033 3.87 7.463 3.644 8.158 4.236 7.729 4.827 8.158 4.601 7.463z"></path><path fill="#fff" d="M7.58 7.463L8.172 7.033 7.441 7.033 7.215 6.338 6.989 7.033 6.258 7.033 6.849 7.463 6.623 8.158 7.215 7.729 7.806 8.158 7.58 7.463z"></path><path fill="#fff" d="M10.56 7.463L11.151 7.033 10.42 7.033 10.194 6.338 9.968 7.033 9.237 7.033 9.828 7.463 9.603 8.158 10.194 7.729 10.785 8.158 10.56 7.463z"></path><path fill="#fff" d="M6.066 9.283L6.658 8.854 5.927 8.854 5.701 8.158 5.475 8.854 4.744 8.854 5.335 9.283 5.109 9.979 5.701 9.549 6.292 9.979 6.066 9.283z"></path><path fill="#fff" d="M9.046 9.283L9.637 8.854 8.906 8.854 8.68 8.158 8.454 8.854 7.723 8.854 8.314 9.283 8.089 9.979 8.68 9.549 9.271 9.979 9.046 9.283z"></path><path fill="#fff" d="M12.025 9.283L12.616 8.854 11.885 8.854 11.659 8.158 11.433 8.854 10.702 8.854 11.294 9.283 11.068 9.979 11.659 9.549 12.251 9.979 12.025 9.283z"></path><path fill="#fff" d="M6.066 12.924L6.658 12.494 5.927 12.494 5.701 11.799 5.475 12.494 4.744 12.494 5.335 12.924 5.109 13.619 5.701 13.19 6.292 13.619 6.066 12.924z"></path><path fill="#fff" d="M9.046 12.924L9.637 12.494 8.906 12.494 8.68 11.799 8.454 12.494 7.723 12.494 8.314 12.924 8.089 13.619 8.68 13.19 9.271 13.619 9.046 12.924z"></path><path fill="#fff" d="M12.025 12.924L12.616 12.494 11.885 12.494 11.659 11.799 11.433 12.494 10.702 12.494 11.294 12.924 11.068 13.619 11.659 13.19 12.251 13.619 12.025 12.924z"></path><path fill="#fff" d="M13.539 7.463L14.13 7.033 13.399 7.033 13.173 6.338 12.947 7.033 12.216 7.033 12.808 7.463 12.582 8.158 13.173 7.729 13.765 8.158 13.539 7.463z"></path><path fill="#fff" d="M4.601 11.104L5.193 10.674 4.462 10.674 4.236 9.979 4.01 10.674 3.279 10.674 3.87 11.104 3.644 11.799 4.236 11.369 4.827 11.799 4.601 11.104z"></path><path fill="#fff" d="M7.58 11.104L8.172 10.674 7.441 10.674 7.215 9.979 6.989 10.674 6.258 10.674 6.849 11.104 6.623 11.799 7.215 11.369 7.806 11.799 7.58 11.104z"></path><path fill="#fff" d="M10.56 11.104L11.151 10.674 10.42 10.674 10.194 9.979 9.968 10.674 9.237 10.674 9.828 11.104 9.603 11.799 10.194 11.369 10.785 11.799 10.56 11.104z"></path><path fill="#fff" d="M13.539 11.104L14.13 10.674 13.399 10.674 13.173 9.979 12.947 10.674 12.216 10.674 12.808 11.104 12.582 11.799 13.173 11.369 13.765 11.799 13.539 11.104z"></path><path fill="#fff" d="M4.601 14.744L5.193 14.315 4.462 14.315 4.236 13.619 4.01 14.315 3.279 14.315 3.87 14.744 3.644 15.44 4.236 15.01 4.827 15.44 4.601 14.744z"></path><path fill="#fff" d="M7.58 14.744L8.172 14.315 7.441 14.315 7.215 13.619 6.989 14.315 6.258 14.315 6.849 14.744 6.623 15.44 7.215 15.01 7.806 15.44 7.58 14.744z"></path><path fill="#fff" d="M10.56 14.744L11.151 14.315 10.42 14.315 10.194 13.619 9.968 14.315 9.237 14.315 9.828 14.744 9.603 15.44 10.194 15.01 10.785 15.44 10.56 14.744z"></path><path fill="#fff" d="M13.539 14.744L14.13 14.315 13.399 14.315 13.173 13.619 12.947 14.315 12.216 14.315 12.808 14.744 12.582 15.44 13.173 15.01 13.765 15.44 13.539 14.744z"></path></svg>
            </div>
        </div>
    </div>
    <div class="container">
        <div class="sub-header-container">
            <div class="update-info-container">
                <label class="update-info-label" id="timeDiff"></label>
            </div>
            <div class="sort-container">
                <label class="sort-label">🔀 <span id="sort-label-text">Сортировка по</span></label>
                <select id="sort-dropdown" class="sort-dropdown">
                    <option value="default">рейтингу</option>
                    <option value="pub_date">дате публикации</option>
                    <option value="issue_id">добавлению на HF</option>
                </select>
            </div>
        </div>
        <div class="sub-header-container-2">
            <div class="category-toggle-container">
                <div class="svg-container">
                    <span id="category-toggle">🏷️ Фильтр</span>
                    <svg height="3" width="200">
                        <line x1="0" y1="0" x2="200" y2="0" 
                            stroke="black" 
                            stroke-width="2" 
                            stroke-dasharray="3, 3" />
                    </svg>
                </div>
            </div>
            <div class="category-option-container" id="category-options">                
                <label class="pointer" for="filter-logic-or"><input type="radio" id="filter-logic-or" name="filter-logic" value="or"> A∪B</label>
                <label class="pointer" for="filter-logic-and"><input type="radio" id="filter-logic-and" name="filter-logic" value="and"> A∩B</label>
            </div> 
        </div>
        <div class="category-filters" id="category-filters">
            <span class="clear-categories" id="clear-categories">🧹</span>
            <!-- Categories -->
        </div>
        <main id="articles-container">
            <!-- Articles -->
        </main>
    </div>
    <footer>
        <div class="container">
            <p><a style="color:white;" href="https://t.me/doomgrad">doomgrad</a> ✖️ <a style="color:white;" href="https://huggingface.co/papers">hugging face</a></p>
        </div>
    </footer>
    <script>
        // Language handling
        let currentLang = localStorage.getItem('selectedLang') || 'en';
        let feedDate = {'ru': '3 октября', 'en': 'October 3', 'zh': '10月3日'};
        let feedDateNext = {'ru': '04.10', 'en': '10/04', 'zh': '10月4日'};
        let feedDatePrev = {'ru': '02.10', 'en': '10/02', 'zh': '10月2日'};
        let filterLabel = {'ru': 'Фильтр', 'en': 'Topics', 'zh': '主题筛选'}
        let publishedLabel = {'ru': 'статья от ', 'en': 'published on ', 'zh': '发表于'}
        let sortLabel = {'ru': 'Сортировка по', 'en': 'Sort by', 'zh': '排序方式'}
        let paperLabel = {'ru': 'Статья', 'en': 'Paper', 'zh': '论文'}
        let topMonthLabel = {'ru': 'Месяц', 'en': 'Month', 'zh': '月度论文'}
        let topDayLabel = {'ru': 'День', 'en': 'Day', 'zh': '日度论文'}
        
        function initializeLanguageFlags() {
            const flags = document.querySelectorAll('.flag-svg');
            flags.forEach(flag => {
                if (flag.dataset.lang === currentLang) {
                    flag.classList.add('active');
                }
                flag.addEventListener('click', () => {
                    flags.forEach(f => f.classList.remove('active'));
                    flag.classList.add('active');
                    currentLang = flag.dataset.lang;
                    localStorage.setItem('selectedLang', currentLang);
                    updateTimeDiffs();
                    updateLocalization();
                    filterAndRenderArticles();
                });
            });
        }
        function toggleTheme() {
            const body = document.body;
            body.classList.toggle('light-theme');
            body.classList.toggle('dark-theme');

            const isDarkMode = body.classList.contains('dark-theme');
            localStorage.setItem('darkMode', isDarkMode);
            
            if (isDarkMode) {
                const title = document.getElementById('doomgrad');
                title.innerHTML = "hf nightly";
                const titleSign = document.getElementById('doomgrad-icon');
                titleSign.classList.add('rotate');
            }  else {
                const title = document.getElementById('doomgrad');
                title.innerHTML = "hf daily";
                const titleSign = document.getElementById('doomgrad-icon');
                titleSign.classList.remove('rotate');
            }
        }

        const articlesData = [{'id': 'https://huggingface.co/papers/2410.01044', 'title': 'RATIONALYST: Pre-training Process-Supervision for Improving Reasoning', 'url': 'https://huggingface.co/papers/2410.01044', 'abstract': 'The reasoning steps generated by LLMs might be incomplete, as they mimic logical leaps common in everyday communication found in their pre-training data: underlying rationales are frequently left implicit (unstated). To address this challenge, we introduce RATIONALYST, a model for process-supervision of reasoning based on pre-training on a vast collection of rationale annotations extracted from unlabeled data. We extract 79k rationales from web-scale unlabelled dataset (the Pile) and a combination of reasoning datasets with minimal human intervention. This web-scale pre-training for reasoning allows RATIONALYST to consistently generalize across diverse reasoning tasks, including mathematical, commonsense, scientific, and logical reasoning. Fine-tuned from LLaMa-3-8B, RATIONALYST improves the accuracy of reasoning by an average of 3.9% on 7 representative reasoning benchmarks. It also demonstrates superior performance compared to significantly larger verifiers like GPT-4 and similarly sized models fine-tuned on matching training sets.', 'score': 34, 'issue_id': 1, 'pub_date': '2024-10-01', 'pub_date_card': {'ru': '1 октября', 'en': 'October 1', 'zh': '10月1日'}, 'hash': '7443e92db63fd869', 'authors': ['Dongwei Jiang', 'Guoxuan Wang', 'Yining Lu', 'Andrew Wang', 'Jingyu Zhang', 'Chuyu Liu', 'Benjamin Van Durme', 'Daniel Khashabi'], 'affiliations': ['Johns Hopkins University', 'University of Notre Dame'], 'pdf_title_img': 'assets\\pdf\\title_img\\2410.01044.jpg', 'data': {'categories': ['#science', '#reasoning', '#dataset', '#multilingual', '#training', '#math', '#data', '#transfer_learning', '#benchmark', '#architecture', '#synthetic'], 'emoji': '🧠', 'ru': {'title': 'RATIONALYST: улучшение рассуждений в LLM с помощью предобучения на веб-данных', 'desc': 'В этой статье представлен RATIONALYST - новая модель для улучшения процесса рассуждений в больших языковых моделях (LLM). Модель предобучена на 79 тысячах обоснований, извлеченных из веб-данных и наборов данных для рассуждений. RATIONALYST демонстрирует улучшение точности на 3.9% в среднем на 7 эталонных тестах для различных типов рассуждений. Модель превосходит по производительности значительно более крупные верификаторы, такие как GPT-4.'}, 'en': {'title': 'RATIONALYST: Enhancing Reasoning with Explicit Rationale Learning', 'desc': 'RATIONALYST is a new model designed to improve reasoning in language models by addressing the issue of incomplete reasoning steps. It achieves this by pre-training on a large dataset of rationale annotations, which helps the model learn to make its reasoning more explicit. By extracting 79,000 rationales from a vast collection of unlabeled data, RATIONALYST can generalize effectively across various reasoning tasks, such as mathematical and commonsense reasoning. The model shows a notable improvement in accuracy, outperforming larger models like GPT-4 on several reasoning benchmarks.'}, 'zh': {'title': 'RATIONALYST：提升推理准确性的创新模型', 'desc': '本论文介绍了一种名为RATIONALYST的模型，旨在改善大型语言模型（LLMs）在推理过程中的不足。该模型通过在大量未标注数据中提取的79,000个推理理由进行预训练，从而实现了对推理过程的监督。RATIONALYST在多种推理任务上表现出色，包括数学、常识、科学和逻辑推理，平均提高了3.9%的准确率。与更大规模的验证模型如GPT-4相比，RATIONALYST在推理准确性上也表现出更优的性能。'}}}, {'id': 'https://huggingface.co/papers/2410.01680', 'title': 'PHI-S: Distribution Balancing for Label-Free Multi-Teacher Distillation', 'url': 'https://huggingface.co/papers/2410.01680', 'abstract': 'Various visual foundation models have distinct strengths and weaknesses, both of which can be improved through heterogeneous multi-teacher knowledge distillation without labels, termed "agglomerative models." We build upon this body of work by studying the effect of the teachers\' activation statistics, particularly the impact of the loss function on the resulting student model quality. We explore a standard toolkit of statistical normalization techniques to better align the different distributions and assess their effects. Further, we examine the impact on downstream teacher-matching metrics, which motivates the use of Hadamard matrices. With these matrices, we demonstrate useful properties, showing how they can be used for isotropic standardization, where each dimension of a multivariate distribution is standardized using the same scale. We call this technique "PHI Standardization" (PHI-S) and empirically demonstrate that it produces the best student model across the suite of methods studied.', 'score': 32, 'issue_id': 1, 'pub_date': '2024-10-02', 'pub_date_card': {'ru': '2 октября', 'en': 'October 2', 'zh': '10月2日'}, 'hash': 'dc301080cf253805', 'authors': ['Mike Ranzinger', 'Jon Barker', 'Greg Heinrich', 'Pavlo Molchanov', 'Bryan Catanzaro', 'Andrew Tao'], 'affiliations': ['NVIDIA'], 'pdf_title_img': 'assets\\pdf\\title_img\\2410.01680.jpg', 'data': {'categories': ['#cv', '#training', '#optimization', '#transfer_learning', '#architecture'], 'emoji': '🧠', 'ru': {'title': 'PHI-стандартизация: новый подход к агломеративному обучению визуальных моделей', 'desc': "Статья исследует улучшение визуальных моделей с помощью агломеративного обучения без учителя. Авторы изучают влияние статистики активации учителей и функции потерь на качество модели-ученика. Рассматриваются различные методы статистической нормализации для лучшего выравнивания распределений. Предлагается новый метод 'PHI-стандартизации' на основе матриц Адамара, показывающий наилучшие результаты среди исследованных подходов."}, 'en': {'title': 'Enhancing Student Models with PHI Standardization', 'desc': "This paper explores how to improve visual foundation models using a technique called heterogeneous multi-teacher knowledge distillation without labels, known as agglomerative models. It focuses on the importance of the teachers' activation statistics and how different loss functions affect the quality of the student model. The authors investigate various statistical normalization techniques to align the distributions of the teachers' outputs and their impact on matching metrics. They introduce a new method called PHI Standardization (PHI-S), which standardizes multivariate distributions effectively, leading to superior student model performance."}, 'zh': {'title': '聚合模型：提升视觉模型的最佳实践', 'desc': '本文研究了不同视觉基础模型在无标签情况下通过异构多教师知识蒸馏的改进方法，称为“聚合模型”。我们重点分析了教师模型的激活统计特性，特别是损失函数对学生模型质量的影响。通过使用统计归一化技术，我们更好地对齐不同分布，并评估其效果。此外，我们引入了Hadamard矩阵，展示了其在各维度标准化中的有用特性，提出了“PHI标准化”（PHI-S）技术，并实验证明其在多种方法中产生了最佳的学生模型。'}}}, {'id': 'https://huggingface.co/papers/2410.01215', 'title': 'From Code to Correctness: Closing the Last Mile of Code Generation with Hierarchical Debugging', 'url': 'https://huggingface.co/papers/2410.01215', 'abstract': 'While large language models have made significant strides in code generation, the pass rate of the generated code is bottlenecked on subtle errors, often requiring human intervention to pass tests, especially for complex problems. Existing LLM-based debugging systems treat generated programs as monolithic units, failing to address bugs at multiple levels of granularity, from low-level syntax errors to high-level algorithmic flaws. In this paper, we introduce Multi-Granularity Debugger (MGDebugger), a hierarchical code debugger by isolating, identifying, and resolving bugs at various levels of granularity. MGDebugger decomposes problematic code into a hierarchical tree structure of subfunctions, with each level representing a particular granularity of error. During debugging, it analyzes each subfunction and iteratively resolves bugs in a bottom-up manner. To effectively test each subfunction, we propose an LLM-simulated Python executor, which traces code execution and tracks important variable states to pinpoint errors accurately. Extensive experiments demonstrate that MGDebugger outperforms existing debugging systems, achieving an 18.9% improvement in accuracy over seed generations in HumanEval and a 97.6% repair success rate in HumanEvalFix. Furthermore, MGDebugger effectively fixes bugs across different categories and difficulty levels, demonstrating its robustness and effectiveness.', 'score': 30, 'issue_id': 1, 'pub_date': '2024-10-02', 'pub_date_card': {'ru': '2 октября', 'en': 'October 2', 'zh': '10月2日'}, 'hash': 'c2e7c70ae8f76f9b', 'authors': ['Yuling Shi', 'Songsong Wang', 'Chengcheng Wan', 'Xiaodong Gu'], 'affiliations': ['East China Normal University', 'Shanghai Jiao Tong University', 'UC Davis'], 'pdf_title_img': 'assets\\pdf\\title_img\\2410.01215.jpg', 'data': {'categories': ['#reasoning', '#inference', '#interpretability', '#plp', '#optimization', '#architecture'], 'emoji': '🔍', 'ru': {'title': 'Иерархический отладчик кода нового поколения для ИИ-генерации', 'desc': 'Статья представляет новый подход к отладке кода, генерируемого большими языковыми моделями (LLM). Предложенный метод, названный Multi-Granularity Debugger (MGDebugger), декомпозирует проблемный код в иерархическую структуру подфункций и анализирует ошибки на разных уровнях детализации. MGDebugger использует LLM-симулированный исполнитель Python для точного определения ошибок. Эксперименты показали значительное улучшение точности генерации кода и высокую эффективность исправления ошибок различных типов и уровней сложности.'}, 'en': {'title': 'Hierarchical Debugging for Enhanced Code Generation Accuracy', 'desc': 'This paper presents the Multi-Granularity Debugger (MGDebugger), a novel hierarchical debugging system designed to improve the accuracy of code generated by large language models (LLMs). Unlike traditional debugging systems that treat code as a single unit, MGDebugger breaks down code into a tree structure of subfunctions, allowing for the identification and resolution of errors at various levels of granularity. The system employs an LLM-simulated Python executor to trace code execution and monitor variable states, enabling precise error detection. Experimental results show that MGDebugger significantly enhances debugging performance, achieving higher accuracy and repair success rates compared to existing methods.'}, 'zh': {'title': '多粒度调试，提升代码修复效率', 'desc': '本文介绍了一种新的代码调试系统，称为多粒度调试器（MGDebugger）。该系统通过将代码分解为层次树结构，能够在不同粒度上识别和解决错误，从低级语法错误到高级算法缺陷。MGDebugger采用自底向上的方法，逐个分析子函数并解决问题，确保调试过程的高效性。实验结果表明，MGDebugger在代码修复的准确性和成功率上均优于现有的调试系统。'}}}, {'id': 'https://huggingface.co/papers/2410.01647', 'title': '3DGS-DET: Empower 3D Gaussian Splatting with Boundary Guidance and Box-Focused Sampling for 3D Object Detection', 'url': 'https://huggingface.co/papers/2410.01647', 'abstract': 'Neural Radiance Fields (NeRF) are widely used for novel-view synthesis and have been adapted for 3D Object Detection (3DOD), offering a promising approach to 3DOD through view-synthesis representation. However, NeRF faces inherent limitations: (i) limited representational capacity for 3DOD due to its implicit nature, and (ii) slow rendering speeds. Recently, 3D Gaussian Splatting (3DGS) has emerged as an explicit 3D representation that addresses these limitations. Inspired by these advantages, this paper introduces 3DGS into 3DOD for the first time, identifying two main challenges: (i) Ambiguous spatial distribution of Gaussian blobs: 3DGS primarily relies on 2D pixel-level supervision, resulting in unclear 3D spatial distribution of Gaussian blobs and poor differentiation between objects and background, which hinders 3DOD; (ii) Excessive background blobs: 2D images often include numerous background pixels, leading to densely reconstructed 3DGS with many noisy Gaussian blobs representing the background, negatively affecting detection. To tackle the challenge (i), we leverage the fact that 3DGS reconstruction is derived from 2D images, and propose an elegant and efficient solution by incorporating 2D Boundary Guidance to significantly enhance the spatial distribution of Gaussian blobs, resulting in clearer differentiation between objects and their background. To address the challenge (ii), we propose a Box-Focused Sampling strategy using 2D boxes to generate object probability distribution in 3D spaces, allowing effective probabilistic sampling in 3D to retain more object blobs and reduce noisy background blobs. Benefiting from our designs, our 3DGS-DET significantly outperforms the SOTA NeRF-based method, NeRF-Det, achieving improvements of +6.6 on mAP@0.25 and +8.1 on mAP@0.5 for the ScanNet dataset, and impressive +31.5 on mAP@0.25 for the ARKITScenes dataset.', 'score': 28, 'issue_id': 1, 'pub_date': '2024-10-02', 'pub_date_card': {'ru': '2 октября', 'en': 'October 2', 'zh': '10月2日'}, 'hash': '951512e0e25bc7da', 'authors': ['Yang Cao', 'Yuanliang Jv', 'Dan Xu'], 'affiliations': ['Department of Computer Science and Engineering, HKUST'], 'pdf_title_img': 'assets\\pdf\\title_img\\2410.01647.jpg', 'data': {'categories': ['#dataset', '#cv', '#graphs', '#optimization', '#benchmark', '#3d'], 'emoji': '🕵️', 'ru': {'title': '3DGS-DET: Революция в 3D-детекции объектов', 'desc': 'Статья представляет новый подход к задаче 3D-детекции объектов, основанный на методе 3D Gaussian Splatting (3DGS). Авторы предлагают решение двух основных проблем: неоднозначное пространственное распределение гауссовых блобов и избыточное количество фоновых блобов. Для решения первой проблемы вводится 2D Boundary Guidance, улучшающее пространственное распределение блобов. Вторая проблема решается с помощью стратегии Box-Focused Sampling, которая позволяет эффективно выбирать блобы объектов и уменьшать количество шумовых фоновых блобов.'}, 'en': {'title': 'Enhancing 3D Object Detection with 3D Gaussian Splatting', 'desc': 'This paper presents a novel approach to 3D Object Detection (3DOD) by integrating 3D Gaussian Splatting (3DGS) with traditional methods. The authors identify two main challenges with 3DGS: unclear spatial distribution of Gaussian blobs and excessive background noise from 2D images. To improve the clarity of object differentiation, they introduce 2D Boundary Guidance, which enhances the spatial arrangement of Gaussian blobs. Additionally, they propose a Box-Focused Sampling strategy to effectively reduce background noise while retaining important object information, leading to significant performance improvements over existing methods.'}, 'zh': {'title': '3DGS：提升三维物体检测的新方法', 'desc': '神经辐射场（NeRF）在新视角合成和三维物体检测（3DOD）中得到了广泛应用，但存在一些固有的局限性，如隐式表示导致的表示能力有限和渲染速度慢。最近，三维高斯点云（3DGS）作为一种显式三维表示方法，解决了这些问题。本文首次将3DGS引入3DOD，提出了两个主要挑战：高斯斑点的空间分布模糊和背景斑点过多。为了解决这些挑战，本文提出了2D边界引导和基于盒子的采样策略，从而显著提高了物体与背景的区分度，并减少了噪声背景斑点。'}}}, {'id': 'https://huggingface.co/papers/2410.01748', 'title': 'Not All LLM Reasoners Are Created Equal', 'url': 'https://huggingface.co/papers/2410.01748', 'abstract': 'We study the depth of grade-school math (GSM) problem-solving capabilities of LLMs. To this end, we evaluate their performance on pairs of existing math word problems together so that the answer to the second problem depends on correctly answering the first problem. Our findings reveal a significant reasoning gap in most LLMs, that is performance difference between solving the compositional pairs and solving each question independently. This gap is more pronounced in smaller, more cost-efficient, and math-specialized models. Moreover, instruction-tuning recipes and code generation have varying effects across LLM sizes, while finetuning on GSM can lead to task overfitting. Our analysis indicates that large reasoning gaps are not because of test-set leakage, but due to distraction from additional context and poor second-hop reasoning. Overall, LLMs exhibit systematic differences in their reasoning abilities, despite what their performance on standard benchmarks indicates.', 'score': 27, 'issue_id': 1, 'pub_date': '2024-10-02', 'pub_date_card': {'ru': '2 октября', 'en': 'October 2', 'zh': '10月2日'}, 'hash': '4179f096b2f31dbd', 'authors': ['Arian Hosseini', 'Alessandro Sordoni', 'Daniel Toyama', 'Aaron Courville', 'Rishabh Agarwal'], 'affiliations': ['Google DeepMind', 'Microsoft Research', 'Mila'], 'pdf_title_img': 'assets\\pdf\\title_img\\2410.01748.jpg', 'data': {'categories': ['#reasoning', '#leakage', '#training', '#math', '#optimization', '#benchmark', '#small_models'], 'emoji': '🧮', 'ru': {'title': 'Языковые модели спотыкаются на связанных математических задачах', 'desc': 'Исследователи изучают глубину способностей языковых моделей (LLM) решать математические задачи школьного уровня. Они оценивают производительность моделей на парах связанных задач, где ответ на вторую зависит от правильного решения первой. Результаты показывают значительный разрыв в рассуждениях у большинства LLM, особенно заметный в меньших, более экономичных и специализированных на математике моделях. Анализ указывает на то, что большие разрывы в рассуждениях связаны с отвлечением на дополнительный контекст и слабым рассуждением на втором шаге, а не с утечкой тестовых данных.'}, 'en': {'title': 'Uncovering Reasoning Gaps in LLMs for Math Problem Solving', 'desc': 'This paper investigates how well large language models (LLMs) can solve grade-school math problems, especially when the answer to one problem relies on the answer to another. The study finds that there is a notable reasoning gap, meaning LLMs perform worse when problems are connected compared to when they are solved separately. Smaller and more specialized models show an even larger gap in their reasoning abilities. The research suggests that this gap is not due to issues like test-set leakage, but rather because of distractions from extra information and difficulties in multi-step reasoning.'}, 'zh': {'title': '揭示大型语言模型的推理差距', 'desc': '我们研究了大型语言模型（LLMs）在解决小学数学问题（GSM）方面的能力。通过评估模型在一对数学应用题上的表现，我们发现大多数LLMs在解决组合问题时存在显著的推理差距。这个差距在较小、成本效益高且专注于数学的模型中更为明显。此外，指令调优和代码生成对不同规模的LLMs有不同的影响，而在GSM上进行微调可能导致任务过拟合。'}}}, {'id': 'https://huggingface.co/papers/2410.01744', 'title': 'LEOPARD : A Vision Language Model For Text-Rich Multi-Image Tasks', 'url': 'https://huggingface.co/papers/2410.01744', 'abstract': "Text-rich images, where text serves as the central visual element guiding the overall understanding, are prevalent in real-world applications, such as presentation slides, scanned documents, and webpage snapshots. Tasks involving multiple text-rich images are especially challenging, as they require not only understanding the content of individual images but reasoning about inter-relationships and logical flows across multiple visual inputs. Despite the importance of these scenarios, current multimodal large language models (MLLMs) struggle to handle such tasks due to two key challenges: (1) the scarcity of high-quality instruction tuning datasets for text-rich multi-image scenarios, and (2) the difficulty in balancing image resolution with visual feature sequence length. To address these challenges, we propose \\OurMethod, a MLLM designed specifically for handling vision-language tasks involving multiple text-rich images. First, we curated about one million high-quality multimodal instruction-tuning data, tailored to text-rich, multi-image scenarios. Second, we developed an adaptive high-resolution multi-image encoding module to dynamically optimize the allocation of visual sequence length based on the original aspect ratios and resolutions of the input images. Experiments across a wide range of benchmarks demonstrate our model's superior capabilities in text-rich, multi-image evaluations and competitive performance in general domain evaluations.", 'score': 25, 'issue_id': 1, 'pub_date': '2024-10-02', 'pub_date_card': {'ru': '2 октября', 'en': 'October 2', 'zh': '10月2日'}, 'hash': 'f787d537107fa831', 'authors': ['Mengzhao Jia', 'Wenhao Yu', 'Kaixin Ma', 'Tianqing Fang', 'Zhihan Zhang', 'Siru Ouyang', 'Hongming Zhang', 'Meng Jiang', 'Dong Yu'], 'affiliations': ['Tencent AI Seattle Lab', 'UIUC', 'University of Notre Dame'], 'pdf_title_img': 'assets\\pdf\\title_img\\2410.01744.jpg', 'data': {'categories': ['#reasoning', '#dataset', '#cv', '#training', '#data', '#optimization', '#transfer_learning', '#benchmark', '#architecture', '#synthetic', '#multimodal'], 'emoji': '📊', 'ru': {'title': 'Революция в обработке текстовых изображений: новая MLLM для многозадачного анализа', 'desc': 'Статья представляет новый мультимодальный языковой модель (MLLM) для обработки задач, связанных с несколькими изображениями, богатыми текстом. Авторы создали обширный набор данных для обучения модели на таких сценариях. Они также разработали адаптивный модуль кодирования изображений высокого разрешения для оптимизации обработки визуальной информации. Эксперименты показали превосходство предложенной модели в задачах с текстовыми изображениями и конкурентоспособность в общих задачах.'}, 'en': {'title': 'Empowering Multimodal Understanding with OurMethod', 'desc': 'This paper introduces a new multimodal large language model (MLLM) called \textit{OurMethod}, which is designed to effectively process and understand multiple text-rich images. The authors address two main challenges: the lack of quality datasets for training and the need to balance image resolution with the length of visual features. They created a dataset of about one million high-quality instruction-tuning examples specifically for text-rich, multi-image tasks. Additionally, they developed a novel encoding module that adapts to the resolutions of input images, leading to improved performance in evaluating text-rich, multi-image scenarios.'}, 'zh': {'title': '专为文本丰富图像设计的多模态模型', 'desc': '本文提出了一种新的多模态大语言模型（MLLM），专门用于处理包含多个文本丰富图像的视觉-语言任务。我们首先收集了约一百万个高质量的多模态指令调优数据，专门针对文本丰富的多图像场景。其次，我们开发了一种自适应高分辨率多图像编码模块，能够根据输入图像的原始纵横比和分辨率动态优化视觉序列长度的分配。实验结果表明，我们的模型在文本丰富的多图像评估中表现优越，并在一般领域评估中也具有竞争力。'}}}, {'id': 'https://huggingface.co/papers/2410.01257', 'title': 'HelpSteer2-Preference: Complementing Ratings with Preferences', 'url': 'https://huggingface.co/papers/2410.01257', 'abstract': 'Reward models are critical for aligning models to follow instructions, and are typically trained following one of two popular paradigms: Bradley-Terry style or Regression style. However, there is a lack of evidence that either approach is better than the other, when adequately matched for data. This is primarily because these approaches require data collected in different (but incompatible) formats, meaning that adequately matched data is not available in existing public datasets. To tackle this problem, we release preference annotations (designed for Bradley-Terry training) to complement existing ratings (designed for Regression style training) in the HelpSteer2 dataset. To improve data interpretability, preference annotations are accompanied with human-written justifications. Using this data, we conduct the first head-to-head comparison of Bradley-Terry and Regression models when adequately matched for data. Based on insights derived from such a comparison, we propose a novel approach to combine Bradley-Terry and Regression reward modeling. A Llama-3.1-70B-Instruct model tuned with this approach scores 94.1 on RewardBench, emerging top of more than 140 reward models as of 1 Oct 2024. We also demonstrate the effectiveness of this reward model at aligning models to follow instructions in RLHF. We open-source this dataset (CC-BY-4.0 license) at https://huggingface.co/datasets/nvidia/HelpSteer2 and openly release the trained Reward Model at https://huggingface.co/nvidia/Llama-3.1-Nemotron-70B-Reward', 'score': 21, 'issue_id': 1, 'pub_date': '2024-10-02', 'pub_date_card': {'ru': '2 октября', 'en': 'October 2', 'zh': '10月2日'}, 'hash': 'b46023070ee2b9c9', 'authors': ['Zhilin Wang', 'Alexander Bukharin', 'Olivier Delalleau', 'Daniel Egert', 'Gerald Shen', 'Jiaqi Zeng', 'Oleksii Kuchaiev', 'Yi Dong'], 'affiliations': ['NVIDIA'], 'pdf_title_img': 'assets\\pdf\\title_img\\2410.01257.jpg', 'data': {'categories': ['#dataset', '#training', '#interpretability', '#alignment', '#benchmark', '#open_source', '#rlhf'], 'emoji': '🏆', 'ru': {'title': 'Объединение лучшего из двух миров в обучении моделей вознаграждения', 'desc': 'Статья представляет сравнительный анализ двух популярных подходов к обучению моделей вознаграждения: стиль Брэдли-Терри и регрессионный стиль. Авторы выпустили новый набор данных HelpSteer2, содержащий аннотации предпочтений и рейтинги для справедливого сравнения этих методов. На основе результатов исследования предложен новый подход, объединяющий оба стиля обучения моделей вознаграждения. Модель Llama-3.1-70B-Instruct, обученная с использованием этого подхода, достигла наивысшего результата в 94.1 балла на бенчмарке RewardBench.'}, 'en': {'title': 'Bridging Reward Models: A New Approach for Better Alignment', 'desc': 'This paper discusses the importance of reward models in aligning machine learning models to follow instructions. It compares two popular training paradigms for these models: Bradley-Terry and Regression styles, highlighting the lack of compatible data for a fair comparison. To address this, the authors introduce preference annotations to the HelpSteer2 dataset, which allows for a direct comparison of the two approaches. They also propose a new method that combines both paradigms, resulting in a highly effective reward model that outperforms others in the field.'}, 'zh': {'title': '奖励模型的创新对比与结合', 'desc': '本论文探讨了奖励模型在指令对齐中的重要性，主要比较了Bradley-Terry风格和回归风格的训练方法。由于这两种方法需要不同格式的数据，导致现有公共数据集中缺乏适当匹配的数据。为了解决这个问题，我们在HelpSteer2数据集中发布了用于Bradley-Terry训练的偏好注释，并附上了人类撰写的理由，以提高数据的可解释性。通过对比实验，我们提出了一种新方法，将Bradley-Terry和回归奖励建模相结合，最终在RewardBench上取得了94.1的高分。'}}}, {'id': 'https://huggingface.co/papers/2410.01463', 'title': 'Selective Aggregation for Low-Rank Adaptation in Federated Learning', 'url': 'https://huggingface.co/papers/2410.01463', 'abstract': 'We investigate LoRA in federated learning through the lens of the asymmetry analysis of the learned A and B matrices. In doing so, we uncover that A matrices are responsible for learning general knowledge, while B matrices focus on capturing client-specific knowledge. Based on this finding, we introduce Federated Share-A Low-Rank Adaptation (FedSA-LoRA), which employs two low-rank trainable matrices A and B to model the weight update, but only A matrices are shared with the server for aggregation. Moreover, we delve into the relationship between the learned A and B matrices in other LoRA variants, such as rsLoRA and VeRA, revealing a consistent pattern. Consequently, we extend our FedSA-LoRA method to these LoRA variants, resulting in FedSA-rsLoRA and FedSA-VeRA. In this way, we establish a general paradigm for integrating LoRA with FL, offering guidance for future work on subsequent LoRA variants combined with FL. Extensive experimental results on natural language understanding and generation tasks demonstrate the effectiveness of the proposed method.', 'score': 18, 'issue_id': 1, 'pub_date': '2024-10-02', 'pub_date_card': {'ru': '2 октября', 'en': 'October 2', 'zh': '10月2日'}, 'hash': '44e5fb0db92df32b', 'authors': ['Pengxin Guo', 'Shuang Zeng', 'Yanran Wang', 'Huijie Fan', 'Feifei Wang', 'Liangqiong Qu'], 'affiliations': ['Shenyang Institute of Automation, Chinese Academy of Sciences', 'Stanford University', 'The University of Hong Kong'], 'pdf_title_img': 'assets\\pdf\\title_img\\2410.01463.jpg', 'data': {'categories': ['#training', '#rl', '#optimization', '#transfer_learning', '#architecture'], 'emoji': '🧠', 'ru': {'title': 'LoRA в федеративном обучении: разделяй и властвуй', 'desc': 'Исследование применения метода LoRA в федеративном обучении выявило, что матрицы A отвечают за общие знания, а матрицы B - за специфические для клиента. На основе этого был разработан метод FedSA-LoRA, где только матрицы A передаются на сервер для агрегации. Аналогичная закономерность наблюдается и в других вариантах LoRA, что позволило создать FedSA-rsLoRA и FedSA-VeRA. Экспериментальные результаты на задачах обработки естественного языка подтверждают эффективность предложенного подхода.'}, 'en': {'title': 'Enhancing Federated Learning with Low-Rank Adaptation', 'desc': 'This paper explores the use of Low-Rank Adaptation (LoRA) in federated learning by analyzing the asymmetry of the learned matrices A and B. It finds that matrix A captures general knowledge applicable across clients, while matrix B focuses on client-specific information. The authors propose a new method called Federated Share-A Low-Rank Adaptation (FedSA-LoRA), which shares only the A matrices with the server for aggregation, enhancing privacy and efficiency. They also extend this approach to other LoRA variants, establishing a comprehensive framework for integrating LoRA with federated learning, supported by strong experimental results in natural language tasks.'}, 'zh': {'title': '联邦学习中的低秩适应新范式', 'desc': '本文研究了在联邦学习中使用LoRA的方式，分析了学习到的A和B矩阵的不对称性。研究发现，A矩阵负责学习通用知识，而B矩阵则专注于捕捉客户端特定的知识。基于这一发现，提出了联邦共享低秩适应（FedSA-LoRA）方法，该方法使用两个低秩可训练矩阵A和B来建模权重更新，但仅共享A矩阵与服务器进行聚合。通过对其他LoRA变体（如rsLoRA和VeRA）中学习到的A和B矩阵的关系进行深入探讨，建立了将LoRA与联邦学习结合的一般范式，为未来的研究提供了指导。'}}}, {'id': 'https://huggingface.co/papers/2409.20059', 'title': 'Is Preference Alignment Always the Best Option to Enhance LLM-Based Translation? An Empirical Analysis', 'url': 'https://huggingface.co/papers/2409.20059', 'abstract': 'Neural metrics for machine translation (MT) evaluation have become increasingly prominent due to their superior correlation with human judgments compared to traditional lexical metrics. Researchers have therefore utilized neural metrics through quality-informed decoding strategies, achieving better results than likelihood-based methods. With the rise of Large Language Models (LLMs), preference-based alignment techniques have gained attention for their potential to enhance translation quality by optimizing model weights directly on preferences induced by quality estimators. This study focuses on Contrastive Preference Optimization (CPO) and conducts extensive experiments to evaluate the impact of preference-based alignment on translation quality. Our findings indicate that while CPO consistently outperforms Supervised Fine-Tuning (SFT) on high-quality data with regard to the alignment metric, it may lead to instability across downstream evaluation metrics, particularly between neural and lexical ones. Additionally, we demonstrate that relying solely on the base model for generating candidate translations achieves performance comparable to using multiple external systems, while ensuring better consistency across downstream metrics.', 'score': 15, 'issue_id': 1, 'pub_date': '2024-09-30', 'pub_date_card': {'ru': '30 сентября', 'en': 'September 30', 'zh': '9月30日'}, 'hash': '700774cfa4699b68', 'authors': ['Hippolyte Gisserot-Boukhlef', 'Ricardo Rei', 'Emmanuel Malherbe', 'Céline Hudelot', 'Pierre Colombo', 'Nuno M. Guerreiro'], 'affiliations': ['Artefact Research Center', 'CentraleSupélec, Université Paris-Saclay', 'Equall', 'Instituto Superior Técnico & Universidade de Lisboa (Lisbon ELLIS Unit)', 'Instituto de Telecomunicações', 'Unbabel'], 'pdf_title_img': 'assets\\pdf\\title_img\\2409.20059.jpg', 'data': {'categories': ['#multilingual', '#training', '#machine_translation', '#optimization', '#alignment', '#architecture'], 'emoji': '🔄', 'ru': {'title': 'Оптимизация предпочтений для улучшения качества машинного перевода', 'desc': 'Это исследование посвящено применению нейронных метрик для оценки качества машинного перевода и использованию предпочтений для оптимизации моделей перевода. Авторы изучают метод Contrastive Preference Optimization (CPO) и сравнивают его с традиционным дообучением на размеченных данных. Результаты показывают, что CPO превосходит обычное дообучение по метрике выравнивания, но может приводить к нестабильности других метрик оценки. Также выявлено, что использование только базовой модели для генерации кандидатов перевода дает результаты, сопоставимые с применением нескольких внешних систем.'}, 'en': {'title': 'Enhancing Translation Quality with Contrastive Preference Optimization', 'desc': 'This paper discusses the use of neural metrics for evaluating machine translation (MT), which are more aligned with human judgment than traditional methods. It highlights the effectiveness of quality-informed decoding strategies that leverage these neural metrics, particularly in the context of Large Language Models (LLMs). The study introduces Contrastive Preference Optimization (CPO) as a technique to improve translation quality by directly optimizing model weights based on preferences from quality estimators. The results show that while CPO outperforms Supervised Fine-Tuning (SFT) on high-quality data, it can cause instability in evaluation metrics, and using the base model for generating translations can yield results similar to those from multiple external systems.'}, 'zh': {'title': '提升机器翻译质量的对比偏好优化', 'desc': '本研究探讨了神经度量在机器翻译评估中的应用，显示其与人类判断的相关性优于传统的词汇度量。研究者们通过质量信息解码策略利用神经度量，取得了比基于似然的方法更好的结果。随着大型语言模型的兴起，基于偏好的对齐技术受到关注，能够通过优化模型权重来提升翻译质量。我们的实验表明，尽管对比偏好优化（CPO）在高质量数据上优于监督微调（SFT），但在下游评估指标上可能导致不稳定性。'}}}, {'id': 'https://huggingface.co/papers/2410.01731', 'title': 'ComfyGen: Prompt-Adaptive Workflows for Text-to-Image Generation', 'url': 'https://huggingface.co/papers/2410.01731', 'abstract': 'The practical use of text-to-image generation has evolved from simple, monolithic models to complex workflows that combine multiple specialized components. While workflow-based approaches can lead to improved image quality, crafting effective workflows requires significant expertise, owing to the large number of available components, their complex inter-dependence, and their dependence on the generation prompt. Here, we introduce the novel task of prompt-adaptive workflow generation, where the goal is to automatically tailor a workflow to each user prompt. We propose two LLM-based approaches to tackle this task: a tuning-based method that learns from user-preference data, and a training-free method that uses the LLM to select existing flows. Both approaches lead to improved image quality when compared to monolithic models or generic, prompt-independent workflows. Our work shows that prompt-dependent flow prediction offers a new pathway to improving text-to-image generation quality, complementing existing research directions in the field.', 'score': 15, 'issue_id': 1, 'pub_date': '2024-10-02', 'pub_date_card': {'ru': '2 октября', 'en': 'October 2', 'zh': '10月2日'}, 'hash': 'e1174d1b695405ab', 'pdf_title_img': 'img/title_stub.png', 'data': {'categories': ['#cv', '#training', '#alignment', '#diffusion', '#architecture'], 'emoji': '🎨', 'ru': {'title': 'Адаптивные рабочие процессы: новый путь к улучшению генерации изображений по тексту', 'desc': 'Статья представляет новую задачу генерации рабочих процессов, адаптированных под промпт, для улучшения качества генерации изображений по тексту. Авторы предлагают два подхода на основе больших языковых моделей: метод с дообучением на пользовательских предпочтениях и метод без дополнительного обучения для выбора существующих процессов. Оба подхода показывают улучшение качества изображений по сравнению с монолитными моделями и общими рабочими процессами. Исследование открывает новое направление для повышения качества генерации изображений по тексту.'}, 'en': {'title': 'Tailoring Workflows for Enhanced Image Generation', 'desc': 'This paper discusses advancements in text-to-image generation, moving from simple models to more complex workflows that utilize specialized components. It introduces the task of prompt-adaptive workflow generation, which aims to automatically customize workflows based on user prompts. The authors propose two methods using large language models (LLMs): one that learns from user preferences and another that selects existing workflows without additional training. Both methods enhance image quality compared to traditional models, highlighting the importance of adapting workflows to specific prompts for better results.'}, 'zh': {'title': '基于提示的工作流生成提升图像质量', 'desc': '本文介绍了一种新的任务——基于提示的工作流生成，旨在自动根据用户提示定制工作流。我们提出了两种基于大语言模型（LLM）的方法：一种是基于调优的方法，通过用户偏好数据进行学习；另一种是无训练的方法，利用LLM选择现有的工作流。这两种方法在图像质量上优于单一模型或通用的、与提示无关的工作流。我们的研究表明，基于提示的工作流预测为提高文本到图像生成的质量提供了一条新路径。'}}}, {'id': 'https://huggingface.co/papers/2410.01036', 'title': 'MOSEL: 950,000 Hours of Speech Data for Open-Source Speech Foundation Model Training on EU Languages', 'url': 'https://huggingface.co/papers/2410.01036', 'abstract': 'The rise of foundation models (FMs), coupled with regulatory efforts addressing their risks and impacts, has sparked significant interest in open-source models. However, existing speech FMs (SFMs) fall short of full compliance with the open-source principles, even if claimed otherwise, as no existing SFM has model weights, code, and training data publicly available under open-source terms. In this work, we take the first step toward filling this gap by focusing on the 24 official languages of the European Union (EU). We collect suitable training data by surveying automatic speech recognition datasets and unlabeled speech corpora under open-source compliant licenses, for a total of 950k hours. Additionally, we release automatic transcripts for 441k hours of unlabeled data under the permissive CC-BY license, thereby facilitating the creation of open-source SFMs for the EU languages.', 'score': 14, 'issue_id': 1, 'pub_date': '2024-10-01', 'pub_date_card': {'ru': '1 октября', 'en': 'October 1', 'zh': '10月1日'}, 'hash': '9714f6cb6169fec1', 'authors': ['Marco Gaido', 'Sara Papi', 'Luisa Bentivogli', 'Alessio Brutti', 'Mauro Cettolo', 'Roberto Gretter', 'Marco Matassoni', 'Mohamed Nabih', 'Matteo Negri'], 'affiliations': ['Fondazione Bruno Kessler, Italy'], 'pdf_title_img': 'assets\\pdf\\title_img\\2410.01036.jpg', 'data': {'categories': ['#audio', '#dataset', '#multilingual', '#data', '#open_source', '#low_resource'], 'emoji': '🗣️', 'ru': {'title': 'Открытые речевые модели для языков ЕС: первый шаг сделан', 'desc': 'Статья посвящена разработке открытых речевых фундаментальных моделей (SFM) для 24 официальных языков Европейского Союза. Авторы собрали 950 тысяч часов речевых данных с открытыми лицензиями для обучения моделей. Они также опубликовали автоматические транскрипции для 441 тысячи часов неразмеченных данных под лицензией CC-BY. Это первый шаг к созданию полностью открытых SFM, так как существующие модели не соответствуют всем принципам открытого исходного кода.'}, 'en': {'title': 'Building Open-Source Speech Models for EU Languages', 'desc': 'This paper addresses the limitations of existing speech foundation models (SFMs) in terms of open-source compliance. It highlights that no current SFM provides model weights, code, and training data that are fully accessible under open-source terms. The authors collect a substantial dataset of 950,000 hours of training data from various automatic speech recognition datasets and unlabeled speech corpora that comply with open-source licenses. They also release automatic transcripts for 441,000 hours of unlabeled data, promoting the development of open-source SFMs for the 24 official languages of the European Union.'}, 'zh': {'title': '推动欧盟语言的开源语音模型发展', 'desc': '本论文关注基础模型（FMs）在开源模型中的应用，特别是语音基础模型（SFMs）。目前，现有的语音基础模型未能完全遵循开源原则，因为没有公开的模型权重、代码和训练数据。我们针对欧盟的24种官方语言，收集了符合开源许可的自动语音识别数据集和未标记语音语料，总计达到950k小时。我们还发布了441k小时未标记数据的自动转录，促进了欧盟语言的开源语音基础模型的创建。'}}}, {'id': 'https://huggingface.co/papers/2410.01769', 'title': 'Quantifying Generalization Complexity for Large Language Models', 'url': 'https://huggingface.co/papers/2410.01769', 'abstract': "While large language models (LLMs) have shown exceptional capabilities in understanding complex queries and performing sophisticated tasks, their generalization abilities are often deeply entangled with memorization, necessitating more precise evaluation. To address this challenge, we introduce Scylla, a dynamic evaluation framework that quantitatively measures the generalization abilities of LLMs. Scylla disentangles generalization from memorization via assessing model performance on both in-distribution (ID) and out-of-distribution (OOD) data through 20 tasks across 5 levels of complexity. Through extensive experiments, we uncover a non-monotonic relationship between task complexity and the performance gap between ID and OOD data, which we term the generalization valley. Specifically, this phenomenon reveals a critical threshold - referred to as critical complexity - where reliance on non-generalizable behavior peaks, indicating the upper bound of LLMs' generalization capabilities. As model size increases, the critical complexity shifts toward higher levels of task complexity, suggesting that larger models can handle more complex reasoning tasks before over-relying on memorization. Leveraging Scylla and the concept of critical complexity, we benchmark 28LLMs including both open-sourced models such as LLaMA and Qwen families, and close-sourced models like Claude and GPT, providing a more robust evaluation and establishing a clearer understanding of LLMs' generalization capabilities.", 'score': 13, 'issue_id': 1, 'pub_date': '2024-10-02', 'pub_date_card': {'ru': '2 октября', 'en': 'October 2', 'zh': '10月2日'}, 'hash': 'fc737f630759a34b', 'authors': ['Zhenting Qi', 'Hongyin Luo', 'Xuliang Huang', 'Zhuokai Zhao', 'Yibo Jiang', 'Xiangjun Fan', 'Himabindu Lakkaraju', 'James Glass'], 'affiliations': ['Harvard University', 'Massachusetts Institute of Technology', 'Meta', 'University of Chicago', 'University of Illinois Urbana-Champaign'], 'pdf_title_img': 'assets\\pdf\\title_img\\2410.01769.jpg', 'data': {'categories': ['#reasoning', '#training', '#interpretability', '#benchmark', '#open_source', '#architecture'], 'emoji': '🧠', 'ru': {'title': 'Scylla: раскрывая границы обобщения языковых моделей', 'desc': "Статья представляет Scylla - динамическую систему оценки, измеряющую способности больших языковых моделей (LLM) к обобщению. Scylla разделяет обобщение и запоминание, оценивая производительность модели на данных распределения обучения и вне его через 20 задач разной сложности. Исследование выявило нелинейную зависимость между сложностью задачи и разрывом в производительности, названную 'долиной обобщения'. Обнаружено, что с увеличением размера модели критическая сложность смещается в сторону более сложных задач рассуждения."}, 'en': {'title': 'Scylla: Unraveling Generalization in Large Language Models', 'desc': "This paper presents Scylla, a new evaluation framework designed to measure the generalization abilities of large language models (LLMs) while separating it from memorization. Scylla evaluates model performance on both in-distribution (ID) and out-of-distribution (OOD) data across various tasks of increasing complexity. The study identifies a phenomenon called the 'generalization valley,' which highlights a critical complexity threshold where models tend to rely more on memorization rather than generalization. Additionally, it shows that as LLMs grow in size, they can tackle more complex tasks before this reliance on memorization becomes problematic."}, 'zh': {'title': '揭示大型语言模型的泛化能力', 'desc': '本文介绍了一种名为Scylla的动态评估框架，用于定量测量大型语言模型（LLMs）的泛化能力。Scylla通过评估模型在分布内（ID）和分布外（OOD）数据上的表现，来区分泛化与记忆化。研究发现任务复杂性与ID和OOD数据之间的性能差距呈现非单调关系，称为泛化谷。随着模型规模的增加，关键复杂性向更高的任务复杂性移动，表明更大的模型在过度依赖记忆化之前能够处理更复杂的推理任务。'}}}, {'id': 'https://huggingface.co/papers/2410.01691', 'title': 'FactAlign: Long-form Factuality Alignment of Large Language Models', 'url': 'https://huggingface.co/papers/2410.01691', 'abstract': "Large language models have demonstrated significant potential as the next-generation information access engines. However, their reliability is hindered by issues of hallucination and generating non-factual content. This is particularly problematic in long-form responses, where assessing and ensuring factual accuracy is complex. In this paper, we address this gap by proposing FactAlign, a novel alignment framework designed to enhance the factuality of LLMs' long-form responses while maintaining their helpfulness. We introduce fKTO, a fine-grained, sentence-level alignment algorithm that extends the Kahneman-Tversky Optimization (KTO) alignment method. Leveraging recent advances in automatic factuality evaluation, FactAlign utilizes fine-grained factuality assessments to guide the alignment process. Our experiments on open-domain prompts and information-seeking questions demonstrate that FactAlign significantly improves the factual accuracy of LLM responses while also improving their helpfulness. Further analyses identify that FactAlign is capable of training LLMs to provide more information without losing factual precision, thus improving the factual F1 score. Our source code, datasets, and trained models are publicly available at https://github.com/MiuLab/FactAlign", 'score': 8, 'issue_id': 1, 'pub_date': '2024-10-02', 'pub_date_card': {'ru': '2 октября', 'en': 'October 2', 'zh': '10月2日'}, 'hash': '3b5151bf24c4fc15', 'authors': ['Chao-Wei Huang', 'Yun-Nung Chen'], 'affiliations': ['National Taiwan University, Taipei, Taiwan'], 'pdf_title_img': 'assets\\pdf\\title_img\\2410.01691.jpg', 'data': {'categories': ['#dataset', '#hallucinations', '#long_context', '#training', '#alignment', '#open_source', '#architecture'], 'emoji': '🔍', 'ru': {'title': 'FactAlign: повышение фактической точности языковых моделей без потери полезности', 'desc': 'Статья представляет FactAlign - новую систему для улучшения фактической точности длинных ответов языковых моделей. Авторы предлагают алгоритм fKTO, который оптимизирует ответы на уровне предложений, используя автоматическую оценку фактичности. Эксперименты показывают, что FactAlign значительно повышает фактическую точность ответов, сохраняя их полезность. Система также обучает модели предоставлять больше информации без потери точности.'}, 'en': {'title': 'Enhancing Factual Accuracy in Language Models with FactAlign', 'desc': 'This paper introduces FactAlign, a new framework aimed at improving the factual accuracy of long-form responses generated by large language models (LLMs). The authors highlight the problem of hallucination, where LLMs produce incorrect or misleading information, especially in extended outputs. FactAlign employs a fine-grained alignment algorithm called fKTO, which enhances the alignment process by using detailed factuality evaluations. Experimental results show that FactAlign not only boosts the factual accuracy of LLM responses but also maintains their overall helpfulness, leading to better performance in information retrieval tasks.'}, 'zh': {'title': '提升大型语言模型的事实准确性与有用性', 'desc': '本论文提出了一种新的对齐框架FactAlign，旨在提高大型语言模型（LLMs）在长文本响应中的事实准确性。我们引入了一种细粒度的句子级对齐算法fKTO，扩展了Kahneman-Tversky优化方法。FactAlign利用自动事实评估的最新进展，通过细粒度的事实评估来指导对齐过程。实验结果表明，FactAlign显著提高了LLM响应的事实准确性和有用性，同时保持了信息的丰富性。'}}}, {'id': 'https://huggingface.co/papers/2410.02197', 'title': 'General Preference Modeling with Preference Representations for Aligning Language Models', 'url': 'https://huggingface.co/papers/2410.02197', 'abstract': 'Modeling human preferences is crucial for aligning foundation models with human values. Traditional reward modeling methods, such as the Bradley-Terry (BT) reward model, fall short in expressiveness, particularly in addressing intransitive preferences. Although supervised pair preference models (PairPM) can express general preferences, their implementation is highly ad-hoc and cannot guarantee a consistent preference probability of compared pairs. Additionally, they impose high computational costs due to their quadratic query complexity when comparing multiple responses. In this paper, we introduce preference representation learning, an approach that embeds responses into a latent space to capture intricate preference structures efficiently, achieving linear query complexity. Additionally, we propose preference score-based General Preference Optimization (GPO), which generalizes reward-based reinforcement learning from human feedback. Experimental results show that our General Preference representation model (GPM) outperforms the BT reward model on the RewardBench benchmark with a margin of up to 5.6% and effectively models cyclic preferences where any BT reward model behaves like a random guess. Furthermore, evaluations on downstream tasks such as AlpacaEval2.0 and MT-Bench, following the language model post-training with GPO and our general preference model, reveal substantial performance improvements with margins up to 9.3%. These findings indicate that our method may enhance the alignment of foundation models with nuanced human values. The code is available at https://github.com/general-preference/general-preference-model.', 'score': 7, 'issue_id': 1, 'pub_date': '2024-10-03', 'pub_date_card': {'ru': '3 октября', 'en': 'October 3', 'zh': '10月3日'}, 'hash': 'eb2fb462dcfba826', 'authors': ['Yifan Zhang', 'Ge Zhang', 'Yue Wu', 'Kangping Xu', 'Quanquan Gu'], 'affiliations': [], 'pdf_title_img': 'assets\\pdf\\title_img\\2410.02197.jpg', 'data': {'categories': ['#training', '#optimization', '#alignment', '#benchmark', '#open_source', '#rlhf', '#architecture'], 'emoji': '🧠', 'ru': {'title': 'Новый подход к моделированию человеческих предпочтений для alignment ИИ', 'desc': 'Статья представляет новый подход к моделированию предпочтений человека для улучшения alignment крупных языковых моделей. Авторы вводят понятие preference representation learning, которое позволяет эффективно отображать ответы в латентное пространство для захвата сложных структур предпочтений. Также предлагается метод General Preference Optimization (GPO), обобщающий обучение с подкреплением на основе обратной связи от человека. Эксперименты показывают, что предложенная модель General Preference Model (GPM) превосходит традиционные подходы на нескольких бенчмарках, демонстрируя потенциал для улучшения соответствия языковых моделей человеческим ценностям.'}, 'en': {'title': 'Enhancing Model Alignment with Human Preferences through Efficient Learning', 'desc': 'This paper addresses the challenge of aligning foundation models with human preferences by introducing preference representation learning. Unlike traditional reward modeling methods, which struggle with intransitive preferences and high computational costs, this new approach efficiently captures complex preference structures in a latent space. The authors also present General Preference Optimization (GPO), which extends reinforcement learning from human feedback to improve preference modeling. Experimental results demonstrate that their General Preference representation model (GPM) significantly outperforms existing methods, particularly in handling cyclic preferences and enhancing model performance on various tasks.'}, 'zh': {'title': '提升模型与人类价值观的对齐能力', 'desc': '本文提出了一种新的偏好表示学习方法，旨在更好地捕捉人类的复杂偏好结构。与传统的奖励建模方法相比，该方法通过将响应嵌入到潜在空间中，显著提高了表达能力，并降低了计算复杂度。我们还提出了一种基于偏好分数的通用偏好优化方法，能够有效地从人类反馈中进行强化学习。实验结果表明，我们的通用偏好模型在多个基准测试中表现优于传统模型，能够更好地对齐基础模型与人类价值观。'}}}, {'id': 'https://huggingface.co/papers/2410.01171', 'title': 'BordIRlines: A Dataset for Evaluating Cross-lingual Retrieval-Augmented Generation', 'url': 'https://huggingface.co/papers/2410.01171', 'abstract': "Large language models excel at creative generation but continue to struggle with the issues of hallucination and bias. While retrieval-augmented generation (RAG) provides a framework for grounding LLMs' responses in accurate and up-to-date information, it still raises the question of bias: which sources should be selected for inclusion in the context? And how should their importance be weighted? In this paper, we study the challenge of cross-lingual RAG and present a dataset to investigate the robustness of existing systems at answering queries about geopolitical disputes, which exist at the intersection of linguistic, cultural, and political boundaries. Our dataset is sourced from Wikipedia pages containing information relevant to the given queries and we investigate the impact of including additional context, as well as the composition of this context in terms of language and source, on an LLM's response. Our results show that existing RAG systems continue to be challenged by cross-lingual use cases and suffer from a lack of consistency when they are provided with competing information in multiple languages. We present case studies to illustrate these issues and outline steps for future research to address these challenges. We make our dataset and code publicly available at https://github.com/manestay/bordIRlines.", 'score': 5, 'issue_id': 1, 'pub_date': '2024-10-02', 'pub_date_card': {'ru': '2 октября', 'en': 'October 2', 'zh': '10月2日'}, 'hash': '6a353ff492b330af', 'authors': ['Bryan Li', 'Samar Haider', 'Fiona Luo', 'Adwait Agashe', 'Chris Callison-Burch'], 'affiliations': ['University of Pennsylvania, Philadelphia, PA, USA'], 'pdf_title_img': 'assets\\pdf\\title_img\\2410.01171.jpg', 'data': {'categories': ['#dataset', '#hallucinations', '#multilingual', '#rag', '#ethics', '#transfer_learning', '#open_source'], 'emoji': '🌍', 'ru': {'title': 'Преодоление языковых барьеров в retrieval-augmented generation', 'desc': 'Статья исследует проблему кросс-языкового retrieval-augmented generation (RAG) в контексте геополитических споров. Авторы представляют набор данных для изучения устойчивости существующих систем при ответе на запросы, находящиеся на пересечении языковых, культурных и политических границ. Исследование показывает, что современные RAG-системы по-прежнему сталкиваются с трудностями в кросс-языковых сценариях и страдают от отсутствия согласованности при предоставлении противоречивой информации на разных языках. Авторы предлагают направления для будущих исследований по решению этих проблем.'}, 'en': {'title': 'Enhancing LLMs: Tackling Bias and Hallucination in Cross-Lingual Contexts', 'desc': 'This paper addresses the limitations of large language models (LLMs) in generating accurate responses, particularly focusing on the issues of hallucination and bias. It explores retrieval-augmented generation (RAG) as a method to enhance LLMs by grounding their outputs in reliable information, while also questioning how to select and weigh sources effectively. The authors introduce a new dataset aimed at evaluating RAG systems in the context of geopolitical disputes, highlighting the challenges posed by cross-lingual queries. Their findings reveal that current RAG systems struggle with consistency and accuracy when faced with conflicting information across different languages, suggesting a need for further research in this area.'}, 'zh': {'title': '跨语言检索增强生成的挑战与机遇', 'desc': '这篇论文探讨了大型语言模型在生成创意内容时面临的幻觉和偏见问题。尽管检索增强生成（RAG）为大型语言模型的响应提供了准确和最新信息的框架，但在选择信息来源时仍然存在偏见问题。我们研究了跨语言RAG的挑战，并提出了一个数据集，以调查现有系统在回答地缘政治争端查询时的鲁棒性。研究结果表明，现有的RAG系统在处理多语言竞争信息时仍然面临挑战，缺乏一致性。'}}}, {'id': 'https://huggingface.co/papers/2409.18111', 'title': 'E.T. Bench: Towards Open-Ended Event-Level Video-Language Understanding', 'url': 'https://huggingface.co/papers/2409.18111', 'abstract': 'Recent advances in Video Large Language Models (Video-LLMs) have demonstrated their great potential in general-purpose video understanding. To verify the significance of these models, a number of benchmarks have been proposed to diagnose their capabilities in different scenarios. However, existing benchmarks merely evaluate models through video-level question-answering, lacking fine-grained event-level assessment and task diversity. To fill this gap, we introduce E.T. Bench (Event-Level & Time-Sensitive Video Understanding Benchmark), a large-scale and high-quality benchmark for open-ended event-level video understanding. Categorized within a 3-level task taxonomy, E.T. Bench encompasses 7.3K samples under 12 tasks with 7K videos (251.4h total length) under 8 domains, providing comprehensive evaluations. We extensively evaluated 8 Image-LLMs and 12 Video-LLMs on our benchmark, and the results reveal that state-of-the-art models for coarse-level (video-level) understanding struggle to solve our fine-grained tasks, e.g., grounding event-of-interests within videos, largely due to the short video context length, improper time representations, and lack of multi-event training data. Focusing on these issues, we further propose a strong baseline model, E.T. Chat, together with an instruction-tuning dataset E.T. Instruct 164K tailored for fine-grained event-level understanding. Our simple but effective solution demonstrates superior performance in multiple scenarios.', 'score': 5, 'issue_id': 1, 'pub_date': '2024-09-26', 'pub_date_card': {'ru': '26 сентября', 'en': 'September 26', 'zh': '9月26日'}, 'hash': '5135f0df381cd4c0', 'authors': ['Ye Liu', 'Zongyang Ma', 'Zhongang Qi', 'Yang Wu', 'Ying Shan', 'Chang Wen Chen'], 'affiliations': ['ARC Lab, Tencent PCG', 'Institute of Automation, Chinese Academy of Sciences', 'Tencent AI Lab', 'The Hong Kong Polytechnic University'], 'pdf_title_img': 'assets\\pdf\\title_img\\2409.18111.jpg', 'data': {'categories': ['#video', '#long_context', '#training', '#interpretability', '#benchmark', '#games', '#synthetic'], 'emoji': '🎥', 'ru': {'title': 'E.T. Bench: новый стандарт для оценки понимания видео на уровне событий', 'desc': 'Статья представляет новый бенчмарк E.T. Bench для оценки Video-LLM на уровне событий и с учетом времени. Бенчмарк включает 7.3 тысячи образцов по 12 задачам с использованием 7 тысяч видео из 8 доменов. Авторы провели оценку 8 Image-LLM и 12 Video-LLM моделей, выявив их ограничения в решении задач на уровне событий. Для улучшения результатов предложена базовая модель E.T. Chat и набор данных E.T. Instruct 164K для тонкой настройки.'}, 'en': {'title': 'E.T. Bench: Elevating Video Understanding to Event-Level Precision', 'desc': 'This paper discusses the development of E.T. Bench, a new benchmark designed for evaluating Video Large Language Models (Video-LLMs) in understanding events within videos. Unlike previous benchmarks that only assess video-level question-answering, E.T. Bench focuses on fine-grained event-level tasks across various scenarios. The benchmark includes a diverse set of tasks and a large dataset, allowing for comprehensive evaluation of model capabilities. Additionally, the authors introduce E.T. Chat, a baseline model that improves performance on these fine-grained tasks by addressing issues like short video context and inadequate training data.'}, 'zh': {'title': '提升视频理解的细粒度评估', 'desc': '最近，视频大型语言模型（Video-LLMs）在视频理解方面展现了巨大的潜力。为了验证这些模型的能力，研究者们提出了多个基准测试，但现有的基准仅通过视频级问答进行评估，缺乏对事件级的细致评估和任务多样性。为了解决这个问题，我们引入了E.T. Bench，这是一个大规模、高质量的开放式事件级视频理解基准，涵盖了12个任务和7.3K样本。我们还提出了E.T. Chat模型和针对细粒度事件理解的指令调优数据集E.T. Instruct 164K，展示了在多个场景中的优越性能。'}}}, {'id': 'https://huggingface.co/papers/2410.01804', 'title': 'EVER: Exact Volumetric Ellipsoid Rendering for Real-time View Synthesis', 'url': 'https://huggingface.co/papers/2410.01804', 'abstract': 'We present Exact Volumetric Ellipsoid Rendering (EVER), a method for real-time differentiable emission-only volume rendering. Unlike recent rasterization based approach by 3D Gaussian Splatting (3DGS), our primitive based representation allows for exact volume rendering, rather than alpha compositing 3D Gaussian billboards. As such, unlike 3DGS our formulation does not suffer from popping artifacts and view dependent density, but still achieves frame rates of sim!30 FPS at 720p on an NVIDIA RTX4090. Since our approach is built upon ray tracing it enables effects such as defocus blur and camera distortion (e.g. such as from fisheye cameras), which are difficult to achieve by rasterization. We show that our method is more accurate with fewer blending issues than 3DGS and follow-up work on view-consistent rendering, especially on the challenging large-scale scenes from the Zip-NeRF dataset where it achieves sharpest results among real-time techniques.', 'score': 5, 'issue_id': 1, 'pub_date': '2024-10-02', 'pub_date_card': {'ru': '2 октября', 'en': 'October 2', 'zh': '10月2日'}, 'hash': '268be8bd1eb6316f', 'authors': ['Alexander Mai', 'Peter Hedman', 'George Kopanas', 'Dor Verbin', 'David Futschik', 'Qiangeng Xu', 'Falko Kuester', 'Jonathan T. Barron', 'Yinda Zhang'], 'affiliations': ['Google', 'University of California, San Diego'], 'pdf_title_img': 'assets\\pdf\\title_img\\2410.01804.jpg', 'data': {'categories': ['#dataset', '#cv', '#3d'], 'emoji': '🔮', 'ru': {'title': 'EVER: Точный объемный рендеринг в реальном времени', 'desc': 'EVER (Exact Volumetric Ellipsoid Rendering) - это новый метод дифференцируемого рендеринга объемов в реальном времени. В отличие от 3D Gaussian Splatting, EVER обеспечивает точный рендеринг объемов без артефактов и зависимости плотности от ракурса. Метод достигает скорости около 30 кадров в секунду при разрешении 720p на NVIDIA RTX4090. EVER позволяет применять эффекты размытия и искажения камеры, демонстрируя более точные результаты на сложных сценах из датасета Zip-NeRF.'}, 'en': {'title': 'Achieving Real-Time Precision in Volume Rendering with EVER', 'desc': 'The paper introduces Exact Volumetric Ellipsoid Rendering (EVER), a novel technique for real-time volume rendering that focuses on emission-only scenarios. Unlike the 3D Gaussian Splatting (3DGS) method, which uses alpha compositing, EVER employs a primitive-based representation that ensures precise volume rendering without artifacts. This method achieves high frame rates of around 30 FPS at 720p resolution on advanced hardware, while also supporting complex effects like defocus blur and camera distortion. The results demonstrate that EVER outperforms 3DGS in accuracy and blending quality, particularly in large-scale scenes from the Zip-NeRF dataset.'}, 'zh': {'title': '实时精确体积渲染的新方法', 'desc': '我们提出了一种名为精确体积椭球渲染（EVER）的方法，用于实时可微分的仅发射体积渲染。与基于光栅化的3D高斯点云方法（3DGS）不同，我们的原始表示允许进行精确的体积渲染，而不是对3D高斯广告牌进行透明合成。因此，我们的方法没有出现3DGS中的弹跳伪影和视角依赖密度问题，同时在NVIDIA RTX4090上仍能以720p的分辨率达到每秒30帧的帧率。由于我们的方法基于光线追踪，它能够实现模糊和相机畸变等效果，这些效果在光栅化中难以实现。'}}}, {'id': 'https://huggingface.co/papers/2410.00296', 'title': 'VLMGuard: Defending VLMs against Malicious Prompts via Unlabeled Data', 'url': 'https://huggingface.co/papers/2410.00296', 'abstract': 'Vision-language models (VLMs) are essential for contextual understanding of both visual and textual information. However, their vulnerability to adversarially manipulated inputs presents significant risks, leading to compromised outputs and raising concerns about the reliability in VLM-integrated applications. Detecting these malicious prompts is thus crucial for maintaining trust in VLM generations. A major challenge in developing a safeguarding prompt classifier is the lack of a large amount of labeled benign and malicious data. To address the issue, we introduce VLMGuard, a novel learning framework that leverages the unlabeled user prompts in the wild for malicious prompt detection. These unlabeled prompts, which naturally arise when VLMs are deployed in the open world, consist of both benign and malicious information. To harness the unlabeled data, we present an automated maliciousness estimation score for distinguishing between benign and malicious samples within this unlabeled mixture, thereby enabling the training of a binary prompt classifier on top. Notably, our framework does not require extra human annotations, offering strong flexibility and practicality for real-world applications. Extensive experiment shows VLMGuard achieves superior detection results, significantly outperforming state-of-the-art methods. Disclaimer: This paper may contain offensive examples; reader discretion is advised.', 'score': 4, 'issue_id': 1, 'pub_date': '2024-10-01', 'pub_date_card': {'ru': '1 октября', 'en': 'October 1', 'zh': '10月1日'}, 'hash': 'eb03a7f1b6890abe', 'authors': ['Xuefeng Du', 'Reshmi Ghosh', 'Robert Sim', 'Ahmed Salem', 'Vitor Carvalho', 'Emily Lawton', 'Yixuan Li', 'Jack W. Stokes'], 'affiliations': ['Microsoft Corp.', 'University of Wisconsin - Madison'], 'pdf_title_img': 'assets\\pdf\\title_img\\2410.00296.jpg', 'data': {'categories': ['#cv', '#security', '#training', '#data', '#synthetic', '#multimodal'], 'emoji': '🛡️', 'ru': {'title': 'VLMGuard: защита визуально-языковых моделей от вредоносных промптов', 'desc': 'В статье представлен VLMGuard - новый подход к обнаружению вредоносных промптов для визуально-языковых моделей (VLM). Авторы предлагают использовать неразмеченные пользовательские запросы для обучения классификатора, способного отличать безопасные промпты от вредоносных. Ключевая особенность метода - автоматическая оценка вредоносности промптов без необходимости ручной разметки данных. Эксперименты показывают, что VLMGuard значительно превосходит существующие методы обнаружения вредоносных промптов для VLM.'}, 'en': {'title': 'VLMGuard: Safeguarding Vision-Language Models from Malicious Prompts', 'desc': 'This paper introduces VLMGuard, a framework designed to detect malicious prompts in vision-language models (VLMs) without needing extensive labeled data. It utilizes unlabeled user prompts, which are common in real-world applications, to differentiate between benign and malicious inputs. By implementing an automated maliciousness estimation score, VLMGuard can effectively train a binary classifier to enhance the reliability of VLM outputs. The results demonstrate that VLMGuard significantly outperforms existing methods in detecting adversarial prompts, ensuring safer use of VLMs in various applications.'}, 'zh': {'title': 'VLMGuard：提升视觉语言模型的安全性', 'desc': '视觉语言模型（VLMs）在理解视觉和文本信息的上下文中至关重要。然而，它们对恶意输入的脆弱性带来了重大风险，影响了输出的可靠性。为了解决这一问题，我们提出了VLMGuard，一个新颖的学习框架，利用未标记的用户提示来检测恶意提示。我们的框架通过自动化的恶意性估计评分，能够在未标记的数据中区分良性和恶意样本，从而训练出一个二元提示分类器。'}}}, {'id': 'https://huggingface.co/papers/2410.00316', 'title': 'EmoKnob: Enhance Voice Cloning with Fine-Grained Emotion Control', 'url': 'https://huggingface.co/papers/2410.00316', 'abstract': 'While recent advances in Text-to-Speech (TTS) technology produce natural and expressive speech, they lack the option for users to select emotion and control intensity. We propose EmoKnob, a framework that allows fine-grained emotion control in speech synthesis with few-shot demonstrative samples of arbitrary emotion. Our framework leverages the expressive speaker representation space made possible by recent advances in foundation voice cloning models. Based on the few-shot capability of our emotion control framework, we propose two methods to apply emotion control on emotions described by open-ended text, enabling an intuitive interface for controlling a diverse array of nuanced emotions. To facilitate a more systematic emotional speech synthesis field, we introduce a set of evaluation metrics designed to rigorously assess the faithfulness and recognizability of emotion control frameworks. Through objective and subjective evaluations, we show that our emotion control framework effectively embeds emotions into speech and surpasses emotion expressiveness of commercial TTS services.', 'score': 4, 'issue_id': 1, 'pub_date': '2024-10-01', 'pub_date_card': {'ru': '1 октября', 'en': 'October 1', 'zh': '10月1日'}, 'hash': '3b1f67773dd59956', 'authors': ['Haozhe Chen', 'Run Chen', 'Julia Hirschberg'], 'affiliations': ['Columbia University'], 'pdf_title_img': 'assets\\pdf\\title_img\\2410.00316.jpg', 'data': {'categories': ['#audio', '#dataset', '#benchmark', '#open_source', '#synthetic'], 'emoji': '🗣️', 'ru': {'title': 'Тонкая настройка эмоций в синтезированной речи', 'desc': 'Исследователи представили EmoKnob - фреймворк для точного контроля эмоций в синтезе речи с использованием малого числа демонстрационных образцов. Система использует пространство представлений выразительных голосов, созданное современными моделями клонирования голоса. Предложены два метода для применения эмоционального контроля на основе текстовых описаний эмоций. Авторы также ввели набор метрик для оценки точности и узнаваемости систем эмоционального контроля в синтезе речи.'}, 'en': {'title': 'EmoKnob: Fine-Grained Emotion Control in Speech Synthesis', 'desc': 'This paper introduces EmoKnob, a novel framework for enhancing Text-to-Speech (TTS) systems by allowing users to control emotions and their intensity in synthesized speech. It utilizes few-shot learning techniques to enable emotion control based on limited examples, leveraging advanced voice cloning models for expressive speaker representation. The framework includes methods for interpreting open-ended text descriptions of emotions, providing a user-friendly interface for nuanced emotional expression. Additionally, the authors propose new evaluation metrics to assess the effectiveness of emotion control in TTS, demonstrating that EmoKnob outperforms existing commercial TTS services in emotional expressiveness.'}, 'zh': {'title': '情感控制，语音合成的新突破', 'desc': '本文提出了一种名为EmoKnob的框架，旨在改善文本到语音（TTS）技术中的情感控制。该框架允许用户通过少量示例样本来精细调节合成语音的情感和强度。我们利用了基础语音克隆模型的进展，构建了一个富有表现力的说话者表示空间。通过引入一套评估指标，我们系统地评估了情感控制的有效性，结果表明该框架在情感表达上超越了现有的商业TTS服务。'}}}, {'id': 'https://huggingface.co/papers/2410.01723', 'title': 'HarmoniCa: Harmonizing Training and Inference for Better Feature Cache in Diffusion Transformer Acceleration', 'url': 'https://huggingface.co/papers/2410.01723', 'abstract': 'Diffusion Transformers (DiTs) have gained prominence for outstanding scalability and extraordinary performance in generative tasks. However, their considerable inference costs impede practical deployment. The feature cache mechanism, which involves storing and retrieving redundant computations across timesteps, holds promise for reducing per-step inference time in diffusion models. Most existing caching methods for DiT are manually designed. Although the learning-based approach attempts to optimize strategies adaptively, it suffers from discrepancies between training and inference, which hampers both the performance and acceleration ratio. Upon detailed analysis, we pinpoint that these discrepancies primarily stem from two aspects: (1) Prior Timestep Disregard, where training ignores the effect of cache usage at earlier timesteps, and (2) Objective Mismatch, where the training target (align predicted noise in each timestep) deviates from the goal of inference (generate the high-quality image). To alleviate these discrepancies, we propose HarmoniCa, a novel method that Harmonizes training and inference with a novel learning-based Caching framework built upon Step-Wise Denoising Training (SDT) and Image Error Proxy-Guided Objective (IEPO). Compared to the traditional training paradigm, the newly proposed SDT maintains the continuity of the denoising process, enabling the model to leverage information from prior timesteps during training, similar to the way it operates during inference. Furthermore, we design IEPO, which integrates an efficient proxy mechanism to approximate the final image error caused by reusing the cached feature. Therefore, IEPO helps balance final image quality and cache utilization, resolving the issue of training that only considers the impact of cache usage on the predicted output at each timestep.', 'score': 4, 'issue_id': 1, 'pub_date': '2024-10-02', 'pub_date_card': {'ru': '2 октября', 'en': 'October 2', 'zh': '10月2日'}, 'hash': '1347f88bc3c0da94', 'authors': ['Yushi Huang', 'Zining Wang', 'Ruihao Gong', 'Jing Liu', 'Xinjie Zhang', 'Jun Zhang'], 'affiliations': ['Beihang University', 'HKUST', 'Monash University', 'SenseTime Research'], 'pdf_title_img': 'assets\\pdf\\title_img\\2410.01723.jpg', 'data': {'categories': ['#training', '#inference', '#optimization', '#diffusion', '#architecture'], 'emoji': '🔄', 'ru': {'title': 'Гармонизация обучения и вывода для эффективного кэширования в диффузионных моделях', 'desc': 'Статья представляет новый метод HarmoniCa для оптимизации кэширования в диффузионных трансформерах. Авторы выявили несоответствия между обучением и выводом в существующих подходах к кэшированию. Предложенный метод включает пошаговое обучение шумоподавлению (SDT) и целевую функцию, основанную на прокси-оценке ошибки изображения (IEPO). HarmoniCa позволяет эффективно балансировать качество итогового изображения и использование кэша, улучшая производительность и ускорение диффузионных моделей.'}, 'en': {'title': 'HarmoniCa: Bridging Training and Inference for Efficient Diffusion Transformers', 'desc': 'This paper introduces HarmoniCa, a new method designed to improve the efficiency of Diffusion Transformers (DiTs) in generative tasks by addressing the discrepancies between training and inference. The authors identify two main issues: the neglect of prior timestep effects during training and the mismatch between training objectives and inference goals. HarmoniCa employs a Step-Wise Denoising Training (SDT) approach to ensure that the model learns to utilize cached features effectively, mirroring its inference behavior. Additionally, the Image Error Proxy-Guided Objective (IEPO) is introduced to optimize the balance between image quality and cache usage, enhancing the overall performance of DiTs during deployment.'}, 'zh': {'title': 'HarmoniCa：提升扩散模型推理效率的创新方法', 'desc': '扩散变换器（DiTs）在生成任务中表现出色，但其推理成本较高，限制了实际应用。我们提出了一种新的方法HarmoniCa，通过一种基于学习的缓存框架来协调训练和推理过程，解决了训练和推理之间的差异。该方法采用逐步去噪训练（SDT）和图像误差代理引导目标（IEPO），使模型在训练时能够利用之前时间步的信息。通过这种方式，HarmoniCa提高了推理效率，同时保持了生成图像的高质量。'}}}, {'id': 'https://huggingface.co/papers/2409.20325', 'title': 'Old Optimizer, New Norm: An Anthology', 'url': 'https://huggingface.co/papers/2409.20325', 'abstract': 'Deep learning optimizers are often motivated through a mix of convex and approximate second-order theory. We select three such methods -- Adam, Shampoo and Prodigy -- and argue that each method can instead be understood as a squarely first-order method without convexity assumptions. In fact, after switching off exponential moving averages, each method is equivalent to steepest descent under a particular norm. By generalizing this observation, we chart a new design space for training algorithms. Different operator norms should be assigned to different tensors based on the role that the tensor plays within the network. For example, while linear and embedding layers may have the same weight space of R^{mtimes n}, these layers play different roles and should be assigned different norms. We hope that this idea of carefully metrizing the neural architecture might lead to more stable, scalable and indeed faster training.', 'score': 3, 'issue_id': 1, 'pub_date': '2024-09-30', 'pub_date_card': {'ru': '30 сентября', 'en': 'September 30', 'zh': '9月30日'}, 'hash': 'cf41fa7b2190f430', 'authors': ['Jeremy Bernstein', 'Laker Newhouse'], 'affiliations': ['MIT CSAIL, United States'], 'pdf_title_img': 'assets\\pdf\\title_img\\2409.20325.jpg', 'data': {'categories': ['#math', '#training', '#optimization', '#architecture'], 'emoji': '🧭', 'ru': {'title': 'Новый взгляд на оптимизацию нейросетей: метрика важнее, чем мы думали', 'desc': 'Статья рассматривает популярные оптимизаторы глубокого обучения, такие как Adam, Shampoo и Prodigy, с новой точки зрения. Авторы утверждают, что эти методы можно интерпретировать как методы первого порядка без предположений о выпуклости. Они показывают, что каждый метод эквивалентен градиентному спуску с определенной нормой. На основе этого наблюдения предлагается новый подход к разработке алгоритмов обучения, где различным тензорам присваиваются разные операторные нормы в зависимости от их роли в нейронной сети.'}, 'en': {'title': 'Reimagining Optimizers: Tailoring Norms for Neural Network Efficiency', 'desc': 'This paper explores deep learning optimizers, specifically Adam, Shampoo, and Prodigy, and reinterprets them as first-order methods without relying on convexity. The authors demonstrate that by disabling exponential moving averages, these optimizers can be viewed as steepest descent methods under specific norms. They propose a novel approach to designing training algorithms by assigning different operator norms to tensors based on their roles in the neural network. This careful metrization of the architecture aims to enhance the stability, scalability, and speed of the training process.'}, 'zh': {'title': '优化器的新视角：根据角色分配范数', 'desc': '深度学习优化器通常基于凸性和近似二阶理论进行设计。本文选择了三种优化方法——Adam、Shampoo和Prodigy，并提出它们可以被理解为不依赖于凸性假设的一阶方法。通过关闭指数移动平均，这些方法实际上等同于在特定范数下的最陡下降法。我们建议根据张量在网络中的角色，为不同的张量分配不同的算子范数，以期实现更稳定、更可扩展和更快速的训练。'}}}, {'id': 'https://huggingface.co/papers/2410.01440', 'title': 'Closed-loop Long-horizon Robotic Planning via Equilibrium Sequence Modeling', 'url': 'https://huggingface.co/papers/2410.01440', 'abstract': 'In the endeavor to make autonomous robots take actions, task planning is a major challenge that requires translating high-level task descriptions into long-horizon action sequences. Despite recent advances in language model agents, they remain prone to planning errors and limited in their ability to plan ahead. To address these limitations in robotic planning, we advocate a self-refining scheme that iteratively refines a draft plan until an equilibrium is reached. Remarkably, this process can be optimized end-to-end from an analytical perspective without the need to curate additional verifiers or reward models, allowing us to train self-refining planners in a simple supervised learning fashion. Meanwhile, a nested equilibrium sequence modeling procedure is devised for efficient closed-loop planning that incorporates useful feedback from the environment (or an internal world model). Our method is evaluated on the VirtualHome-Env benchmark, showing advanced performance with better scaling for inference computation. Code is available at https://github.com/Singularity0104/equilibrium-planner.', 'score': 3, 'issue_id': 1, 'pub_date': '2024-10-02', 'pub_date_card': {'ru': '2 октября', 'en': 'October 2', 'zh': '10月2日'}, 'hash': '903dc847118efa24', 'authors': ['Jinghan Li', 'Zhicheng Sun', 'Fei Li', 'Cao Sheng', 'Jiazhong Yu', 'Yadong Mu'], 'affiliations': ['China Tower', 'Peking University'], 'pdf_title_img': 'assets\\pdf\\title_img\\2410.01440.jpg', 'data': {'categories': ['#reasoning', '#inference', '#rl', '#optimization', '#benchmark', '#open_source', '#robotics'], 'emoji': '🤖', 'ru': {'title': 'Самоулучшающееся планирование для автономных роботов', 'desc': 'В этой статье представлен новый подход к планированию действий для автономных роботов с использованием самоулучшающейся схемы. Метод итеративно уточняет черновой план до достижения равновесия, что позволяет обучать планировщики путем простого контролируемого обучения. Предложена процедура моделирования вложенных равновесных последовательностей для эффективного планирования с обратной связью. Метод показал улучшенную производительность на бенчмарке VirtualHome-Env с лучшим масштабированием вычислений при выводе.'}, 'en': {'title': 'Self-Refining Planning for Autonomous Robots', 'desc': 'This paper addresses the challenge of task planning in autonomous robots, which involves converting high-level tasks into detailed action sequences. The authors propose a self-refining planning approach that iteratively improves an initial draft plan until it stabilizes at an optimal solution. This method can be trained using supervised learning without needing extra verification systems or reward models, making it simpler to implement. Additionally, they introduce a nested equilibrium sequence modeling technique that enhances planning efficiency by utilizing feedback from the environment or an internal model.'}, 'zh': {'title': '自我精炼：提升机器人任务规划的智能', 'desc': '本文探讨了自主机器人任务规划的挑战，特别是将高层任务描述转化为长时间的行动序列。尽管语言模型代理有了进展，但它们在规划时仍容易出错，且前瞻性有限。为了解决这些问题，本文提出了一种自我精炼的方案，通过迭代优化草拟计划，直到达到平衡状态。该方法可以从分析的角度进行端到端优化，无需额外的验证器或奖励模型，且在VirtualHome-Env基准测试中表现出色。'}}}, {'id': 'https://huggingface.co/papers/2410.01481', 'title': 'SonicSim: A customizable simulation platform for speech processing in moving sound source scenarios', 'url': 'https://huggingface.co/papers/2410.01481', 'abstract': 'The systematic evaluation of speech separation and enhancement models under moving sound source conditions typically requires extensive data comprising diverse scenarios. However, real-world datasets often contain insufficient data to meet the training and evaluation requirements of models. Although synthetic datasets offer a larger volume of data, their acoustic simulations lack realism. Consequently, neither real-world nor synthetic datasets effectively fulfill practical needs. To address these issues, we introduce SonicSim, a synthetic toolkit de-designed to generate highly customizable data for moving sound sources. SonicSim is developed based on the embodied AI simulation platform, Habitat-sim, supporting multi-level adjustments, including scene-level, microphone-level, and source-level, thereby generating more diverse synthetic data. Leveraging SonicSim, we constructed a moving sound source benchmark dataset, SonicSet, using the Librispeech, the Freesound Dataset 50k (FSD50K) and Free Music Archive (FMA), and 90 scenes from the Matterport3D to evaluate speech separation and enhancement models. Additionally, to validate the differences between synthetic data and real-world data, we randomly selected 5 hours of raw data without reverberation from the SonicSet validation set to record a real-world speech separation dataset, which was then compared with the corresponding synthetic datasets. Similarly, we utilized the real-world speech enhancement dataset RealMAN to validate the acoustic gap between other synthetic datasets and the SonicSet dataset for speech enhancement. The results indicate that the synthetic data generated by SonicSim can effectively generalize to real-world scenarios. Demo and code are publicly available at https://cslikai.cn/SonicSim/.', 'score': 2, 'issue_id': 1, 'pub_date': '2024-10-02', 'pub_date_card': {'ru': '2 октября', 'en': 'October 2', 'zh': '10月2日'}, 'hash': 'd284601ce4d1a07d', 'authors': ['Kai Li', 'Wendi Sang', 'Chang Zeng', 'Runxuan Yang', 'Guo Chen', 'Xiaolin Hu'], 'affiliations': ['Chinese Institute for Brain Research (CIBR), Beijing 100010, China', 'Department of Computer Science and Technology, Institute for AI, BNRist, Tsinghua University, Beijing 100084, China', 'National Institute of Informatics, Tokyo, Japan', 'Tsinghua Laboratory of Brain and Intelligence (THBI), IDG/McGovern Institute for Brain Research, Tsinghua University, Beijing 100084, China'], 'pdf_title_img': 'assets\\pdf\\title_img\\2410.01481.jpg', 'data': {'categories': ['#audio', '#dataset', '#synthetic', '#data', '#benchmark', '#open_source', '#robotics', '#3d'], 'emoji': '🔊', 'ru': {'title': 'SonicSim: Реалистичная симуляция движущихся источников звука для задач обработки речи', 'desc': 'SonicSim - это инструментарий для создания синтетических данных для задач разделения и улучшения речи в условиях движущихся источников звука. Он основан на платформе Habitat-sim и позволяет настраивать множество параметров на уровне сцены, микрофонов и источников звука. С помощью SonicSim был создан бенчмарк-датасет SonicSet для оценки моделей обработки речи. Эксперименты показали, что синтетические данные, сгенерированные SonicSim, хорошо обобщаются на реальные сценарии.'}, 'en': {'title': 'SonicSim: Bridging the Gap in Speech Separation Datasets', 'desc': 'This paper presents SonicSim, a synthetic toolkit designed to create customizable datasets for evaluating speech separation and enhancement models in dynamic environments. Traditional datasets often lack the necessary diversity and realism, which SonicSim addresses by allowing multi-level adjustments for various sound source conditions. The authors constructed a benchmark dataset called SonicSet, which combines data from multiple sources to facilitate comprehensive model evaluation. Results show that the synthetic data generated by SonicSim can effectively generalize to real-world applications, bridging the gap between synthetic and real-world datasets.'}, 'zh': {'title': 'SonicSim：为移动声源生成真实感合成数据的工具', 'desc': '本论文介绍了一种名为SonicSim的合成工具包，用于生成可定制的移动声源数据。该工具包基于Habitat-sim平台，支持多层次的调整，能够生成更丰富的合成数据。通过SonicSim，我们构建了一个移动声源基准数据集SonicSet，用于评估语音分离和增强模型。研究结果表明，SonicSim生成的合成数据能够有效地推广到真实场景中。'}}}, {'id': 'https://huggingface.co/papers/2410.01518', 'title': 'InfiniPot: Infinite Context Processing on Memory-Constrained LLMs', 'url': 'https://huggingface.co/papers/2410.01518', 'abstract': 'Handling long input contexts remains a significant challenge for Large Language Models (LLMs), particularly in resource-constrained environments such as mobile devices. Our work aims to address this limitation by introducing InfiniPot, a novel KV cache control framework designed to enable pre-trained LLMs to manage extensive sequences within fixed memory constraints efficiently, without requiring additional training. InfiniPot leverages Continual Context Distillation (CCD), an iterative process that compresses and retains essential information through novel importance metrics, effectively maintaining critical data even without access to future context. Our comprehensive evaluations indicate that InfiniPot significantly outperforms models trained for long contexts in various NLP tasks, establishing its efficacy and versatility. This work represents a substantial advancement toward making LLMs applicable to a broader range of real-world scenarios.', 'score': 2, 'issue_id': 1, 'pub_date': '2024-10-02', 'pub_date_card': {'ru': '2 октября', 'en': 'October 2', 'zh': '10月2日'}, 'hash': 'a13831078cca1658', 'authors': ['Minsoo Kim', 'Kyuhong Shim', 'Jungwook Choi', 'Simyung Chang'], 'affiliations': ['Hanyang University', 'Qualcomm AI Research, Qualcomm Korea'], 'pdf_title_img': 'assets\\pdf\\title_img\\2410.01518.jpg', 'data': {'categories': ['#long_context', '#training', '#optimization', '#inference'], 'emoji': '🔄', 'ru': {'title': 'Эффективная обработка длинного контекста в LLM без переобучения', 'desc': 'InfiniPot - это новая система управления кэшем ключ-значение для больших языковых моделей (LLM). Она позволяет обрабатывать длинные последовательности в условиях ограниченной памяти без дополнительного обучения. InfiniPot использует метод непрерывной дистилляции контекста (CCD) для сжатия и сохранения важной информации. Система превосходит модели, обученные для работы с длинным контекстом, в различных задачах обработки естественного языка.'}, 'en': {'title': 'InfiniPot: Efficient Long Context Management for LLMs', 'desc': 'This paper presents InfiniPot, a new framework that helps Large Language Models (LLMs) handle long input sequences efficiently, especially on devices with limited resources. It uses a technique called Continual Context Distillation (CCD) to compress important information and keep it accessible without needing extra training. By focusing on key data, InfiniPot allows LLMs to perform better in natural language processing tasks compared to models specifically trained for long contexts. This advancement makes it easier to use LLMs in various real-world applications where memory is a constraint.'}, 'zh': {'title': 'InfiniPot：高效管理长上下文的创新框架', 'desc': '处理长输入上下文对大型语言模型（LLMs）仍然是一个重大挑战，尤其是在资源受限的环境中，如移动设备。我们的研究提出了InfiniPot，这是一种新颖的KV缓存控制框架，旨在使预训练的LLMs能够在固定内存限制内高效管理大量序列，而无需额外训练。InfiniPot利用持续上下文蒸馏（CCD），通过新颖的重要性度量迭代压缩和保留关键信息，即使在没有未来上下文的情况下也能有效维护重要数据。我们的综合评估表明，InfiniPot在各种自然语言处理任务中显著优于为长上下文训练的模型，证明了其有效性和多功能性。'}}}];
        const articlesContainer = document.getElementById('articles-container');
        const sortDropdown = document.getElementById('sort-dropdown');
        const categoryFiltersContainer = document.getElementById('category-filters');
        const categoryFiltersLogicOptions = document.getElementById('category-options');
        const categoryToggle = document.getElementById('category-toggle');
        const clearCategoriesButton = document.getElementById('clear-categories');
        let selectedCategories = [];
        let selectedArticles = [];
        let sortBy = 'issue_id';     
        let showLimitHint = false; 
        let filterLogicIsAnd = false;

        function getUrlParameters() {
            const urlParams = new URLSearchParams(window.location.search);
            const categoriesParam = urlParams.get('cat');
            let categories = categoriesParam ? categoriesParam.split(',') : [];
            categories = categories.map(element => `#${element}`);
            return categories
        }

        function updateUrlWithCategories() {
            let cleanedCategories = selectedCategories.map(element => element.replace(/^#/, ''));
            const newUrl = cleanedCategories.length > 0 
                ? `${window.location.pathname}?cat=${cleanedCategories.join(',')}`
                : window.location.pathname;
            console.log("cleanedCategories", cleanedCategories)
            window.history.pushState({}, '', newUrl);
        }

        function loadSettings() {
            const themeToggle = document.getElementById('theme-toggle');
            const sortDropdown = document.getElementById('sort-dropdown');

            const isDarkMode = localStorage.getItem('darkMode') === 'true';
            let settingSortBy = localStorage.getItem('sort_by');
            filterLogicIsAnd = localStorage.getItem('filter_logic_is_and') === 'true';
            
            if (isDarkMode) {
                document.body.classList.remove('light-theme');
                document.body.classList.add('dark-theme');
                themeToggle.checked = true;
                const title = document.getElementById('doomgrad');
                title.innerHTML = "hf nightly";
                const titleSign = document.getElementById('doomgrad-icon');
                titleSign.classList.add('rotate');
            }

            if ((!settingSortBy) || (settingSortBy === 'null')) {
                settingSortBy = 'issue_id';
            }

            if (filterLogicIsAnd) {
                document.getElementById('filter-logic-and').checked = true;
            } else {
                document.getElementById('filter-logic-or').checked = true;
            }

            sortDropdown.value = settingSortBy;
            sortBy = settingSortBy;
        }

        document.getElementById('theme-toggle').addEventListener('change', toggleTheme);
        document.getElementById('filter-logic-and').addEventListener('change', () => {
            filterLogicIsAnd = true;
            localStorage.setItem('filter_logic_is_and', 'true');
            filterAndRenderArticles();
            updateSelectedArticlesTitle();
        });
        document.getElementById('filter-logic-or').addEventListener('change', () => {
            filterLogicIsAnd = false;
            localStorage.setItem('filter_logic_is_and', 'false');
            filterAndRenderArticles();
            updateSelectedArticlesTitle();
        });

        function getUniqueCategories(articles) {
            const categories = new Set();
            articles.forEach(article => {
                if (article.data && article.data.categories) {
                    article.data.categories.forEach(cat => categories.add(cat));
                }
            });
            let res = Array.from(categories);
            res.sort();
            return res;
        }

        function createCategoryButtons() {
            //const categories = getUniqueCategories(articlesData);
            const categories = ['#3d (3)', '#agents', '#agi', '#alignment (5)', '#architecture (12)', '#audio (3)', '#benchmark (11)', '#cv (6)', '#data (5)', '#dataset (10)', '#diffusion (2)', '#ethics (1)', '#games (1)', '#graphs (1)', '#hallucinations (2)', '#healthcare', '#inference (4)', '#interpretability (4)', '#leakage (1)', '#long_context (3)', '#low_resource (1)', '#machine_translation (1)', '#math (3)', '#multilingual (4)', '#multimodal (2)', '#open_source (9)', '#optimization (12)', '#plp (1)', '#rag (1)', '#reasoning (6)', '#rl (2)', '#rlhf (2)', '#robotics (2)', '#science (1)', '#security (1)', '#small_models (1)', '#story_generation', '#survey', '#synthetic (6)', '#training (16)', '#transfer_learning (5)', '#video (1)'];

            categories.forEach(category => {
                let catNameSplitted = category.split(/(\s+)/);
                let catName = catNameSplitted[0];
                const button = document.createElement('span');
                button.textContent = catName;
                button.className = 'category-button';
                if (catNameSplitted.length < 2) {
                    button.classList.add('inactive');
                };
                button.onclick = () => toggleCategory(catName, button);
                categoryFiltersContainer.appendChild(button);
            });
        }

        function toggleCategory(category, button) {
            const index = selectedCategories.indexOf(category);
            if (index === -1) {
                selectedCategories.push(category);
                button.classList.add('active');
            } else {
                selectedCategories.splice(index, 1);
                button.classList.remove('active');
            }         
            filterAndRenderArticles();
            saveCategorySelection();
            updateSelectedArticlesTitle();
            updateUrlWithCategories();
            setFilterOptionsVisibility();
        }

        function saveCategorySelection() {
            localStorage.setItem('selectedCategories', JSON.stringify(selectedCategories));
        }

        function updateSelectedArticlesTitle() {
            if ((selectedArticles.length === articlesData.length) & (selectedCategories.length === 0)) {
                categoryToggle.textContent = `🏷️ ${filterLabel[currentLang]}`;
            } else {
                categoryToggle.textContent = `🏷️ ${filterLabel[currentLang]} (${formatArticlesTitle(selectedArticles.length, currentLang)})`;
            }
        }

        function cleanCategorySelection() {
            localStorage.setItem('selectedCategories', JSON.stringify('[]'));
        }

        function loadCategorySelection() {
            const urlCategories = getUrlParameters();
            if (urlCategories.length > 0) {
                selectedCategories = urlCategories;
                saveCategorySelection();
            } else {
                const savedCategories = localStorage.getItem('selectedCategories');
                if (savedCategories && savedCategories !== '"[]"') {
                    selectedCategories = JSON.parse(savedCategories);                    
                }
            }
            updateCategoryButtonStates();
        }

        function updateCategoryButtonStates() {
            const buttons = categoryFiltersContainer.getElementsByClassName('category-button');
            Array.from(buttons).forEach(button => {
                if (selectedCategories.includes(button.textContent)) {
                    button.classList.add('active');
                } else {
                    button.classList.remove('active');
                }
            });
        }

        function filterAndRenderArticles() {
            console.log(selectedCategories);
            let filteredArticles; 

            if (filterLogicIsAnd) {
                filteredArticles = selectedCategories.length === 0
                    ? articlesData
                    : articlesData.filter(article => 
                        article.data && article.data.categories && 
                        selectedCategories.every(cat => article.data.categories.includes(cat))
                );
            } else {
                filteredArticles = selectedCategories.length === 0
                    ? articlesData
                    : articlesData.filter(article => 
                        article.data && article.data.categories && 
                        article.data.categories.some(cat => selectedCategories.includes(cat))
                    );            
            }

            console.log('filteredArticles', filteredArticles)

            selectedArticles = filteredArticles;
            sortArticles(selectedArticles);
        }

        function clearAllCategories() {
            selectedCategories = [];
            updateCategoryButtonStates();
            filterAndRenderArticles();
            saveCategorySelection();
            updateSelectedArticlesTitle();
            updateUrlWithCategories();
        }

        function renderArticles(articles) {
            if (articles.length > 50) {
                articles = articles.slice(0, 50);
                showLimitHint = true;
            } else {
                showLimitHint = false;
            }
            console.log(articles);
            articlesContainer.innerHTML = '';
            articles.forEach((item, index) => {
                if ("error" in item) {
                    console.log(`Omitting JSON. ${item["raw_data"]}`);
                    return;
                }
                
                let explanation = item["data"][currentLang]["desc"];
                let title = item["data"][currentLang]["title"];

                const cats = item["data"]["categories"].slice(0, 5).join(" ");
                
                let affiliations = ""
                if ('affiliations' in item) {
                    affiliations = item["affiliations"].slice(0, 10).join(", ");
                }

                let pdfImg = "https://hfday.ru/img/title_stub.png"
                if ('pdf_title_img' in item) {
                    pdfImg = 'https://hfday.ru/' + item['pdf_title_img']
                    
                }                

                const articleHTML = `
                    <article class='x${item["hash"]}'>
                        <div class="background-digit">${index + 1}</div>
                        <div class="article-content" onclick="toggleAbstract(${index})">
                            <div class="article-title-cont">
                                <div style="display:table-cell; vertical-align: middle;">
                                    <div class="article-title"><h2>${item['data']['emoji']} ${title}</h2></div>
                                </div>
                            </div>
                            <p class="meta">
                            🔺 ${item['score']}. ${item['title']}</p>
                            <p class="pub-date">${publishedLabel[currentLang]}${item['pub_date_card'][currentLang]}</p>
                            
                            <div class="article-pdf-title-img-cont"><img class="article-pdf-title-img" src="${pdfImg}"/></div>
                            
                            <div id="abstract-${index}" class="abstract">
                                <p>${explanation}</p>
                                <div id="toggle-${index}" class="abstract-toggle">...</div>
                            </div>

                            <div class="links">
                                <a href="${item['url']}" target="_blank">${paperLabel[currentLang]}</a>
                            </div>

                            <div class="affiliations">${affiliations}</div>

                            <div class="tags">${cats}</div>
                        </div>
                    </article>
                `;
                articlesContainer.innerHTML += articleHTML;
            });
        }
        
        function sortArticles() {
            let sortedArticles = [...selectedArticles];
            if (sortBy === 'issue_id') {
                sortedArticles.sort((a, b) => b.issue_id - a.issue_id);
            } else if (sortBy === 'pub_date') {
                sortedArticles.sort((a, b) => b.pub_date.localeCompare(a.pub_date));
            } else {
                sortedArticles.sort((a, b) => b.score - a.score);
            }
            renderArticles(sortedArticles);
            localStorage.setItem('sort_by', sortBy);
        }
        
        sortDropdown.addEventListener('change', (event) => {
            sortBy = event.target.value;
            sortArticles(event.target.value);
        });

        categoryToggle.addEventListener('click', () => {
            categoryFiltersContainer.classList.toggle('expanded');
            setFilterOptionsVisibility();
        });

        clearCategoriesButton.addEventListener('click', () => {
            clearAllCategories();
            setFilterOptionsVisibility();
        });

        function setFilterOptionsVisibility() {
            if (selectedCategories.length > 0) {
                categoryFiltersLogicOptions.style.display = 'inline-block';
            } else {
                categoryFiltersLogicOptions.style.display = 'none';
            }
        } 
        
        function updateTimeDiffs() {
            const timeDiff = document.getElementById('timeDiff');
            timeDiff.innerHTML = '🔄 ' + getTimeDiff('2024-10-03 09:00',lang=currentLang);
        }
        function updateSortingOptions() {
            const sortingLabels = {
                ru: {
                    default: "рейтингу",
                    pub_date: "дате публикации",
                    issue_id: "добавлению на HF"
                },
                en: {
                    default: "rating",
                    pub_date: "publication date",
                    issue_id: "HF addition date"
                },
                zh: {
                    default: "评分",
                    pub_date: "发布日期",
                    issue_id: "HF上传日期"
                }
            };

            const dropdown = document.getElementById('sort-dropdown');
            const options = dropdown.options;

            for (let i = 0; i < options.length; i++) {
                const optionValue = options[i].value;
                console.log(sortingLabels)
                options[i].text = sortingLabels[currentLang][optionValue];
            }
        }
        function updateLocalization() {
            const titleDate = document.getElementById('title-date');
            const prevDate = document.getElementById('prev-date');
            const nextDate = document.getElementById('next-date');
            const topMonth = document.getElementById('top-month-label');
            const topDay = document.getElementById('top-day-label');
            const papersCount = document.getElementById('title-articles-count');
            const sortLabelText = document.getElementById('sort-label-text');
            titleDate.innerHTML = feedDate[currentLang];
            prevDate.innerHTML = feedDatePrev[currentLang];
            nextDate.innerHTML = feedDateNext[currentLang];
            papersCount.innerHTML = formatArticlesTitle(articlesData.length, currentLang);
            sortLabelText.innerHTML = sortLabel[currentLang];
            if (topMonth) {
                topMonth.innerHTML = topMonthLabel[currentLang];
            }  
            if (topDay) {
                topDay.innerHTML = topDayLabel[currentLang];
            }             
            updateSelectedArticlesTitle();
            updateSortingOptions();
        } 
        function hideNextLink(format) {
            if (format === 'monthly') {
                if (isCurrentMonth('2024-10-03 09:00')) {
                    const element = document.getElementById('nav-next');
                    if (element) {    
                        element.style.display = 'none';
                    }
                }
            } else {            
                if (isToday('2024-10-03 09:00')) {
                    const element = document.getElementById('nav-next');
                    if (element) {    
                        element.style.display = 'none';
                    }
                }
            }
        }

        loadSettings();
        createCategoryButtons();
        loadCategorySelection();
        filterAndRenderArticles();
        updateSelectedArticlesTitle();
        updateTimeDiffs();
        hideNextLink('daily'); 
        initializeLanguageFlags();
        updateLocalization();
        setFilterOptionsVisibility();
    </script>
</body>
</html>