<!DOCTYPE html>
<html>
<head>
<script async src="https://www.googletagmanager.com/gtag/js?id=G-C1CRWDNJ1J"></script>
<script>
window.dataLayer = window.dataLayer || [];
function gtag(){dataLayer.push(arguments);}
gtag('js', new Date());
gtag('config', 'G-C1CRWDNJ1J');
</script>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>HF. 12 papers. September 27.</title>
<link rel="icon" href="favicon.svg" sizes="any" type="image/svg+xml">
<link href="https://fonts.googleapis.com/css2?family=Roboto:wght@300;400;700&display=swap" rel="stylesheet">
<link href="https://fonts.googleapis.com/css2?family=Roboto+Slab:wght@100..900&family=Tiny5&display=swap" rel="stylesheet">
<style>
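/* Theme palette: light-mode defaults; the .dark-theme rules further down override selected colors. */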
:root {
--primary-color: cornflowerblue;
--primary-color-dark: #fffd87cf;
--secondary-color: #fff;
--background-color: #eee;
--text-color: #333333;
--header-color: cornflowerblue;
--body-color: #eee;
--menu-color: #002370;
}
.background-digit {
position: absolute;
font-family: 'Tiny5';
bottom: -20px;
right: -10px;
font-size: 8em;
font-weight: 400;
color: #0989ea22;
z-index: 2;
line-height: 1;
}
.dark-theme .background-digit {
color: #e9e78f3d;
}
body {
font-family: 'Roboto Slab', sans-serif;
line-height: 1.6;
color: var(--text-color);
margin: 0;
padding: 0;
min-height: 100vh;
display: flex;
flex-direction: column;
}
.container {
max-width: 1500px;
margin: 0 auto;
padding: 0 20px;
flex: 1 0 auto;
}
.a-clean {
color: var(--secondary-color);
text-decoration: none;
}
.a-clean:hover {
color: #fff;
}
header {
padding: 3.6em 0 2.4em 0;
text-align: center;
}
footer {
background-color: var(--primary-color);
color: white;
text-align: center;
margin-top: 2em;
flex-shrink: 0;
padding: 20px;
}
h1 {
font-size: 2.4em;
margin: 0;
font-weight: 700;
}
.article-title-cont {
margin: -21px -21px 0px -21px;
padding: 10px 20px;
background: cornflowerblue;
display: table;
min-height: 5.9em;
}
.dark-theme .article-title-cont {
background: #444444;
}
.article-title {
color: white;
}
.article-title h2 {
margin: 0px;
padding: 0px;
font-weight: 400;
text-align:center;
}
h2 {
/* color: var(--primary-color); */
font-size: 1.2em;
margin-top: 0;
margin-bottom: 0.5em;
}
header p {
font-size: 1.2em;
margin-top: 0.5em;
font-weight: 300;
}
main {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(400px, 1fr));
gap: 1.5em;
padding: 10px 0 20px 0;
}
body.dark-theme>header {
background-color: #333333;
color: white;
}
body.dark-theme>div>main>article>div.article-content>p.meta {
color: #fff;
}
body.light-theme>div>main>article>div.article-content>p.meta {
color: #555;
}
body.dark-theme>div>main>article>div.article-content>p.pub-date {
color: #ccc;
}
body.light-theme>div>main>article>div.article-content>p.pub-date {
color: #555;
}
body.dark-theme>div>main>article>div.article-content>div.tags {
color: #ccc;
}
body.light-theme>div>main>article>div.article-content>div.tags {
color: #fff;
}
body.light-theme>header {
background-color: var(--header-color);
color: white;
}
article {
border-radius: 5px;
border: 1px solid #ddd;
overflow: hidden;
transition: background-color 0.2s ease;
display: flex;
flex-direction: column;
position: relative;
}
.article-content {
padding: 1.3em;
flex-grow: 1;
display: flex;
flex-direction: column;
position: relative;
z-index: 1;
cursor: pointer;
}
body.dark-theme>div>main>article {
background-color: #444;
border: none;
}
body.light-theme>div>main>article {
background-color: #fff;
}
body.dark-theme>div>main>article:hover {
background-color: #414141;
}
body.light-theme>div>main>article:hover {
background-color: #fafafa;
}
.meta {
font-size: 0.9em;
margin-bottom: 0em;
font-weight: 500;
margin: 20px 0 0px 0;
padding-bottom: 20px;
border-bottom: 1px solid #ddd;
}
.pub-date {
font-size: 0.8em;
margin-bottom: 0.8em;
font-weight: 400;
text-align: right;
font-family: Roboto;
}
.tags {
font-size: 0.9em;
margin-bottom: 0;
position: absolute;
bottom: 0px;
font-weight: 300;
font-family: 'Roboto Slab';
background: #555;
left: 0;
width: 100%;
padding: 10px 20px;
}
.abstract {
position: relative;
max-height: 170px;
overflow: hidden;
transition: max-height 0.3s ease;
cursor: pointer;
}
.abstract.expanded {
max-height: 1000px;
}
.abstract-toggle {
position: absolute;
bottom: 4px;
right: 0;
cursor: pointer;
color: var(--primary-color);
float: right;
font-weight: 400;
}
.explanation {
background-color: #e8f5e9;
border-left: 4px solid var(--secondary-color);
padding: 1em;
margin-top: 1.5em;
}
.links {
margin-top: 1.5em;
margin-bottom: 20px;
}
.affiliations {
margin-bottom: 50px;
padding:10px;
font-size: 0.9em;
text-align: center;
}
a {
color: var(--primary-color);
text-decoration: none;
font-weight: 500;
transition: color 0.3s ease;
}
.dark-theme a {
color: var(--primary-color-dark);
}
a:hover {
color: #e73838;
}
.light-theme {
background-color: var(--body-color);
color: #333333;
}
.dark-theme {
background-color: #333333;
color: #ffffff;
}
.theme-switch {
position: absolute;
top: 20px;
right: 20px;
display: flex;
align-items: center;
}
.switch {
position: relative;
display: inline-block;
width: 50px;
height: 30px;
}
.switch input {
opacity: 0;
width: 0;
height: 0;
}
.slider {
position: absolute;
cursor: pointer;
top: 0;
left: 0;
right: 0;
bottom: 0;
background-color: #ccc;
transition: .4s;
border-radius: 30px;
}
.slider:before {
position: absolute;
content: "";
height: 24px;
width: 24px;
left: 3px;
bottom: 3px;
background-color: white;
transition: .4s;
border-radius: 50%;
}
input:checked + .slider {
background-color: var(--primary-color);
}
input:checked + .slider:before {
transform: translateX(20px);
}
.switch-label {
margin-right: 10px;
}
.sub-header-container {
display: flex;
justify-content: space-between;
align-items: center;
flex-wrap: wrap;
gap: 15px;
margin-top: 7px;
}
.sub-header-container-2 {
display: flex;
justify-content: left;
align-items: center;
flex-wrap: wrap;
gap: 15px;
margin: 0 auto;
}
.update-info-container {
margin-top: 15px;
margin-bottom: 0px;
text-align: left;
flex: 1;
}
.sort-container {
margin-top: 15px;
margin-bottom: 0px;
text-align: right;
flex: 2;
}
.category-toggle-container {
display: inline-block;
margin-top: 15px;
margin-bottom: 10px;
cursor: pointer;
}
.category-option-container {
margin-top: 15px;
margin-bottom: 10px;
display: none;
margin-left: auto;
}
.category-option-container.expanded {
display: block;
}
.sort-dropdown {
padding: 5px 10px;
font-size: 16px;
border-radius: 5px;
border: 1px solid #ccc;
background-color: white;
color: var(--text-color);
font-family: 'Roboto Slab', sans-serif;
}
.sort-label {
margin-right: 10px;
font-size: 1.0em !important;
}
.dark-theme .sort-dropdown {
background-color: #444;
color: white;
border-color: var(--text-color);
}
.title-sign {
display: inline-block;
transition: all 0.5s ease;
}
.rotate {
transform: rotate(45deg) translateY(-6px);
transform-origin: center;
}
.title-text {
display: inline;
padding-left: 10px;
}
.category-filters {
margin-top: 20px;
margin-bottom: 20px;
text-align: center;
display: none;
}
.category-filters.expanded {
display: block;
margin-top: 10px;
}
.category-button {
display: inline-block;
margin: 5px;
padding: 5px 10px;
border-radius: 15px;
background-color: #f0f0f0;
color: #333;
cursor: pointer;
transition: background-color 0.3s ease;
}
.category-button.active {
background-color: var(--primary-color);
color: white;
}
.category-button.inactive:not(.active) {
color: #ccc;
}
.dark-theme .category-button {
background-color: #555;
color: #fff;
}
.dark-theme .category-button.active {
background-color: var(--primary-color);
}
.dark-theme .category-button.inactive:not(.active) {
color: #888;
}
.clear-categories {
display: inline-block;
margin: 5px;
padding: 5px 10px;
border-radius: 15px;
background-color: #f0f0f0;
color: #333;
cursor: pointer;
transition: background-color 0.3s ease;
}
.clear-categories:hover {
background-color: #bbb;
}
.svg-container {
display: inline-block;
position: relative;
overflow: hidden;
}
.svg-container span {
position: relative;
z-index: 1;
}
.svg-container svg {
position: absolute;
bottom: 0;
left: 0;
z-index: 0;
}
.nav-menu {
background-color: var(--menu-color);
padding: 2px 0 2px 0;
display: inline-block;
position: relative;
overflow: hidden;
width: 100%;
}
.nav-container {
max-width: 1500px;
margin: 0 auto;
padding: 0 20px;
display: flex;
justify-content: left;
gap: 3em;
}
.nav-container span a {
color: white;
}
.nav-item {
color: white;
padding: 3px 0px;
cursor: pointer;
font-weight: 400;
}
.nav-item:hover {
background-color: rgba(255, 255, 255, 0.1);
border-color: rgba(255, 255, 255, 0.3);
}
.language-flags {
display: flex;
gap: 7px;
padding: 5px 0px;
margin-left: auto;
}
.flag-svg {
width: 22px;
height: 22px;
cursor: pointer;
opacity: 0.4;
transition: opacity 0.3s ease;
border-radius: 2px;
}
.flag-svg.active {
opacity: 1;
}
.flag-svg:hover {
opacity: 0.8;
}
.dark-theme .nav-menu {
background-color: #333;
}
.dark-theme .nav-item {
color: white;
}
.dark-theme .nav-item:hover {
background-color: rgba(255, 255, 255, 0.05);
}
.pointer { cursor: pointer; }
.article-pdf-title-img {
max-width: 100%;
max-height: 400px;
display: inline-block;
margin-top: 10px;
margin-bottom: 10px;
border-radius: 5px;
}
.article-pdf-title-img-cont {
text-align: center;
}
.dark-theme .article-pdf-title-img {
opacity: 0.8;
filter: grayscale(1);
}
@media (max-width: 600px) {
.nav-container {
flex-direction: row;
gap: 1.5em;
}
.nav-item {
padding: 3px 0px;
}
}
@media (max-width: 768px) {
.category-filters {
display: none;
}
.category-toggle {
display: inline-block;
width: 100%;
text-align: left;
}
.category-filters.expanded {
display: block;
margin-top: 10px;
}
}
@media (max-width: 600px) {
.sub-header-container {
flex-direction: column;
align-items: flex-start;
}
.sort-container {
width: 100%;
display: flex;
justify-content: left;
margin: 0 auto;
}
.sort-dropdown {
margin-left: auto;
}
.sort-label {
margin-top: 5px;
float: left;
}
.sub-header-container-2 {
flex-direction: row;
align-items: flex-start;
}
.update-info-container {
text-align: left;
width: 100%;
margin-bottom: 0px;
}
.category-toggle-container {
margin-top: 15px;
text-align: left;
margin-bottom: 10px;
}
.category-option-container {
margin-top: 15px;
text-align: center;
margin-bottom: 10px;
}
main {
grid-template-columns: 1fr;
gap: 0em;
padding: 10px 0 20px 0;
margin: 0 -20px;
}
footer {
margin-top: -20px;
}
article {
border-radius: 0px;
}
}
</style>
<script>
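// Expands or collapses the abstract block for the article with the given id,
// toggling the trailing "..." indicator accordingly.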
function toggleAbstract(id) {
var abstract = document.getElementById('abstract-' + id);
var toggle = document.getElementById('toggle-' + id);
if (abstract.classList.contains('expanded')) {
abstract.classList.remove('expanded');
toggle.textContent = '...';
} else {
abstract.classList.add('expanded');
toggle.textContent = '';
}
}
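// Formats how long ago a "YYYY-MM-DD HH:MM" UTC timestamp was, localized for ru/en/zh
// (e.g. "5 minutes ago", "только что", "3天前").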
function getTimeDiff(dateString, lang='ru') {
const timeUnits = {
ru: {
minute: ["минуту", "минуты", "минут"],
hour: ["час", "часа", "часов"],
day: ["день", "дня", "дней"],
justNow: "только что",
ago: "назад"
},
en: {
minute: ["minute", "minutes", "minutes"],
hour: ["hour", "hours", "hours"],
day: ["day", "days", "days"],
justNow: "just now",
ago: "ago"
},
zh: {
minute: ["分钟", "分钟", "分钟"],
hour: ["小时", "小时", "小时"],
day: ["天", "天", "天"],
justNow: "刚刚",
ago: "前"
}
};
function getPlural(number, words, lang) {
if (lang === 'ru') {
if (number % 10 === 1 && number % 100 !== 11) {
return words[0];
} else if (number % 10 >= 2 && number % 10 <= 4 && (number % 100 < 10 || number % 100 >= 20)) {
return words[1];
} else {
return words[2];
}
} else if (lang === 'en') {
return number === 1 ? words[0] : words[1];
} else {
// Chinese doesn't need plural forms
return words[0];
}
}
function formatTimeDiff(number, unit, lang) {
const unitWord = getPlural(number, timeUnits[lang][unit], lang);
if (lang === 'zh') {
return `${number}${unitWord}${timeUnits[lang].ago}`;
} else {
return `${number} ${unitWord} ${timeUnits[lang].ago}`;
}
}
if (!['ru', 'en', 'zh'].includes(lang)) {
throw new Error('Unsupported language. Supported languages are: ru, en, zh');
}
const pastDate = new Date(dateString.replace(" ", "T") + ":00Z");
const currentDate = new Date();
const diffInSeconds = Math.floor((currentDate - pastDate) / 1000);
const minutes = Math.floor(diffInSeconds / 60);
const hours = Math.floor(diffInSeconds / 3600);
const days = Math.floor(diffInSeconds / 86400);
if (minutes === 0) {
return timeUnits[lang].justNow;
} else if (minutes < 60) {
return formatTimeDiff(minutes, 'minute', lang);
} else if (hours < 24) {
return formatTimeDiff(hours, 'hour', lang);
} else {
return formatTimeDiff(days, 'day', lang);
}
}
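// Calendar helpers: isToday / isCurrentMonth compare a date string against the current local date.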
function isToday(dateString) {
const inputDate = new Date(dateString);
const today = new Date();
return (
inputDate.getFullYear() === today.getFullYear() &&
inputDate.getMonth() === today.getMonth() &&
inputDate.getDate() === today.getDate()
);
}
function isCurrentMonth(dateString) {
const inputDate = new Date(dateString);
const today = new Date();
return (
inputDate.getFullYear() === today.getFullYear() &&
inputDate.getMonth() === today.getMonth()
);
}
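// Builds the "N papers" header label with the correct plural form for ru/en/zh.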
function formatArticlesTitle(number, lang='ru') {
const lastDigit = number % 10;
const lastTwoDigits = number % 100;
let word;
if (!['ru', 'en', 'zh'].includes(lang)) {
throw new Error('Unsupported language. Supported languages are: ru, en, zh');
}
if (lang === 'ru') {
if (lastTwoDigits >= 11 && lastTwoDigits <= 14) {
word = "статей";
} else if (lastDigit === 1) {
word = "статья";
} else if (lastDigit >= 2 && lastDigit <= 4) {
word = "статьи";
} else {
word = "статей";
}
} else if (lang === 'en') {
if (number === 1) {
word = 'paper'
} else {
word = 'papers'
}
} else if (lang === 'zh') {
word = "篇论文"
}
if (lang === 'zh') {
return `${number}${word}`;
} else {
return `${number} ${word}`;
}
}
</script>
</head>
<body class="light-theme">
<header>
<div class="container">
<a href="https://hfday.ru" class="a-clean"><h1 class="title-sign" id="doomgrad-icon">🔺</h1><h1 class="title-text" id="doomgrad">hf daily</h1></a>
<p><span id="title-date">27 сентября</span> | <span id="title-articles-count">12 papers</span></p>
</div>
<div class="theme-switch">
<label class="switch">
<input type="checkbox" id="theme-toggle">
<span class="slider"></span>
</label>
</div>
</header>
<div class="nav-menu">
<div class="nav-container">
<span class="nav-item" id="nav-prev"><a href="/d/2024-09-26.html">⬅️ <span id="prev-date">26.09</span></a></span>
<span class="nav-item" id="nav-next"><a href="/d/2024-09-30.html">➡️ <span id="next-date">30.09</span></a></span>
<span class="nav-item" id="nav-monthly"><a href="/m/2024-09.html">📈 <span id='top-month-label'>Месяц</span></a></span>
<div class="language-flags">
<svg class="flag-svg" data-lang="ru" xmlns="http://www.w3.org/2000/svg" width="32" height="32" viewBox="0 0 32 32"><path fill="#1435a1" d="M1 11H31V21H1z"></path><path d="M5,4H27c2.208,0,4,1.792,4,4v4H1v-4c0-2.208,1.792-4,4-4Z" fill="#fff"></path><path d="M5,20H27c2.208,0,4,1.792,4,4v4H1v-4c0-2.208,1.792-4,4-4Z" transform="rotate(180 16 24)" fill="#c53a28"></path><path d="M27,4H5c-2.209,0-4,1.791-4,4V24c0,2.209,1.791,4,4,4H27c2.209,0,4-1.791,4-4V8c0-2.209-1.791-4-4-4Zm3,20c0,1.654-1.346,3-3,3H5c-1.654,0-3-1.346-3-3V8c0-1.654,1.346-3,3-3H27c1.654,0,3,1.346,3,3V24Z" opacity=".15"></path><path d="M27,5H5c-1.657,0-3,1.343-3,3v1c0-1.657,1.343-3,3-3H27c1.657,0,3,1.343,3,3v-1c0-1.657-1.343-3-3-3Z" fill="#fff" opacity=".2"></path></svg>
<svg class="flag-svg" data-lang="zh" xmlns="http://www.w3.org/2000/svg" width="32" height="32" viewBox="0 0 32 32"><rect x="1" y="4" width="30" height="24" rx="4" ry="4" fill="#db362f"></rect><path d="M27,4H5c-2.209,0-4,1.791-4,4V24c0,2.209,1.791,4,4,4H27c2.209,0,4-1.791,4-4V8c0-2.209-1.791-4-4-4Zm3,20c0,1.654-1.346,3-3,3H5c-1.654,0-3-1.346-3-3V8c0-1.654,1.346-3,3-3H27c1.654,0,3,1.346,3,3V24Z" opacity=".15"></path><path fill="#ff0" d="M7.958 10.152L7.19 7.786 6.421 10.152 3.934 10.152 5.946 11.614 5.177 13.979 7.19 12.517 9.202 13.979 8.433 11.614 10.446 10.152 7.958 10.152z"></path><path fill="#ff0" d="M12.725 8.187L13.152 8.898 13.224 8.072 14.032 7.886 13.269 7.562 13.342 6.736 12.798 7.361 12.035 7.037 12.461 7.748 11.917 8.373 12.725 8.187z"></path><path fill="#ff0" d="M14.865 10.372L14.982 11.193 15.37 10.46 16.187 10.602 15.61 10.007 15.997 9.274 15.253 9.639 14.675 9.044 14.793 9.865 14.048 10.23 14.865 10.372z"></path><path fill="#ff0" d="M15.597 13.612L16.25 13.101 15.421 13.13 15.137 12.352 14.909 13.149 14.081 13.179 14.769 13.642 14.541 14.439 15.194 13.928 15.881 14.391 15.597 13.612z"></path><path fill="#ff0" d="M13.26 15.535L13.298 14.707 12.78 15.354 12.005 15.062 12.46 15.754 11.942 16.402 12.742 16.182 13.198 16.875 13.236 16.047 14.036 15.827 13.26 15.535z"></path><path d="M27,5H5c-1.657,0-3,1.343-3,3v1c0-1.657,1.343-3,3-3H27c1.657,0,3,1.343,3,3v-1c0-1.657-1.343-3-3-3Z" fill="#fff" opacity=".2"></path></svg>
<svg class="flag-svg" data-lang="en" xmlns="http://www.w3.org/2000/svg" width="32" height="32" viewBox="0 0 32 32"><rect x="1" y="4" width="30" height="24" rx="4" ry="4" fill="#fff"></rect><path d="M1.638,5.846H30.362c-.711-1.108-1.947-1.846-3.362-1.846H5c-1.414,0-2.65,.738-3.362,1.846Z" fill="#a62842"></path><path d="M2.03,7.692c-.008,.103-.03,.202-.03,.308v1.539H31v-1.539c0-.105-.022-.204-.03-.308H2.03Z" fill="#a62842"></path><path fill="#a62842" d="M2 11.385H31V13.231H2z"></path><path fill="#a62842" d="M2 15.077H31V16.923000000000002H2z"></path><path fill="#a62842" d="M1 18.769H31V20.615H1z"></path><path d="M1,24c0,.105,.023,.204,.031,.308H30.969c.008-.103,.031-.202,.031-.308v-1.539H1v1.539Z" fill="#a62842"></path><path d="M30.362,26.154H1.638c.711,1.108,1.947,1.846,3.362,1.846H27c1.414,0,2.65-.738,3.362-1.846Z" fill="#a62842"></path><path d="M5,4h11v12.923H1V8c0-2.208,1.792-4,4-4Z" fill="#102d5e"></path><path d="M27,4H5c-2.209,0-4,1.791-4,4V24c0,2.209,1.791,4,4,4H27c2.209,0,4-1.791,4-4V8c0-2.209-1.791-4-4-4Zm3,20c0,1.654-1.346,3-3,3H5c-1.654,0-3-1.346-3-3V8c0-1.654,1.346-3,3-3H27c1.654,0,3,1.346,3,3V24Z" opacity=".15"></path><path d="M27,5H5c-1.657,0-3,1.343-3,3v1c0-1.657,1.343-3,3-3H27c1.657,0,3,1.343,3,3v-1c0-1.657-1.343-3-3-3Z" fill="#fff" opacity=".2"></path><path fill="#fff" d="M4.601 7.463L5.193 7.033 4.462 7.033 4.236 6.338 4.01 7.033 3.279 7.033 3.87 7.463 3.644 8.158 4.236 7.729 4.827 8.158 4.601 7.463z"></path><path fill="#fff" d="M7.58 7.463L8.172 7.033 7.441 7.033 7.215 6.338 6.989 7.033 6.258 7.033 6.849 7.463 6.623 8.158 7.215 7.729 7.806 8.158 7.58 7.463z"></path><path fill="#fff" d="M10.56 7.463L11.151 7.033 10.42 7.033 10.194 6.338 9.968 7.033 9.237 7.033 9.828 7.463 9.603 8.158 10.194 7.729 10.785 8.158 10.56 7.463z"></path><path fill="#fff" d="M6.066 9.283L6.658 8.854 5.927 8.854 5.701 8.158 5.475 8.854 4.744 8.854 5.335 9.283 5.109 9.979 5.701 9.549 6.292 9.979 6.066 9.283z"></path><path fill="#fff" d="M9.046 9.283L9.637 8.854 8.906 8.854 8.68 8.158 8.454 8.854 7.723 8.854 8.314 9.283 8.089 9.979 8.68 9.549 9.271 9.979 9.046 9.283z"></path><path fill="#fff" d="M12.025 9.283L12.616 8.854 11.885 8.854 11.659 8.158 11.433 8.854 10.702 8.854 11.294 9.283 11.068 9.979 11.659 9.549 12.251 9.979 12.025 9.283z"></path><path fill="#fff" d="M6.066 12.924L6.658 12.494 5.927 12.494 5.701 11.799 5.475 12.494 4.744 12.494 5.335 12.924 5.109 13.619 5.701 13.19 6.292 13.619 6.066 12.924z"></path><path fill="#fff" d="M9.046 12.924L9.637 12.494 8.906 12.494 8.68 11.799 8.454 12.494 7.723 12.494 8.314 12.924 8.089 13.619 8.68 13.19 9.271 13.619 9.046 12.924z"></path><path fill="#fff" d="M12.025 12.924L12.616 12.494 11.885 12.494 11.659 11.799 11.433 12.494 10.702 12.494 11.294 12.924 11.068 13.619 11.659 13.19 12.251 13.619 12.025 12.924z"></path><path fill="#fff" d="M13.539 7.463L14.13 7.033 13.399 7.033 13.173 6.338 12.947 7.033 12.216 7.033 12.808 7.463 12.582 8.158 13.173 7.729 13.765 8.158 13.539 7.463z"></path><path fill="#fff" d="M4.601 11.104L5.193 10.674 4.462 10.674 4.236 9.979 4.01 10.674 3.279 10.674 3.87 11.104 3.644 11.799 4.236 11.369 4.827 11.799 4.601 11.104z"></path><path fill="#fff" d="M7.58 11.104L8.172 10.674 7.441 10.674 7.215 9.979 6.989 10.674 6.258 10.674 6.849 11.104 6.623 11.799 7.215 11.369 7.806 11.799 7.58 11.104z"></path><path fill="#fff" d="M10.56 11.104L11.151 10.674 10.42 10.674 10.194 9.979 9.968 10.674 9.237 10.674 9.828 11.104 9.603 11.799 10.194 11.369 10.785 11.799 10.56 11.104z"></path><path fill="#fff" d="M13.539 11.104L14.13 10.674 13.399 
10.674 13.173 9.979 12.947 10.674 12.216 10.674 12.808 11.104 12.582 11.799 13.173 11.369 13.765 11.799 13.539 11.104z"></path><path fill="#fff" d="M4.601 14.744L5.193 14.315 4.462 14.315 4.236 13.619 4.01 14.315 3.279 14.315 3.87 14.744 3.644 15.44 4.236 15.01 4.827 15.44 4.601 14.744z"></path><path fill="#fff" d="M7.58 14.744L8.172 14.315 7.441 14.315 7.215 13.619 6.989 14.315 6.258 14.315 6.849 14.744 6.623 15.44 7.215 15.01 7.806 15.44 7.58 14.744z"></path><path fill="#fff" d="M10.56 14.744L11.151 14.315 10.42 14.315 10.194 13.619 9.968 14.315 9.237 14.315 9.828 14.744 9.603 15.44 10.194 15.01 10.785 15.44 10.56 14.744z"></path><path fill="#fff" d="M13.539 14.744L14.13 14.315 13.399 14.315 13.173 13.619 12.947 14.315 12.216 14.315 12.808 14.744 12.582 15.44 13.173 15.01 13.765 15.44 13.539 14.744z"></path></svg>
</div>
</div>
</div>
<div class="container">
<div class="sub-header-container">
<div class="update-info-container">
<label class="update-info-label" id="timeDiff"></label>
</div>
<div class="sort-container">
<label class="sort-label">🔀 <span id="sort-label-text">Сортировка по</span></label>
<select id="sort-dropdown" class="sort-dropdown">
<option value="default">рейтингу</option>
<option value="pub_date">дате публикации</option>
<option value="issue_id">добавлению на HF</option>
</select>
</div>
</div>
<div class="sub-header-container-2">
<div class="category-toggle-container">
<div class="svg-container">
<span id="category-toggle">🏷️ Фильтр</span>
<svg height="3" width="200">
<line x1="0" y1="0" x2="200" y2="0"
stroke="black"
stroke-width="2"
stroke-dasharray="3, 3" />
</svg>
</div>
</div>
<div class="category-option-container" id="category-options">
<label class="pointer" for="filter-logic-or"><input type="radio" id="filter-logic-or" name="filter-logic" value="or"> A∪B</label>
<label class="pointer" for="filter-logic-and"><input type="radio" id="filter-logic-and" name="filter-logic" value="and"> A∩B</label>
</div>
</div>
<div class="category-filters" id="category-filters">
<span class="clear-categories" id="clear-categories">🧹</span>
<!-- Categories -->
</div>
<main id="articles-container">
<!-- Articles -->
</main>
</div>
<footer>
<div class="container">
<p><a style="color:white;" href="https://t.me/doomgrad">doomgrad</a> ✖️ <a style="color:white;" href="https://huggingface.co/papers">hugging face</a></p>
</div>
</footer>
<script>
// Language handling
let currentLang = localStorage.getItem('selectedLang') || 'en';
let feedDate = {'ru': '27 сентября', 'en': 'September 27', 'zh': '9月27日'};
let feedDateNext = {'ru': '30.09', 'en': '09/30', 'zh': '9月30日'};
let feedDatePrev = {'ru': '26.09', 'en': '09/26', 'zh': '9月26日'};
let filterLabel = {'ru': 'Фильтр', 'en': 'Topics', 'zh': '主题筛选'}
let publishedLabel = {'ru': 'статья от ', 'en': 'published on ', 'zh': '发表于'}
let sortLabel = {'ru': 'Сортировка по', 'en': 'Sort by', 'zh': '排序方式'}
let paperLabel = {'ru': 'Статья', 'en': 'Paper', 'zh': '论文'}
let topMonthLabel = {'ru': 'Месяц', 'en': 'Month', 'zh': '月度论文'}
let topDayLabel = {'ru': 'День', 'en': 'Day', 'zh': '日度论文'}
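// Marks the saved language's flag as active and, on click, switches the language,
// persists it, and re-renders time labels, localized text and the article list.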
function initializeLanguageFlags() {
const flags = document.querySelectorAll('.flag-svg');
flags.forEach(flag => {
if (flag.dataset.lang === currentLang) {
flag.classList.add('active');
}
flag.addEventListener('click', () => {
flags.forEach(f => f.classList.remove('active'));
flag.classList.add('active');
currentLang = flag.dataset.lang;
localStorage.setItem('selectedLang', currentLang);
updateTimeDiffs();
updateLocalization();
filterAndRenderArticles();
});
});
}
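// Toggles light/dark theme, persists the choice in localStorage and swaps the
// header branding between "hf daily" and "hf nightly".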
function toggleTheme() {
const body = document.body;
body.classList.toggle('light-theme');
body.classList.toggle('dark-theme');
const isDarkMode = body.classList.contains('dark-theme');
localStorage.setItem('darkMode', isDarkMode);
if (isDarkMode) {
const title = document.getElementById('doomgrad');
title.innerHTML = "hf nightly";
const titleSign = document.getElementById('doomgrad-icon');
titleSign.classList.add('rotate');
} else {
const title = document.getElementById('doomgrad');
title.innerHTML = "hf daily";
const titleSign = document.getElementById('doomgrad-icon');
titleSign.classList.remove('rotate');
}
}
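// Paper feed data for this issue: each entry holds the HF paper URL, abstract, score,
// publication date, authors/affiliations, category tags and localized (ru/en/zh) summaries.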
const articlesData = [{'id': 'https://huggingface.co/papers/2409.17481', 'title': 'MaskLLM: Learnable Semi-Structured Sparsity for Large Language Models', 'url': 'https://huggingface.co/papers/2409.17481', 'abstract': "Large Language Models (LLMs) are distinguished by their massive parameter counts, which typically result in significant redundancy. This work introduces MaskLLM, a learnable pruning method that establishes Semi-structured (or ``N:M'') Sparsity in LLMs, aimed at reducing computational overhead during inference. Instead of developing a new importance criterion, MaskLLM explicitly models N:M patterns as a learnable distribution through Gumbel Softmax sampling. This approach facilitates end-to-end training on large-scale datasets and offers two notable advantages: 1) High-quality Masks - our method effectively scales to large datasets and learns accurate masks; 2) Transferability - the probabilistic modeling of mask distribution enables the transfer learning of sparsity across domains or tasks. We assessed MaskLLM using 2:4 sparsity on various LLMs, including LLaMA-2, Nemotron-4, and GPT-3, with sizes ranging from 843M to 15B parameters, and our empirical results show substantial improvements over state-of-the-art methods. For instance, leading approaches achieve a perplexity (PPL) of 10 or greater on Wikitext compared to the dense model's 5.12 PPL, but MaskLLM achieves a significantly lower 6.72 PPL solely by learning the masks with frozen weights. Furthermore, MaskLLM's learnable nature allows customized masks for lossless application of 2:4 sparsity to downstream tasks or domains. Code is available at https://github.com/NVlabs/MaskLLM.", 'score': 46, 'issue_id': 1, 'pub_date': '2024-09-26', 'pub_date_card': {'ru': '26 сентября', 'en': 'September 26', 'zh': '9月26日'}, 'hash': '9bb73b25aad1001a', 'authors': ['Gongfan Fang', 'Hongxu Yin', 'Saurav Muralidharan', 'Greg Heinrich', 'Jeff Pool', 'Jan Kautz', 'Pavlo Molchanov', 'Xinchao Wang'], 'affiliations': ['NVIDIA', 'National University of Singapore'], 'pdf_title_img': 'assets\\pdf\\title_img\\2409.17481.jpg', 'data': {'categories': ['#dataset', '#training', '#inference', '#optimization', '#transfer_learning', '#open_source', '#architecture'], 'emoji': '✂️', 'ru': {'title': 'MaskLLM: Эффективное обучение разреженности в больших языковых моделях', 'desc': 'Статья представляет MaskLLM - метод обучаемой обрезки для создания полуструктурированной разреженности в больших языковых моделях (LLM). MaskLLM моделирует паттерны N:M как обучаемое распределение с помощью выборки Гумбеля-Софтмакса, что позволяет проводить сквозное обучение на крупномасштабных наборах данных. Метод обеспечивает высококачественные маски и возможность переноса обучения разреженности между доменами или задачами. Эмпирические результаты показывают значительные улучшения по сравнению с современными методами при применении 2:4 разреженности к различным LLM.'}, 'en': {'title': 'Efficient Pruning of Large Language Models with MaskLLM', 'desc': 'This paper presents MaskLLM, a novel method for pruning large language models (LLMs) by introducing Semi-structured (N:M) sparsity to reduce computational costs during inference. MaskLLM utilizes Gumbel Softmax sampling to model N:M patterns as a learnable distribution, allowing for end-to-end training on extensive datasets. The method not only generates high-quality masks that scale effectively but also enables transfer learning of sparsity across different tasks. 
Empirical results demonstrate that MaskLLM outperforms existing methods, achieving lower perplexity scores while maintaining the ability to apply customized masks for various downstream applications.'}, 'zh': {'title': 'MaskLLM:高效稀疏化的大型语言模型', 'desc': '大型语言模型(LLMs)通常具有大量参数,导致计算冗余。本文提出了一种名为MaskLLM的可学习剪枝方法,通过建立半结构化(或“N:M”)稀疏性来减少推理过程中的计算开销。MaskLLM通过Gumbel Softmax采样显式建模N:M模式,支持在大规模数据集上进行端到端训练。实验结果表明,MaskLLM在多个LLM上实现了显著的性能提升,且其可学习特性使得在不同任务或领域间的稀疏性转移成为可能。'}}}, {'id': 'https://huggingface.co/papers/2409.18042', 'title': 'EMOVA: Empowering Language Models to See, Hear and Speak with Vivid Emotions', 'url': 'https://huggingface.co/papers/2409.18042', 'abstract': 'GPT-4o, an omni-modal model that enables vocal conversations with diverse emotions and tones, marks a milestone for omni-modal foundation models. However, empowering Large Language Models to perceive and generate images, texts, and speeches end-to-end with publicly available data remains challenging in the open-source community. Existing vision-language models rely on external tools for the speech processing, while speech-language models still suffer from limited or even without vision-understanding abilities. To address this gap, we propose EMOVA (EMotionally Omni-present Voice Assistant), to enable Large Language Models with end-to-end speech capabilities while maintaining the leading vision-language performance. With a semantic-acoustic disentangled speech tokenizer, we notice surprisingly that omni-modal alignment can further enhance vision-language and speech abilities compared with the corresponding bi-modal aligned counterparts. Moreover, a lightweight style module is proposed for flexible speech style controls (e.g., emotions and pitches). For the first time, EMOVA achieves state-of-the-art performance on both the vision-language and speech benchmarks, and meanwhile, supporting omni-modal spoken dialogue with vivid emotions.', 'score': 36, 'issue_id': 1, 'pub_date': '2024-09-26', 'pub_date_card': {'ru': '26 сентября', 'en': 'September 26', 'zh': '9月26日'}, 'hash': '227cd783a8a6d39c', 'authors': ['Kai Chen', 'Yunhao Gou', 'Runhui Huang', 'Zhili Liu', 'Daxin Tan', 'Jing Xu', 'Chunwei Wang', 'Yi Zhu', 'Yihan Zeng', 'Kuo Yang', 'Dingdong Wang', 'Kun Xiang', 'Haoyuan Li', 'Haoli Bai', 'Jianhua Han', 'Xiaohui Li', 'Weike Jin', 'Nian Xie', 'Yu Zhang', 'James T. Kwok', 'Hengshuang Zhao', 'Xiaodan Liang', 'Dit-Yan Yeung', 'Xiao Chen', 'Zhenguo Li', 'Wei Zhang', 'Qun Liu', 'Jun Yao', 'Lanqing Hong', 'Lu Hou', 'Hang Xu'], 'affiliations': ['Hong Kong University of Science and Technology', 'Huawei Noahs Ark Lab', 'Southern University of Science and Technology', 'Sun Yat-sen University', 'The Chinese University of Hong Kong', 'The University of Hong Kong'], 'pdf_title_img': 'assets\\pdf\\title_img\\2409.18042.jpg', 'data': {'categories': ['#audio', '#cv', '#benchmark', '#alignment', '#open_source', '#architecture', '#synthetic', '#multimodal'], 'emoji': '🗣️', 'ru': {'title': 'EMOVA: прорыв в омнимодальном ИИ с эмоциональным речевым интерфейсом', 'desc': 'EMOVA - это омнимодальная модель, объединяющая возможности обработки изображений, текста и речи. Она использует семантико-акустический разделенный токенизатор речи для улучшения языковых и речевых способностей. EMOVA достигает передовых результатов как в задачах зрения-языка, так и в речевых тестах. 
Модель также поддерживает омнимодальный разговорный диалог с различными эмоциями.'}, 'en': {'title': 'EMOVA: Bridging Speech and Vision for Emotionally Intelligent Conversations', 'desc': 'The paper introduces EMOVA, a new model designed to enhance Large Language Models (LLMs) by integrating speech capabilities with vision-language performance. EMOVA utilizes a semantic-acoustic disentangled speech tokenizer, which allows for better alignment between visual and auditory data, improving overall model performance. Additionally, it features a lightweight style module that enables control over speech styles, such as emotions and pitches. This approach achieves state-of-the-art results in both vision-language and speech tasks, facilitating more expressive and emotionally aware spoken dialogues.'}, 'zh': {'title': '情感全能语音助手:打破模态界限的创新', 'desc': '本论文介绍了EMOVA(情感全能语音助手),这是一个能够实现端到端语音能力的大型语言模型。EMOVA通过语义-声学解耦的语音标记器,提升了视觉-语言和语音能力的对齐效果。与现有的双模态模型相比,EMOVA在视觉-语言和语音基准测试中都达到了最先进的性能。该模型还引入了轻量级风格模块,支持灵活的语音风格控制,如情感和音调。'}}}, {'id': 'https://huggingface.co/papers/2409.18125', 'title': 'LLaVA-3D: A Simple yet Effective Pathway to Empowering LMMs with 3D-awareness', 'url': 'https://huggingface.co/papers/2409.18125', 'abstract': 'Recent advancements in Large Multimodal Models (LMMs) have greatly enhanced their proficiency in 2D visual understanding tasks, enabling them to effectively process and understand images and videos. However, the development of LMMs with 3D-awareness for 3D scene understanding has been hindered by the lack of large-scale 3D vision-language datasets and powerful 3D encoders. In this paper, we introduce a simple yet effective framework called LLaVA-3D. Leveraging the strong 2D understanding priors from LLaVA, our LLaVA-3D efficiently adapts LLaVA for 3D scene understanding without compromising 2D understanding capabilities. To achieve this, we employ a simple yet effective representation, 3D Patch, which connects 2D CLIP patch features with their corresponding positions in 3D space. By integrating the 3D Patches into 2D LMMs and employing joint 2D and 3D vision-language instruction tuning, we establish a unified architecture for both 2D image understanding and 3D scene understanding. Experimental results show that LLaVA-3D converges 3.5x faster than existing 3D LMMs when trained on 3D vision-language datasets. Moreover, LLaVA-3D not only achieves state-of-the-art performance across various 3D tasks but also maintains comparable 2D image understanding and vision-language conversation capabilities with LLaVA.', 'score': 33, 'issue_id': 1, 'pub_date': '2024-09-26', 'pub_date_card': {'ru': '26 сентября', 'en': 'September 26', 'zh': '9月26日'}, 'hash': '4ca82aa848fc15ec', 'authors': ['Chenming Zhu', 'Tai Wang', 'Wenwei Zhang', 'Jiangmiao Pang', 'Xihui Liu'], 'affiliations': ['Shanghai AI Laboratory', 'The University of Hong Kong'], 'pdf_title_img': 'assets\\pdf\\title_img\\2409.18125.jpg', 'data': {'categories': ['#dataset', '#cv', '#training', '#graphs', '#optimization', '#transfer_learning', '#architecture', '#multimodal', '#3d'], 'emoji': '🧠', 'ru': {'title': 'LLaVA-3D: Эффективный переход от 2D к 3D пониманию для мультимодальных моделей', 'desc': 'Статья представляет LLaVA-3D - фреймворк для адаптации моделей 2D понимания изображений к задачам 3D понимания сцен. Авторы используют концепцию 3D Patch, связывающую 2D признаки CLIP с их позициями в 3D пространстве. LLaVA-3D обучается быстрее существующих 3D моделей и достигает state-of-the-art результатов в 3D задачах. 
При этом модель сохраняет способности к пониманию 2D изображений на уровне базовой LLaVA.'}, 'en': {'title': 'Bridging 2D and 3D: LLaVA-3D Unifies Visual Understanding', 'desc': 'This paper presents LLaVA-3D, a framework designed to enhance Large Multimodal Models (LMMs) for 3D scene understanding while retaining their 2D visual comprehension abilities. The authors address the challenge of limited 3D vision-language datasets and the need for robust 3D encoders by introducing a novel representation called 3D Patch, which links 2D features to their 3D spatial locations. By integrating these 3D Patches into existing 2D LMMs and utilizing joint instruction tuning, LLaVA-3D achieves a unified approach for processing both 2D and 3D data. Experimental results demonstrate that LLaVA-3D trains 3.5 times faster than current 3D LMMs and excels in various 3D tasks while maintaining strong performance in 2D image understanding.'}, 'zh': {'title': 'LLaVA-3D:统一2D与3D场景理解的创新框架', 'desc': '本文介绍了一种新的框架LLaVA-3D,旨在提升大型多模态模型(LMMs)在3D场景理解方面的能力。通过结合2D CLIP特征与3D空间位置,LLaVA-3D有效地将2D理解能力扩展到3D场景中。该框架采用简单有效的3D Patch表示,并通过联合的2D和3D视觉语言指令调优,建立了统一的架构。实验结果表明,LLaVA-3D在训练速度上比现有的3D LMMs快3.5倍,并在多个3D任务上实现了最先进的性能,同时保持了与LLaVA相当的2D图像理解能力。'}}}, {'id': 'https://huggingface.co/papers/2409.18124', 'title': 'Lotus: Diffusion-based Visual Foundation Model for High-quality Dense Prediction', 'url': 'https://huggingface.co/papers/2409.18124', 'abstract': 'Leveraging the visual priors of pre-trained text-to-image diffusion models offers a promising solution to enhance zero-shot generalization in dense prediction tasks. However, existing methods often uncritically use the original diffusion formulation, which may not be optimal due to the fundamental differences between dense prediction and image generation. In this paper, we provide a systemic analysis of the diffusion formulation for the dense prediction, focusing on both quality and efficiency. And we find that the original parameterization type for image generation, which learns to predict noise, is harmful for dense prediction; the multi-step noising/denoising diffusion process is also unnecessary and challenging to optimize. Based on these insights, we introduce Lotus, a diffusion-based visual foundation model with a simple yet effective adaptation protocol for dense prediction. Specifically, Lotus is trained to directly predict annotations instead of noise, thereby avoiding harmful variance. We also reformulate the diffusion process into a single-step procedure, simplifying optimization and significantly boosting inference speed. Additionally, we introduce a novel tuning strategy called detail preserver, which achieves more accurate and fine-grained predictions. Without scaling up the training data or model capacity, Lotus achieves SoTA performance in zero-shot depth and normal estimation across various datasets. 
It also significantly enhances efficiency, being hundreds of times faster than most existing diffusion-based methods.', 'score': 31, 'issue_id': 1, 'pub_date': '2024-09-26', 'pub_date_card': {'ru': '26 сентября', 'en': 'September 26', 'zh': '9月26日'}, 'hash': '55be564bbee47eed', 'authors': ['Jing He', 'Haodong Li', 'Wei Yin', 'Yixun Liang', 'Leheng Li', 'Kaiqiang Zhou', 'Hongbo Zhang', 'Bingbing Liu', 'Ying-Cong Chen'], 'affiliations': ['HKUST', 'HKUST(GZ)', 'Noahs Ark Lab', 'University of Adelaide'], 'pdf_title_img': 'assets\\pdf\\title_img\\2409.18124.jpg', 'data': {'categories': ['#dataset', '#cv', '#inference', '#optimization', '#transfer_learning', '#diffusion', '#architecture'], 'emoji': '🌸', 'ru': {'title': 'Lotus: Эффективное плотное предсказание с помощью оптимизированной диффузионной модели', 'desc': 'Статья представляет Lotus - новую модель машинного обучения для решения задач плотного предсказания на основе диффузионных моделей. Авторы предлагают изменения в стандартной формулировке диффузионного процесса, оптимизируя его для задач плотного предсказания. Lotus обучается напрямую предсказывать аннотации вместо шума и использует одношаговый процесс диффузии, что значительно ускоряет вывод. Модель достигает передовых результатов в задачах оценки глубины и нормалей без дополнительного обучения.'}, 'en': {'title': 'Lotus: Revolutionizing Dense Prediction with Efficient Diffusion', 'desc': 'This paper presents Lotus, a new diffusion-based visual foundation model designed to improve zero-shot generalization in dense prediction tasks. The authors analyze the limitations of traditional diffusion methods, which are primarily suited for image generation, and highlight their inefficiencies when applied to dense prediction. By directly predicting annotations instead of noise and reformulating the diffusion process into a single-step procedure, Lotus simplifies optimization and enhances inference speed. The model achieves state-of-the-art performance in depth and normal estimation without requiring additional training data or increased model size.'}, 'zh': {'title': 'Lotus:高效的密集预测扩散模型', 'desc': '本文提出了一种新的方法,利用预训练的文本到图像扩散模型来提高密集预测任务的零-shot泛化能力。我们分析了现有扩散模型在密集预测中的不足,发现原有的噪声预测参数化方式对密集预测有害。为此,我们引入了Lotus模型,直接预测标注而非噪声,并将扩散过程简化为单步程序,从而提高了优化效率和推理速度。Lotus在多个数据集上实现了最先进的零-shot深度和法线估计性能,同时在效率上也大幅提升。'}}}, {'id': 'https://huggingface.co/papers/2409.14254', 'title': 'Instruction Following without Instruction Tuning', 'url': 'https://huggingface.co/papers/2409.14254', 'abstract': "Instruction tuning commonly means finetuning a language model on instruction-response pairs. We discover two forms of adaptation (tuning) that are deficient compared to instruction tuning, yet still yield instruction following; we call this implicit instruction tuning. We first find that instruction-response pairs are not necessary: training solely on responses, without any corresponding instructions, yields instruction following. This suggests pretrained models have an instruction-response mapping which is revealed by teaching the model the desired distribution of responses. However, we then find it's not necessary to teach the desired distribution of responses: instruction-response training on narrow-domain data like poetry still leads to broad instruction-following behavior like recipe generation. In particular, when instructions are very different from those in the narrow finetuning domain, models' responses do not adhere to the style of the finetuning domain. 
To begin to explain implicit instruction tuning, we hypothesize that very simple changes to a language model's distribution yield instruction following. We support this by hand-writing a rule-based language model which yields instruction following in a product-of-experts with a pretrained model. The rules are to slowly increase the probability of ending the sequence, penalize repetition, and uniformly change 15 words' probabilities. In summary, adaptations made without being designed to yield instruction following can do so implicitly.", 'score': 27, 'issue_id': 1, 'pub_date': '2024-09-21', 'pub_date_card': {'ru': '21 сентября', 'en': 'September 21', 'zh': '9月21日'}, 'hash': '928d018d2936e022', 'authors': ['John Hewitt', 'Nelson F. Liu', 'Percy Liang', 'Christopher D. Manning'], 'affiliations': ['Department of Computer Science, Stanford University'], 'pdf_title_img': 'assets\\pdf\\title_img\\2409.14254.jpg', 'data': {'categories': ['#reasoning', '#training', '#interpretability', '#alignment', '#architecture'], 'emoji': '🧠', 'ru': {'title': 'Скрытые возможности языковых моделей: неявное обучение следованию инструкциям', 'desc': "Исследователи обнаружили, что языковые модели могут научиться следовать инструкциям без явного обучения на парах инструкция-ответ. Этот феномен назван 'неявной настройкой на инструкции'. Выяснилось, что достаточно обучения только на ответах или даже на узкоспециализированных данных для получения широких навыков следования инструкциям. Авторы предполагают, что даже простые изменения в распределении вероятностей языковой модели могут привести к способности следовать инструкциям."}, 'en': {'title': 'Unlocking Instruction Following Without Explicit Instructions', 'desc': "This paper explores a new concept called implicit instruction tuning, which shows that language models can learn to follow instructions even without explicit instruction-response pairs. The authors demonstrate that training a model solely on responses can still lead to effective instruction following, suggesting that pretrained models already have an inherent understanding of instruction-response mappings. They also find that training on narrow-domain data can produce broad instruction-following behavior, indicating that the model can generalize beyond its training context. The study proposes that simple adjustments to a model's output distribution can facilitate this implicit learning process."}, 'zh': {'title': '隐式指令调优:无需指令也能实现指令跟随', 'desc': '本文探讨了指令调优的概念,发现有两种适应形式虽然不如指令调优有效,但仍能实现指令跟随。研究表明,仅通过响应进行训练,而不需要对应的指令,也能使模型遵循指令。这表明预训练模型内部存在指令与响应的映射关系。此外,作者提出简单的模型调整可以实现指令跟随,甚至在狭窄领域的数据上进行训练也能产生广泛的指令跟随行为。'}}}, {'id': 'https://huggingface.co/papers/2409.17422', 'title': 'Discovering the Gems in Early Layers: Accelerating Long-Context LLMs with 1000x Input Token Reduction', 'url': 'https://huggingface.co/papers/2409.17422', 'abstract': 'Large Language Models (LLMs) have demonstrated remarkable capabilities in handling long context inputs, but this comes at the cost of increased computational resources and latency. Our research introduces a novel approach for the long context bottleneck to accelerate LLM inference and reduce GPU memory consumption. Our research demonstrates that LLMs can identify relevant tokens in the early layers before generating answers to a query. Leveraging this insight, we propose an algorithm that uses early layers of an LLM as filters to select and compress input tokens, significantly reducing the context length for subsequent processing. 
Our method, GemFilter, demonstrates substantial improvements in both speed and memory efficiency compared to existing techniques, such as standard attention and SnapKV/H2O. Notably, it achieves a 2.4times speedup and 30\\% reduction in GPU memory usage compared to SOTA methods. Evaluation on the Needle in a Haystack task shows that GemFilter significantly outperforms standard attention, SnapKV and demonstrates comparable performance on the LongBench challenge. GemFilter is simple, training-free, and broadly applicable across different LLMs. Crucially, it provides interpretability by allowing humans to inspect the selected input sequence. These findings not only offer practical benefits for LLM deployment, but also enhance our understanding of LLM internal mechanisms, paving the way for further optimizations in LLM design and inference. Our code is available at https://github.com/SalesforceAIResearch/GemFilter.', 'score': 24, 'issue_id': 1, 'pub_date': '2024-09-25', 'pub_date_card': {'ru': '25 сентября', 'en': 'September 25', 'zh': '9月25日'}, 'hash': '830f07f8f88f0a79', 'authors': ['Zhenmei Shi', 'Yifei Ming', 'Xuan-Phi Nguyen', 'Yingyu Liang', 'Shafiq Joty'], 'affiliations': [], 'pdf_title_img': 'assets\\pdf\\title_img\\2409.17422.jpg', 'data': {'categories': ['#long_context', '#training', '#inference', '#interpretability', '#optimization', '#open_source', '#architecture'], 'emoji': '🚀', 'ru': {'title': 'GemFilter: Ускорение LLM без потери качества', 'desc': 'Исследователи представили новый метод GemFilter для ускорения вывода больших языковых моделей (LLM) и уменьшения потребления памяти GPU при работе с длинным контекстом. GemFilter использует ранние слои LLM в качестве фильтров для выбора и сжатия входных токенов, значительно сокращая длину контекста для последующей обработки. Метод демонстрирует существенное улучшение скорости и эффективности использования памяти по сравнению с существующими техниками. GemFilter также обеспечивает интерпретируемость, позволяя людям проверять выбранную входную последовательность.'}, 'en': {'title': 'Accelerating LLMs with Efficient Token Filtering', 'desc': 'This paper presents GemFilter, a new method designed to improve the efficiency of Large Language Models (LLMs) when processing long context inputs. By utilizing early layers of the LLM to filter and compress input tokens, GemFilter reduces the amount of data that needs to be processed in later layers, leading to faster inference times and lower GPU memory usage. The results show that GemFilter achieves a 2.4 times speedup and a 30% reduction in memory consumption compared to state-of-the-art techniques. Additionally, it provides interpretability by allowing users to examine the selected input tokens, enhancing both practical deployment and understanding of LLMs.'}, 'zh': {'title': 'GemFilter:加速大型语言模型的推理与内存优化', 'desc': '大型语言模型(LLMs)在处理长上下文输入方面表现出色,但这需要更多的计算资源和延迟。我们的研究提出了一种新方法,旨在加速LLM推理并减少GPU内存消耗。我们发现LLMs可以在生成答案之前,在早期层识别相关的输入标记。基于这一发现,我们提出的GemFilter算法利用LLM的早期层作为过滤器,选择和压缩输入标记,从而显著减少后续处理的上下文长度。'}}}, {'id': 'https://huggingface.co/papers/2409.17565', 'title': 'Pixel-Space Post-Training of Latent Diffusion Models', 'url': 'https://huggingface.co/papers/2409.17565', 'abstract': 'Latent diffusion models (LDMs) have made significant advancements in the field of image generation in recent years. One major advantage of LDMs is their ability to operate in a compressed latent space, allowing for more efficient training and deployment. However, despite these advantages, challenges with LDMs still remain. 
For example, it has been observed that LDMs often generate high-frequency details and complex compositions imperfectly. We hypothesize that one reason for these flaws is due to the fact that all pre- and post-training of LDMs are done in latent space, which is typically 8 times 8 lower spatial-resolution than the output images. To address this issue, we propose adding pixel-space supervision in the post-training process to better preserve high-frequency details. Experimentally, we show that adding a pixel-space objective significantly improves both supervised quality fine-tuning and preference-based post-training by a large margin on a state-of-the-art DiT transformer and U-Net diffusion models in both visual quality and visual flaw metrics, while maintaining the same text alignment quality.', 'score': 19, 'issue_id': 1, 'pub_date': '2024-09-26', 'pub_date_card': {'ru': '26 сентября', 'en': 'September 26', 'zh': '9月26日'}, 'hash': 'fa618de81a80ad24', 'authors': ['Christina Zhang', 'Simran Motwani', 'Matthew Yu', 'Ji Hou', 'Felix Juefei-Xu', 'Sam Tsai', 'Peter Vajda', 'Zijian He', 'Jialiang Wang'], 'affiliations': ['Meta GenAI, Menlo Park, CA', 'Princeton University, Princeton, NJ'], 'pdf_title_img': 'assets\\pdf\\title_img\\2409.17565.jpg', 'data': {'categories': ['#cv', '#training', '#optimization', '#diffusion', '#architecture'], 'emoji': '🖼️', 'ru': {'title': 'Улучшение качества генерации изображений через пиксельный контроль в латентных диффузионных моделях', 'desc': 'Латентные диффузионные модели (LDM) значительно продвинулись в области генерации изображений, но всё ещё имеют проблемы с высокочастотными деталями и сложными композициями. Авторы предполагают, что это связано с обучением в латентном пространстве с низким разрешением. Они предлагают добавить контроль в пиксельном пространстве при пост-обучении для улучшения качества деталей. Эксперименты показывают, что этот подход значительно улучшает качество изображений и уменьшает визуальные дефекты в современных диффузионных моделях.'}, 'en': {'title': 'Enhancing Image Quality in Latent Diffusion Models with Pixel-Space Supervision', 'desc': 'Latent diffusion models (LDMs) are advanced techniques for generating images, leveraging a compressed latent space for efficient training. However, they struggle with producing high-frequency details and complex compositions accurately. This paper suggests that the issue arises because LDMs operate in a lower resolution latent space during training. To improve the quality of generated images, the authors propose incorporating pixel-space supervision in the post-training phase, which significantly enhances visual quality without compromising text alignment.'}, 'zh': {'title': '提升图像生成质量的潜在空间监督', 'desc': '潜在扩散模型(LDMs)在图像生成领域取得了显著进展。LDMs的一个主要优点是能够在压缩的潜在空间中操作,从而实现更高效的训练和部署。然而,LDMs仍然面临一些挑战,例如生成高频细节和复杂构图时的不足。为了解决这个问题,我们提出在后期训练过程中增加像素空间监督,以更好地保留高频细节,并通过实验验证了这一方法的有效性。'}}}, {'id': 'https://huggingface.co/papers/2409.14195', 'title': 'The Imperative of Conversation Analysis in the Era of LLMs: A Survey of Tasks, Techniques, and Trends', 'url': 'https://huggingface.co/papers/2409.14195', 'abstract': 'In the era of large language models (LLMs), a vast amount of conversation logs will be accumulated thanks to the rapid development trend of language UI. Conversation Analysis (CA) strives to uncover and analyze critical information from conversation data, streamlining manual processes and supporting business insights and decision-making. 
The need for CA to extract actionable insights and drive empowerment is becoming increasingly prominent and attracting widespread attention. However, the lack of a clear scope for CA leads to a dispersion of various techniques, making it difficult to form a systematic technical synergy to empower business applications. In this paper, we perform a thorough review and systematize CA task to summarize the existing related work. Specifically, we formally define CA task to confront the fragmented and chaotic landscape in this field, and derive four key steps of CA from conversation scene reconstruction, to in-depth attribution analysis, and then to performing targeted training, finally generating conversations based on the targeted training for achieving the specific goals. In addition, we showcase the relevant benchmarks, discuss potential challenges and point out future directions in both industry and academia. In view of current advancements, it is evident that the majority of efforts are still concentrated on the analysis of shallow conversation elements, which presents a considerable gap between the research and business, and with the assist of LLMs, recent work has shown a trend towards research on causality and strategic tasks which are sophisticated and high-level. The analyzed experiences and insights will inevitably have broader application value in business operations that target conversation logs.', 'score': 11, 'issue_id': 1, 'pub_date': '2024-09-21', 'pub_date_card': {'ru': '21 сентября', 'en': 'September 21', 'zh': '9月21日'}, 'hash': 'fc04ee445bfa493b', 'authors': ['Xinghua Zhang', 'Haiyang Yu', 'Yongbin Li', 'Minzheng Wang', 'Longze Chen', 'Fei Huang'], 'affiliations': ['Alibaba Group, China'], 'pdf_title_img': 'assets\\pdf\\title_img\\2409.14195.jpg', 'data': {'categories': ['#science', '#survey', '#training', '#data', '#benchmark', '#multimodal'], 'emoji': '💬', 'ru': {'title': 'Анализ разговоров: от поверхностного анализа к глубокому пониманию с помощью LLM', 'desc': 'Эта статья посвящена анализу разговоров (Conversation Analysis, CA) в контексте широкого распространения больших языковых моделей (LLM). Авторы систематизируют задачи CA, выделяя четыре ключевых этапа: реконструкция сцены разговора, глубокий анализ атрибуций, целевое обучение и генерация разговоров для достижения конкретных целей. В работе обсуждаются существующие методики, потенциальные проблемы и будущие направления исследований в этой области. Отмечается, что большинство текущих исследований сосредоточено на анализе поверхностных элементов разговора, но с помощью LLM наблюдается тенденция к изучению более сложных аспектов, таких как причинно-следственные связи и стратегические задачи.'}, 'en': {'title': 'Empowering Business Insights through Systematic Conversation Analysis', 'desc': 'This paper reviews the field of Conversation Analysis (CA) in the context of large language models (LLMs) and their ability to process conversation logs. It defines the CA task systematically, outlining four key steps: reconstructing conversation scenes, conducting in-depth attribution analysis, performing targeted training, and generating conversations for specific goals. The authors highlight the current focus on shallow conversation elements and the need for deeper analysis to bridge the gap between research and practical business applications. 
They also discuss benchmarks, challenges, and future directions for CA in both industry and academia, emphasizing the potential of LLMs to enhance strategic conversation tasks.'}, 'zh': {'title': '系统化对话分析,驱动商业洞察', 'desc': '在大型语言模型(LLMs)时代,随着语言用户界面的快速发展,积累了大量的对话日志。对话分析(CA)旨在从对话数据中提取和分析关键信息,以简化手动流程并支持商业洞察和决策。本文对CA任务进行了全面回顾和系统化,明确了CA的定义,并提出了从对话场景重建到深入归因分析、再到针对性训练的四个关键步骤。通过展示相关基准和讨论潜在挑战,本文指出了行业和学术界未来的发展方向。'}}}, {'id': 'https://huggingface.co/papers/2409.17280', 'title': 'Disco4D: Disentangled 4D Human Generation and Animation from a Single Image', 'url': 'https://huggingface.co/papers/2409.17280', 'abstract': 'We present Disco4D, a novel Gaussian Splatting framework for 4D human generation and animation from a single image. Different from existing methods, Disco4D distinctively disentangles clothings (with Gaussian models) from the human body (with SMPL-X model), significantly enhancing the generation details and flexibility. It has the following technical innovations. 1) Disco4D learns to efficiently fit the clothing Gaussians over the SMPL-X Gaussians. 2) It adopts diffusion models to enhance the 3D generation process, e.g., modeling occluded parts not visible in the input image. 3) It learns an identity encoding for each clothing Gaussian to facilitate the separation and extraction of clothing assets. Furthermore, Disco4D naturally supports 4D human animation with vivid dynamics. Extensive experiments demonstrate the superiority of Disco4D on 4D human generation and animation tasks. Our visualizations can be found in https://disco-4d.github.io/.', 'score': 9, 'issue_id': 1, 'pub_date': '2024-09-25', 'pub_date_card': {'ru': '25 сентября', 'en': 'September 25', 'zh': '9月25日'}, 'hash': 'b076d30e6256f634', 'authors': ['Hui En Pang', 'Shuai Liu', 'Zhongang Cai', 'Lei Yang', 'Tianwei Zhang', 'Ziwei Liu'], 'affiliations': ['S-Lab, Nanyang Technological University', 'SenseTime Research', 'Shanghai AI Laboratory'], 'pdf_title_img': 'assets\\pdf\\title_img\\2409.17280.jpg', 'data': {'categories': ['#cv', '#games', '#diffusion', '#architecture', '#3d'], 'emoji': '👕', 'ru': {'title': 'Реалистичная генерация и анимация 3D-людей из одного фото', 'desc': 'Disco4D - это новая система для генерации и анимации 3D-моделей людей по одному изображению, основанная на методе Gaussian Splatting. Она отделяет одежду от тела человека, используя гауссовы модели для одежды и модель SMPL-X для тела. Система применяет диффузионные модели для улучшения процесса 3D-генерации и обучает кодирование идентичности для каждого гауссиана одежды. Disco4D позволяет создавать реалистичную 4D-анимацию людей с динамическими эффектами.'}, 'en': {'title': 'Revolutionizing 4D Human Generation with Disco4D', 'desc': 'Disco4D is a new framework that uses Gaussian Splatting to create and animate 4D human figures from just one image. It separates clothing from the human body using Gaussian models and the SMPL-X model, which improves detail and flexibility in the generated images. The framework incorporates diffusion models to better generate 3D representations, even for parts of the body that are not visible in the original image. 
Additionally, it includes a unique identity encoding for clothing, allowing for easier management of clothing assets and enabling dynamic 4D animations.'}, 'zh': {'title': 'Disco4D:从单图像生成动态4D人类模型', 'desc': 'Disco4D是一种新颖的高斯点云框架,用于从单张图像生成和动画化4D人类模型。与现有方法不同,Disco4D将服装(使用高斯模型)与人体(使用SMPL-X模型)有效分离,从而显著提高了生成的细节和灵活性。该方法通过高效拟合服装高斯模型和SMPL-X高斯模型,采用扩散模型增强3D生成过程,并为每个服装高斯学习身份编码,以便于分离和提取服装资产。此外,Disco4D自然支持生动的4D人类动画。'}}}, {'id': 'https://huggingface.co/papers/2409.14683', 'title': 'Reducing the Footprint of Multi-Vector Retrieval with Minimal Performance Impact via Token Pooling', 'url': 'https://huggingface.co/papers/2409.14683', 'abstract': 'Over the last few years, multi-vector retrieval methods, spearheaded by ColBERT, have become an increasingly popular approach to Neural IR. By storing representations at the token level rather than at the document level, these methods have demonstrated very strong retrieval performance, especially in out-of-domain settings. However, the storage and memory requirements necessary to store the large number of associated vectors remain an important drawback, hindering practical adoption. In this paper, we introduce a simple clustering-based token pooling approach to aggressively reduce the number of vectors that need to be stored. This method can reduce the space & memory footprint of ColBERT indexes by 50% with virtually no retrieval performance degradation. This method also allows for further reductions, reducing the vector count by 66%-to-75% , with degradation remaining below 5% on a vast majority of datasets. Importantly, this approach requires no architectural change nor query-time processing, and can be used as a simple drop-in during indexation with any ColBERT-like model.', 'score': 8, 'issue_id': 1, 'pub_date': '2024-09-23', 'pub_date_card': {'ru': '23 сентября', 'en': 'September 23', 'zh': '9月23日'}, 'hash': 'd7dda0c648e6ab9d', 'authors': ['Benjamin Clavié', 'Antoine Chaffin', 'Griffin Adams'], 'affiliations': ['Answer.AI Japan', 'Answer.AI USA', 'LightOn France'], 'pdf_title_img': 'assets\\pdf\\title_img\\2409.14683.jpg', 'data': {'categories': ['#rag', '#inference', '#graphs', '#optimization', '#data', '#benchmark'], 'emoji': '🗜️', 'ru': {'title': 'Эффективное сжатие индексов ColBERT без потери качества поиска', 'desc': 'Статья представляет новый подход к уменьшению объема хранимых векторов в многовекторных методах информационного поиска, таких как ColBERT. Авторы предлагают метод кластеризации токенов, который позволяет сократить объем индексов ColBERT на 50% без существенной потери производительности. Дальнейшее сокращение до 66-75% приводит к снижению эффективности менее чем на 5% для большинства наборов данных. Важно отметить, что этот метод не требует изменений в архитектуре модели и может быть легко интегрирован в процесс индексации.'}, 'en': {'title': 'Efficient Token Storage for Enhanced Retrieval Performance', 'desc': 'This paper presents a new method to improve multi-vector retrieval systems, particularly those based on ColBERT. The authors propose a clustering-based token pooling technique that significantly reduces the number of token-level vectors stored, addressing the high storage and memory demands of existing methods. Their approach can cut the storage requirements by 50% without losing retrieval accuracy, and even achieve reductions of 66% to 75% with minimal performance degradation. 
Importantly, this method is easy to implement, requiring no changes to the existing architecture or query processing, making it a practical enhancement for ColBERT-like models.'}, 'zh': {'title': '聚类池化:高效存储与检索的完美结合', 'desc': '近年来,多向量检索方法在神经信息检索中越来越受欢迎,尤其是ColBERT方法。该方法通过在标记级别存储表示,而不是在文档级别,展示了强大的检索性能,尤其是在域外设置中。然而,存储大量相关向量所需的存储和内存要求仍然是一个重要缺点,限制了其实际应用。本文提出了一种基于聚类的标记池化方法,可以大幅减少需要存储的向量数量,且几乎不影响检索性能。'}}}, {'id': 'https://huggingface.co/papers/2409.17580', 'title': 'Enhancing Structured-Data Retrieval with GraphRAG: Soccer Data Case Study', 'url': 'https://huggingface.co/papers/2409.17580', 'abstract': "Extracting meaningful insights from large and complex datasets poses significant challenges, particularly in ensuring the accuracy and relevance of retrieved information. Traditional data retrieval methods such as sequential search and index-based retrieval often fail when handling intricate and interconnected data structures, resulting in incomplete or misleading outputs. To overcome these limitations, we introduce Structured-GraphRAG, a versatile framework designed to enhance information retrieval across structured datasets in natural language queries. Structured-GraphRAG utilizes multiple knowledge graphs, which represent data in a structured format and capture complex relationships between entities, enabling a more nuanced and comprehensive retrieval of information. This graph-based approach reduces the risk of errors in language model outputs by grounding responses in a structured format, thereby enhancing the reliability of results. We demonstrate the effectiveness of Structured-GraphRAG by comparing its performance with that of a recently published method using traditional retrieval-augmented generation. Our findings show that Structured-GraphRAG significantly improves query processing efficiency and reduces response times. While our case study focuses on soccer data, the framework's design is broadly applicable, offering a powerful tool for data analysis and enhancing language model applications across various structured domains.", 'score': 7, 'issue_id': 1, 'pub_date': '2024-09-26', 'pub_date_card': {'ru': '26 сентября', 'en': 'September 26', 'zh': '9月26日'}, 'hash': 'c7496beca8061db3', 'authors': ['Zahra Sepasdar', 'Sushant Gautam', 'Cise Midoglu', 'Michael A. Riegler', 'Pål Halvorsen'], 'affiliations': ['Forzasys', 'OsloMet', 'SimulaMet'], 'pdf_title_img': 'assets\\pdf\\title_img\\2409.17580.jpg', 'data': {'categories': ['#reasoning', '#graphs', '#rag', '#data', '#interpretability', '#architecture'], 'emoji': '🕸️', 'ru': {'title': 'Графовый подход для точного извлечения данных', 'desc': 'Статья представляет Structured-GraphRAG - новый фреймворк для улучшения извлечения информации из сложных структурированных датасетов. Он использует множественные графы знаний для более точного и полного поиска данных. Structured-GraphRAG повышает надежность результатов языковых моделей, опираясь на структурированный формат. Эксперименты показали значительное улучшение эффективности обработки запросов по сравнению с традиционными методами.'}, 'en': {'title': 'Revolutionizing Data Retrieval with Structured-GraphRAG', 'desc': 'This paper presents Structured-GraphRAG, a new framework aimed at improving information retrieval from complex datasets using natural language queries. It addresses the shortcomings of traditional methods like sequential search by leveraging multiple knowledge graphs, which organize data and highlight relationships between entities. 
By grounding language model outputs in structured data, Structured-GraphRAG enhances the accuracy and relevance of the retrieved information. The framework has been shown to significantly boost query processing efficiency and is applicable to various domains beyond the soccer data case study.'}, 'zh': {'title': '提升结构化数据检索的效率与准确性', 'desc': '本论文介绍了一种名为Structured-GraphRAG的信息检索框架,旨在提高对结构化数据集的检索效率。传统的数据检索方法在处理复杂数据时常常无法提供准确的信息,导致结果不完整或误导。Structured-GraphRAG利用多个知识图谱,以结构化的方式表示数据,捕捉实体之间的复杂关系,从而实现更全面的信息检索。通过与传统的检索增强生成方法进行比较,我们的研究表明,Structured-GraphRAG在查询处理效率和响应时间上都有显著改善。'}}}, {'id': 'https://huggingface.co/papers/2409.18121', 'title': 'Robot See Robot Do: Imitating Articulated Object Manipulation with Monocular 4D Reconstruction', 'url': 'https://huggingface.co/papers/2409.18121', 'abstract': "Humans can learn to manipulate new objects by simply watching others; providing robots with the ability to learn from such demonstrations would enable a natural interface specifying new behaviors. This work develops Robot See Robot Do (RSRD), a method for imitating articulated object manipulation from a single monocular RGB human demonstration given a single static multi-view object scan. We first propose 4D Differentiable Part Models (4D-DPM), a method for recovering 3D part motion from a monocular video with differentiable rendering. This analysis-by-synthesis approach uses part-centric feature fields in an iterative optimization which enables the use of geometric regularizers to recover 3D motions from only a single video. Given this 4D reconstruction, the robot replicates object trajectories by planning bimanual arm motions that induce the demonstrated object part motion. By representing demonstrations as part-centric trajectories, RSRD focuses on replicating the demonstration's intended behavior while considering the robot's own morphological limits, rather than attempting to reproduce the hand's motion. We evaluate 4D-DPM's 3D tracking accuracy on ground truth annotated 3D part trajectories and RSRD's physical execution performance on 9 objects across 10 trials each on a bimanual YuMi robot. Each phase of RSRD achieves an average of 87% success rate, for a total end-to-end success rate of 60% across 90 trials. Notably, this is accomplished using only feature fields distilled from large pretrained vision models -- without any task-specific training, fine-tuning, dataset collection, or annotation. Project page: https://robot-see-robot-do.github.io", 'score': 7, 'issue_id': 1, 'pub_date': '2024-09-26', 'pub_date_card': {'ru': '26 сентября', 'en': 'September 26', 'zh': '9月26日'}, 'hash': '1397b774b882bc6c', 'authors': ['Justin Kerr', 'Chung Min Kim', 'Mingxuan Wu', 'Brent Yi', 'Qianqian Wang', 'Ken Goldberg', 'Angjoo Kanazawa'], 'affiliations': ['UC Berkeley'], 'pdf_title_img': 'assets\\pdf\\title_img\\2409.18121.jpg', 'data': {'categories': ['#cv', '#optimization', '#games', '#open_source', '#architecture', '#robotics', '#3d'], 'emoji': '🤖', 'ru': {'title': 'Роботы учатся манипулировать объектами, наблюдая за людьми', 'desc': 'Статья представляет метод Robot See Robot Do (RSRD) для имитации манипуляций с шарнирными объектами роботами на основе наблюдения за действиями человека. Авторы предлагают технику 4D Differentiable Part Models (4D-DPM) для восстановления трехмерного движения частей объекта из монокулярного видео с помощью дифференцируемого рендеринга. RSRD использует восстановленную 4D-реконструкцию для планирования движений робота, воспроизводящих траектории частей объекта. 
Метод достигает 60% успеха в физическом выполнении задач без специфического обучения или аннотаций данных.'}, 'en': {'title': 'Learning by Watching: Robots Imitate Human Object Manipulation', 'desc': 'This paper introduces Robot See Robot Do (RSRD), a method that allows robots to learn how to manipulate objects by observing human demonstrations. It utilizes 4D Differentiable Part Models (4D-DPM) to extract 3D motion information from a single monocular video, enabling the robot to understand and replicate the intended object movements. The approach focuses on part-centric trajectories, allowing the robot to plan its arm motions based on the demonstrated behavior while respecting its own physical capabilities. The method shows promising results, achieving an average success rate of 87% in tracking and 60% in execution across multiple trials without requiring specific training or data collection.'}, 'zh': {'title': '让机器人通过观察学习新技能', 'desc': '本研究提出了一种名为机器人看机器人做(RSRD)的方法,使机器人能够通过观察人类的单一演示来学习操控物体。我们首先引入了4D可微分部件模型(4D-DPM),该模型能够从单目视频中恢复3D部件运动。RSRD通过规划双手臂运动来复制物体轨迹,专注于再现演示的意图行为,而不是简单模仿手的动作。实验结果显示,RSRD在多个物体上的成功率达到87%,并且在没有特定任务训练的情况下实现了60%的整体成功率。'}}}];
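// Cached references to the article list container and the filter/sort controls.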
const articlesContainer = document.getElementById('articles-container');
const sortDropdown = document.getElementById('sort-dropdown');
const categoryFiltersContainer = document.getElementById('category-filters');
const categoryFiltersLogicOptions = document.getElementById('category-options');
const categoryToggle = document.getElementById('category-toggle');
const clearCategoriesButton = document.getElementById('clear-categories');
let selectedCategories = [];
let selectedArticles = [];
let sortBy = 'issue_id';
let showLimitHint = false;
let filterLogicIsAnd = false;
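// Read the 'cat' query parameter and return its comma-separated values as '#'-prefixed category tags.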
function getUrlParameters() {
const urlParams = new URLSearchParams(window.location.search);
const categoriesParam = urlParams.get('cat');
let categories = categoriesParam ? categoriesParam.split(',') : [];
categories = categories.map(element => `#${element}`);
return categories;
}
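// Mirror the current selection into the URL (?cat=...), dropping the leading '#'
// from each tag; with no selection, restore the bare path.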
function updateUrlWithCategories() {
let cleanedCategories = selectedCategories.map(element => element.replace(/^#/, ''));
const newUrl = cleanedCategories.length > 0
? `${window.location.pathname}?cat=${cleanedCategories.join(',')}`
: window.location.pathname;
console.log("cleanedCategories", cleanedCategories)
window.history.pushState({}, '', newUrl);
}
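// Restore persisted preferences (dark mode, sort order, AND/OR filter logic)
// from localStorage and sync the corresponding controls.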
function loadSettings() {
const themeToggle = document.getElementById('theme-toggle');
const sortDropdown = document.getElementById('sort-dropdown');
const isDarkMode = localStorage.getItem('darkMode') === 'true';
let settingSortBy = localStorage.getItem('sort_by');
filterLogicIsAnd = localStorage.getItem('filter_logic_is_and') === 'true';
if (isDarkMode) {
document.body.classList.remove('light-theme');
document.body.classList.add('dark-theme');
themeToggle.checked = true;
const title = document.getElementById('doomgrad');
title.innerHTML = "hf nightly";
const titleSign = document.getElementById('doomgrad-icon');
titleSign.classList.add('rotate');
}
if ((!settingSortBy) || (settingSortBy === 'null')) {
settingSortBy = 'issue_id';
}
if (filterLogicIsAnd) {
document.getElementById('filter-logic-and').checked = true;
} else {
document.getElementById('filter-logic-or').checked = true;
}
sortDropdown.value = settingSortBy;
sortBy = settingSortBy;
}
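// Wire up the theme toggle and the AND/OR filter-logic radio buttons; the chosen
// logic is persisted to localStorage and the article list is re-filtered immediately.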
document.getElementById('theme-toggle').addEventListener('change', toggleTheme);
document.getElementById('filter-logic-and').addEventListener('change', () => {
filterLogicIsAnd = true;
localStorage.setItem('filter_logic_is_and', 'true');
filterAndRenderArticles();
updateSelectedArticlesTitle();
});
document.getElementById('filter-logic-or').addEventListener('change', () => {
filterLogicIsAnd = false;
localStorage.setItem('filter_logic_is_and', 'false');
filterAndRenderArticles();
updateSelectedArticlesTitle();
});
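// Collect a sorted, de-duplicated list of all category tags present in the articles.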
function getUniqueCategories(articles) {
const categories = new Set();
articles.forEach(article => {
if (article.data && article.data.categories) {
article.data.categories.forEach(cat => categories.add(cat));
}
});
let res = Array.from(categories);
res.sort();
return res;
}
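// Render one clickable button per category. The list below is baked in at page
// generation time with per-category article counts; categories without a count
// (no matching articles today) are rendered as inactive.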
function createCategoryButtons() {
//const categories = getUniqueCategories(articlesData);
const categories = ['#3d (3)', '#agents', '#agi', '#alignment (2)', '#architecture (10)', '#audio (1)', '#benchmark (3)', '#cv (6)', '#data (3)', '#dataset (3)', '#diffusion (3)', '#ethics', '#games (2)', '#graphs (3)', '#hallucinations', '#healthcare', '#inference (4)', '#interpretability (3)', '#leakage', '#long_context (1)', '#low_resource', '#machine_translation', '#math', '#multilingual', '#multimodal (3)', '#open_source (4)', '#optimization (7)', '#plp', '#rag (2)', '#reasoning (2)', '#rl', '#rlhf', '#robotics (1)', '#science (1)', '#security', '#small_models', '#story_generation', '#survey (1)', '#synthetic (1)', '#training (6)', '#transfer_learning (3)', '#video'];
categories.forEach(category => {
let catNameSplitted = category.split(/(\s+)/);
let catName = catNameSplitted[0];
const button = document.createElement('span');
button.textContent = catName;
button.className = 'category-button';
if (catNameSplitted.length < 2) {
button.classList.add('inactive');
}
button.onclick = () => toggleCategory(catName, button);
categoryFiltersContainer.appendChild(button);
});
}
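// Toggle a category in the selection and its button state, then re-filter the list,
// persist the selection, and refresh the title, URL and filter-option visibility.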
function toggleCategory(category, button) {
const index = selectedCategories.indexOf(category);
if (index === -1) {
selectedCategories.push(category);
button.classList.add('active');
} else {
selectedCategories.splice(index, 1);
button.classList.remove('active');
}
filterAndRenderArticles();
saveCategorySelection();
updateSelectedArticlesTitle();
updateUrlWithCategories();
setFilterOptionsVisibility();
}
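// Persist the current category selection to localStorage as a JSON array
// (presumably restored elsewhere with JSON.parse).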
function saveCategorySelection() {
localStorage.setItem('selectedCategories', JSON.stringify(selectedCategories));
}
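// Show how many articles match the active filters in the filter-toggle label;
// when no filter is applied and every article is shown, display the plain label.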
function updateSelectedArticlesTitle() {
if ((selectedArticles.length === articlesData.length) && (selectedCategories.length === 0)) {
categoryToggle.textContent = `🏷️ ${filterLabel[currentLang]}`;
} else {
categoryToggle.textContent = `🏷️ ${filterLabel[currentLang]} (${formatArticlesTitle(selectedArticles.length, currentLang)})`;
}
}
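// Reset the stored category selection to an empty array.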
function cleanCategorySelection() {
localStorage.setItem('selectedCategories', JSON.stringify([]));
}