<!DOCTYPE html>
<html>
<head>
<script async src="https://www.googletagmanager.com/gtag/js?id=G-C1CRWDNJ1J"></script>
<script>
window.dataLayer = window.dataLayer || [];
function gtag(){dataLayer.push(arguments);}
gtag('js', new Date());
gtag('config', 'G-C1CRWDNJ1J');
</script>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0"><title>HF. 12 papers. September 26.</title>
<link rel="icon" href="favicon.svg" sizes="any" type="image/svg+xml">
<link href="https://fonts.googleapis.com/css2?family=Roboto:wght@300;400;700&display=swap" rel="stylesheet">
<link href="https://fonts.googleapis.com/css2?family=Roboto+Slab:[email protected]&family=Tiny5&display=swap" rel="stylesheet">
<style>
:root {
--primary-color: cornflowerblue;
--primary-color-dark: #fffd87cf;
--secondary-color: #fff;
--background-color: #eee;
--text-color: #333333;
--header-color: cornflowerblue;
--body-color: #eee;
--menu-color: #002370;
}
.background-digit {
position: absolute;
font-family: 'Tiny5';
bottom: -20px;
right: -10px;
font-size: 8em;
font-weight: 400;
color: #0989ea22;
z-index: 2;
line-height: 1;
}
.dark-theme .background-digit {
color: #e9e78f3d;
}
body {
font-family: 'Roboto Slab', sans-serif;
line-height: 1.6;
color: var(--text-color);
margin: 0;
padding: 0;
min-height: 100vh;
display: flex;
flex-direction: column;
}
.container {
max-width: 1500px;
margin: 0 auto;
padding: 0 20px;
flex: 1 0 auto;
}
.a-clean {
color: var(--secondary-color);
text-decoration: none;
}
.a-clean:hover {
color: #fff;
}
header {
padding: 3.6em 0 2.4em 0;
text-align: center;
}
footer {
background-color: var(--primary-color);
color: white;
text-align: center;
margin-top: 2em;
flex-shrink: 0;
padding: 20px;
}
h1 {
font-size: 2.4em;
margin: 0;
font-weight: 700;
}
.article-title-cont {
margin: -21px -21px 0px -21px;
padding: 10px 20px;
background: cornflowerblue;
display: table;
min-height: 5.9em;
}
.dark-theme .article-title-cont {
background: #444444;
}
.article-title {
color: white;
}
.article-title h2 {
margin: 0px;
padding: 0px;
font-weight: 400;
text-align:center;
}
h2 {
/* color: var(--primary-color); */
font-size: 1.2em;
margin-top: 0;
margin-bottom: 0.5em;
}
header p {
font-size: 1.2em;
margin-top: 0.5em;
font-weight: 300;
}
main {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(400px, 1fr));
gap: 1.5em;
padding: 10px 0 20px 0;
}
body.dark-theme>header {
background-color: #333333;
color: white;
}
body.dark-theme>div>main>article>div.article-content>p.meta {
color: #fff;
}
body.light-theme>div>main>article>div.article-content>p.meta {
color: #555;
}
body.dark-theme>div>main>article>div.article-content>p.pub-date {
color: #ccc;
}
body.light-theme>div>main>article>div.article-content>p.pub-date {
color: #555;
}
body.dark-theme>div>main>article>div.article-content>div.tags {
color: #ccc;
}
body.light-theme>div>main>article>div.article-content>div.tags {
color: #fff;
}
body.light-theme>header {
background-color: var(--header-color);
color: white;
}
article {
border-radius: 5px;
border: 1px solid #ddd;
overflow: hidden;
transition: background-color 0.2s ease;
display: flex;
flex-direction: column;
position: relative;
}
.article-content {
padding: 1.3em;
flex-grow: 1;
display: flex;
flex-direction: column;
position: relative;
z-index: 1;
cursor: pointer;
}
body.dark-theme>div>main>article {
background-color: #444;
border: none;
}
body.light-theme>div>main>article {
background-color: #fff;
}
body.dark-theme>div>main>article:hover {
background-color: #414141;
}
body.light-theme>div>main>article:hover {
background-color: #fafafa;
}
.meta {
font-size: 0.9em;
margin-bottom: 0em;
font-weight: 500;
margin: 20px 0 0px 0;
padding-bottom: 20px;
border-bottom: 1px solid #ddd;
}
.pub-date {
font-size: 0.8em;
margin-bottom: 0.8em;
font-weight: 400;
text-align: right;
font-family: Roboto;
}
.tags {
font-size: 0.9em;
margin-bottom: 0;
position: absolute;
bottom: 0px;
font-weight: 300;
font-family: 'Roboto Slab';
background: #555;
left: 0;
width: 100%;
padding: 10px 20px;
}
.abstract {
position: relative;
max-height: 170px;
overflow: hidden;
transition: max-height 0.3s ease;
cursor: pointer;
}
.abstract.expanded {
max-height: 1000px;
}
.abstract-toggle {
position: absolute;
bottom: 4px;
right: 0;
cursor: pointer;
color: var(--primary-color);
float: right;
font-weight: 400;
}
.explanation {
background-color: #e8f5e9;
border-left: 4px solid var(--secondary-color);
padding: 1em;
margin-top: 1.5em;
}
.links {
margin-top: 1.5em;
margin-bottom: 20px;
}
.affiliations {
margin-bottom: 50px;
padding:10px;
font-size: 0.9em;
text-align: center;
}
a {
color: var(--primary-color);
text-decoration: none;
font-weight: 500;
transition: color 0.3s ease;
}
.dark-theme a {
color: var(--primary-color-dark);
}
a:hover {
color: #e73838;
}
.light-theme {
background-color: var(--body-color);
color: #333333;
}
.dark-theme {
background-color: #333333;
color: #ffffff;
}
.theme-switch {
position: absolute;
top: 20px;
right: 20px;
display: flex;
align-items: center;
}
.switch {
position: relative;
display: inline-block;
width: 50px;
height: 30px;
}
.switch input {
opacity: 0;
width: 0;
height: 0;
}
.slider {
position: absolute;
cursor: pointer;
top: 0;
left: 0;
right: 0;
bottom: 0;
background-color: #ccc;
transition: .4s;
border-radius: 30px;
}
.slider:before {
position: absolute;
content: "";
height: 24px;
width: 24px;
left: 3px;
bottom: 3px;
background-color: white;
transition: .4s;
border-radius: 50%;
}
input:checked + .slider {
background-color: var(--primary-color);
}
input:checked + .slider:before {
transform: translateX(20px);
}
.switch-label {
margin-right: 10px;
}
.sub-header-container {
display: flex;
justify-content: space-between;
align-items: center;
flex-wrap: wrap;
gap: 15px;
margin-top: 7px;
}
.sub-header-container-2 {
display: flex;
justify-content: left;
align-items: center;
flex-wrap: wrap;
gap: 15px;
margin: 0 auto;
}
.update-info-container {
margin-top: 15px;
margin-bottom: 0px;
text-align: left;
flex: 1;
}
.sort-container {
margin-top: 15px;
margin-bottom: 0px;
text-align: right;
flex: 2;
}
.category-toggle-container {
display: inline-block;
margin-top: 15px;
margin-bottom: 10px;
cursor: pointer;
}
.category-option-container {
margin-top: 15px;
margin-bottom: 10px;
display: none;
margin-left: auto;
}
.category-option-container.expanded {
display: block;
}
.sort-dropdown {
padding: 5px 10px;
font-size: 16px;
border-radius: 5px;
border: 1px solid #ccc;
background-color: white;
color: var(--text-color);
font-family: 'Roboto Slab', sans-serif;
}
.sort-label {
margin-right: 10px;
font-size: 1.0em !important;
}
.dark-theme .sort-dropdown {
background-color: #444;
color: white;
border-color: var(--text-color);
}
.title-sign {
display: inline-block;
transition: all 0.5s ease;
}
.rotate {
transform: rotate(45deg) translateY(-6px);
transform-origin: center;
}
.title-text {
display: inline;
padding-left: 10px;
}
.category-filters {
margin-top: 20px;
margin-bottom: 20px;
text-align: center;
display: none;
}
.category-filters.expanded {
display: block;
margin-top: 10px;
}
.category-button {
display: inline-block;
margin: 5px;
padding: 5px 10px;
border-radius: 15px;
background-color: #f0f0f0;
color: #333;
cursor: pointer;
transition: background-color 0.3s ease;
}
.category-button.active {
background-color: var(--primary-color);
color: white;
}
.category-button.inactive:not(.active) {
color: #ccc;
}
.dark-theme .category-button {
background-color: #555;
color: #fff;
}
.dark-theme .category-button.active {
background-color: var(--primary-color);
}
.dark-theme .category-button.inactive:not(.active) {
color: #888;
}
.clear-categories {
display: inline-block;
margin: 5px;
padding: 5px 10px;
border-radius: 15px;
background-color: #f0f0f0;
color: #333;
cursor: pointer;
transition: background-color 0.3s ease;
}
.clear-categories:hover {
background-color: #bbb;
}
.svg-container {
display: inline-block;
position: relative;
overflow: hidden;
}
.svg-container span {
position: relative;
z-index: 1;
}
.svg-container svg {
position: absolute;
bottom: 0;
left: 0;
z-index: 0;
}
.nav-menu {
background-color: var(--menu-color);
padding: 2px 0 2px 0;
display: inline-block;
position: relative;
overflow: hidden;
width: 100%;
}
.nav-container {
max-width: 1500px;
margin: 0 auto;
padding: 0 20px;
display: flex;
justify-content: left;
gap: 3em;
}
.nav-container span a {
color: white;
}
.nav-item {
color: white;
padding: 3px 0px;
cursor: pointer;
font-weight: 400;
}
.nav-item:hover {
background-color: rgba(255, 255, 255, 0.1);
border-color: rgba(255, 255, 255, 0.3);
}
.language-flags {
display: flex;
gap: 7px;
padding: 5px 0px;
margin-left: auto;
}
.flag-svg {
width: 22px;
height: 22px;
cursor: pointer;
opacity: 0.4;
transition: opacity 0.3s ease;
border-radius: 2px;
}
.flag-svg.active {
opacity: 1;
}
.flag-svg:hover {
opacity: 0.8;
}
.dark-theme .nav-menu {
background-color: #333;
}
.dark-theme .nav-item {
color: white;
}
.dark-theme .nav-item:hover {
background-color: rgba(255, 255, 255, 0.05);
}
.pointer { cursor: pointer; }
.article-pdf-title-img {
max-width: 100%;
max-height: 400px;
display: inline-block;
margin-top: 10px;
margin-bottom: 10px;
border-radius: 5px;
}
.article-pdf-title-img-cont {
text-align: center;
}
.dark-theme .article-pdf-title-img {
opacity: 0.8;
filter: grayscale(1);
}
@media (max-width: 600px) {
.nav-container {
flex-direction: row;
gap: 1.5em;
}
.nav-item {
padding: 3px 0px;
}
}
@media (max-width: 768px) {
.category-filters {
display: none;
}
.category-toggle {
display: inline-block;
width: 100%;
text-align: left;
}
.category-filters.expanded {
display: block;
margin-top: 10px;
}
}
@media (max-width: 600px) {
.sub-header-container {
flex-direction: column;
align-items: flex-start;
}
.sort-container {
width: 100%;
display: flex;
justify-content: left;
margin: 0 auto;
}
.sort-dropdown {
margin-left: auto;
}
.sort-label {
margin-top: 5px;
float: left;
}
.sub-header-container-2 {
flex-direction: row;
align-items: flex-start;
}
.update-info-container {
text-align: left;
width: 100%;
margin-bottom: 0px;
}
.category-toggle-container {
margin-top: 15px;
text-align: left;
margin-bottom: 10px;
}
.category-option-container {
margin-top: 15px;
text-align: center;
margin-bottom: 10px;
}
main {
grid-template-columns: 1fr;
gap: 0em;
padding: 10px 0 20px 0;
margin: 0 -20px;
}
footer {
margin-top: -20px;
}
article {
border-radius: 0px;
}
}
</style>
<script>
function toggleAbstract(id) {
var abstract = document.getElementById('abstract-' + id);
var toggle = document.getElementById('toggle-' + id);
if (abstract.classList.contains('expanded')) {
abstract.classList.remove('expanded');
toggle.textContent = '...';
} else {
abstract.classList.add('expanded');
toggle.textContent = '';
}
}
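// Usage note (an assumption, since the article markup is generated further down this
// script): the rendered article cards presumably call toggleAbstract(<hash>) from an
// onclick handler, pairing elements with ids 'abstract-<hash>' and 'toggle-<hash>'.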
function getTimeDiff(dateString, lang='ru') {
const timeUnits = {
ru: {
minute: ["минуту", "минуты", "минут"],
hour: ["час", "часа", "часов"],
day: ["день", "дня", "дней"],
justNow: "только что",
ago: "назад"
},
en: {
minute: ["minute", "minutes", "minutes"],
hour: ["hour", "hours", "hours"],
day: ["day", "days", "days"],
justNow: "just now",
ago: "ago"
},
zh: {
minute: ["分钟", "分钟", "分钟"],
hour: ["小时", "小时", "小时"],
day: ["天", "天", "天"],
justNow: "刚刚",
ago: "前"
}
};
function getPlural(number, words, lang) {
if (lang === 'ru') {
if (number % 10 === 1 && number % 100 !== 11) {
return words[0];
} else if (number % 10 >= 2 && number % 10 <= 4 && (number % 100 < 10 || number % 100 >= 20)) {
return words[1];
} else {
return words[2];
}
} else if (lang === 'en') {
return number === 1 ? words[0] : words[1];
} else {
// Chinese doesn't need plural forms
return words[0];
}
}
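// Illustrative outputs of the plural-selection rule above (a sketch, not from the source):
//   getPlural(1,  ["минуту", "минуты", "минут"], 'ru') -> "минуту"
//   getPlural(3,  ["минуту", "минуты", "минут"], 'ru') -> "минуты"
//   getPlural(11, ["минуту", "минуты", "минут"], 'ru') -> "минут"   (11–14 take the third form)
//   getPlural(2,  ["minute", "minutes", "minutes"], 'en') -> "minutes"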
function formatTimeDiff(number, unit, lang) {
const unitWord = getPlural(number, timeUnits[lang][unit], lang);
if (lang === 'zh') {
return `${number}${unitWord}${timeUnits[lang].ago}`;
} else {
return `${number} ${unitWord} ${timeUnits[lang].ago}`;
}
}
if (!['ru', 'en', 'zh'].includes(lang)) {
throw new Error('Unsupported language. Supported languages are: ru, en, zh');
}
const pastDate = new Date(dateString.replace(" ", "T") + ":00Z");
const currentDate = new Date();
const diffInSeconds = Math.floor((currentDate - pastDate) / 1000);
const minutes = Math.floor(diffInSeconds / 60);
const hours = Math.floor(diffInSeconds / 3600);
const days = Math.floor(diffInSeconds / 86400);
if (minutes === 0) {
return timeUnits[lang].justNow;
} else if (minutes < 60) {
return formatTimeDiff(minutes, 'minute', lang);
} else if (hours < 24) {
return formatTimeDiff(hours, 'hour', lang);
} else {
return formatTimeDiff(days, 'day', lang);
}
}
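// Example outputs (illustrative; actual values depend on the time of the call):
//   getTimeDiff('2024-09-26 08:00', 'en') -> "2 hours ago"   if called at 10:00 UTC that day
//   getTimeDiff('2024-09-26 08:00', 'ru') -> "2 часа назад"  under the same assumption
//   getTimeDiff('2024-09-26 08:00', 'zh') -> "2小时前"        under the same assumption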
function isToday(dateString) {
const inputDate = new Date(dateString);
const today = new Date();
return (
inputDate.getFullYear() === today.getFullYear() &&
inputDate.getMonth() === today.getMonth() &&
inputDate.getDate() === today.getDate()
);
}
function isCurrentMonth(dateString) {
const inputDate = new Date(dateString);
const today = new Date();
return (
inputDate.getFullYear() === today.getFullYear() &&
inputDate.getMonth() === today.getMonth()
);
}
function formatArticlesTitle(number, lang='ru') {
const lastDigit = number % 10;
const lastTwoDigits = number % 100;
let word;
if (!['ru', 'en', 'zh'].includes(lang)) {
throw new Error('Unsupported language. Supported languages are: ru, en, zh');
}
if (lang === 'ru') {
if (lastTwoDigits >= 11 && lastTwoDigits <= 14) {
word = "статей";
} else if (lastDigit === 1) {
word = "статья";
} else if (lastDigit >= 2 && lastDigit <= 4) {
word = "статьи";
} else {
word = "статей";
}
} else if (lang === 'en') {
if (number === 1) {
word = 'paper'
} else {
word = 'papers'
}
} else if (lang === 'zh') {
word = "篇论文"
}
if (lang === 'zh') {
return `${number}${word}`;
} else {
return `${number} ${word}`;
}
}
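// Example outputs (illustrative, derived from the rules above):
//   formatArticlesTitle(12, 'en') -> "12 papers"
//   formatArticlesTitle(1,  'ru') -> "1 статья"
//   formatArticlesTitle(12, 'ru') -> "12 статей"
//   formatArticlesTitle(12, 'zh') -> "12篇论文"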
</script>
</head>
<body class="light-theme">
<header>
<div class="container">
<a href="https://hfday.ru" class="a-clean"><h1 class="title-sign" id="doomgrad-icon">🔺</h1><h1 class="title-text" id="doomgrad">hf daily</h1></a>
<p><span id="title-date">26 сентября</span> | <span id="title-articles-count">12 papers</span></p>
</div>
<div class="theme-switch">
<label class="switch">
<input type="checkbox" id="theme-toggle">
<span class="slider"></span>
</label>
</div>
</header>
<div class="nav-menu">
<div class="nav-container">
<span class="nav-item" id="nav-prev"><a href="/d/2024-09-25.html">⬅️ <span id="prev-date">25.09</span></a></span>
<span class="nav-item" id="nav-next"><a href="/d/2024-09-27.html">➡️ <span id="next-date">27.09</span></a></span>
<span class="nav-item" id="nav-monthly"><a href="/m/2024-09.html">📈 <span id='top-month-label'>Месяц</span></a></span>
<div class="language-flags">
<svg class="flag-svg" data-lang="ru" xmlns="http://www.w3.org/2000/svg" width="32" height="32" viewBox="0 0 32 32"><path fill="#1435a1" d="M1 11H31V21H1z"></path><path d="M5,4H27c2.208,0,4,1.792,4,4v4H1v-4c0-2.208,1.792-4,4-4Z" fill="#fff"></path><path d="M5,20H27c2.208,0,4,1.792,4,4v4H1v-4c0-2.208,1.792-4,4-4Z" transform="rotate(180 16 24)" fill="#c53a28"></path><path d="M27,4H5c-2.209,0-4,1.791-4,4V24c0,2.209,1.791,4,4,4H27c2.209,0,4-1.791,4-4V8c0-2.209-1.791-4-4-4Zm3,20c0,1.654-1.346,3-3,3H5c-1.654,0-3-1.346-3-3V8c0-1.654,1.346-3,3-3H27c1.654,0,3,1.346,3,3V24Z" opacity=".15"></path><path d="M27,5H5c-1.657,0-3,1.343-3,3v1c0-1.657,1.343-3,3-3H27c1.657,0,3,1.343,3,3v-1c0-1.657-1.343-3-3-3Z" fill="#fff" opacity=".2"></path></svg>
<svg class="flag-svg" data-lang="zh" xmlns="http://www.w3.org/2000/svg" width="32" height="32" viewBox="0 0 32 32"><rect x="1" y="4" width="30" height="24" rx="4" ry="4" fill="#db362f"></rect><path d="M27,4H5c-2.209,0-4,1.791-4,4V24c0,2.209,1.791,4,4,4H27c2.209,0,4-1.791,4-4V8c0-2.209-1.791-4-4-4Zm3,20c0,1.654-1.346,3-3,3H5c-1.654,0-3-1.346-3-3V8c0-1.654,1.346-3,3-3H27c1.654,0,3,1.346,3,3V24Z" opacity=".15"></path><path fill="#ff0" d="M7.958 10.152L7.19 7.786 6.421 10.152 3.934 10.152 5.946 11.614 5.177 13.979 7.19 12.517 9.202 13.979 8.433 11.614 10.446 10.152 7.958 10.152z"></path><path fill="#ff0" d="M12.725 8.187L13.152 8.898 13.224 8.072 14.032 7.886 13.269 7.562 13.342 6.736 12.798 7.361 12.035 7.037 12.461 7.748 11.917 8.373 12.725 8.187z"></path><path fill="#ff0" d="M14.865 10.372L14.982 11.193 15.37 10.46 16.187 10.602 15.61 10.007 15.997 9.274 15.253 9.639 14.675 9.044 14.793 9.865 14.048 10.23 14.865 10.372z"></path><path fill="#ff0" d="M15.597 13.612L16.25 13.101 15.421 13.13 15.137 12.352 14.909 13.149 14.081 13.179 14.769 13.642 14.541 14.439 15.194 13.928 15.881 14.391 15.597 13.612z"></path><path fill="#ff0" d="M13.26 15.535L13.298 14.707 12.78 15.354 12.005 15.062 12.46 15.754 11.942 16.402 12.742 16.182 13.198 16.875 13.236 16.047 14.036 15.827 13.26 15.535z"></path><path d="M27,5H5c-1.657,0-3,1.343-3,3v1c0-1.657,1.343-3,3-3H27c1.657,0,3,1.343,3,3v-1c0-1.657-1.343-3-3-3Z" fill="#fff" opacity=".2"></path></svg>
<svg class="flag-svg" data-lang="en" xmlns="http://www.w3.org/2000/svg" width="32" height="32" viewBox="0 0 32 32"><rect x="1" y="4" width="30" height="24" rx="4" ry="4" fill="#fff"></rect><path d="M1.638,5.846H30.362c-.711-1.108-1.947-1.846-3.362-1.846H5c-1.414,0-2.65,.738-3.362,1.846Z" fill="#a62842"></path><path d="M2.03,7.692c-.008,.103-.03,.202-.03,.308v1.539H31v-1.539c0-.105-.022-.204-.03-.308H2.03Z" fill="#a62842"></path><path fill="#a62842" d="M2 11.385H31V13.231H2z"></path><path fill="#a62842" d="M2 15.077H31V16.923000000000002H2z"></path><path fill="#a62842" d="M1 18.769H31V20.615H1z"></path><path d="M1,24c0,.105,.023,.204,.031,.308H30.969c.008-.103,.031-.202,.031-.308v-1.539H1v1.539Z" fill="#a62842"></path><path d="M30.362,26.154H1.638c.711,1.108,1.947,1.846,3.362,1.846H27c1.414,0,2.65-.738,3.362-1.846Z" fill="#a62842"></path><path d="M5,4h11v12.923H1V8c0-2.208,1.792-4,4-4Z" fill="#102d5e"></path><path d="M27,4H5c-2.209,0-4,1.791-4,4V24c0,2.209,1.791,4,4,4H27c2.209,0,4-1.791,4-4V8c0-2.209-1.791-4-4-4Zm3,20c0,1.654-1.346,3-3,3H5c-1.654,0-3-1.346-3-3V8c0-1.654,1.346-3,3-3H27c1.654,0,3,1.346,3,3V24Z" opacity=".15"></path><path d="M27,5H5c-1.657,0-3,1.343-3,3v1c0-1.657,1.343-3,3-3H27c1.657,0,3,1.343,3,3v-1c0-1.657-1.343-3-3-3Z" fill="#fff" opacity=".2"></path><path fill="#fff" d="M4.601 7.463L5.193 7.033 4.462 7.033 4.236 6.338 4.01 7.033 3.279 7.033 3.87 7.463 3.644 8.158 4.236 7.729 4.827 8.158 4.601 7.463z"></path><path fill="#fff" d="M7.58 7.463L8.172 7.033 7.441 7.033 7.215 6.338 6.989 7.033 6.258 7.033 6.849 7.463 6.623 8.158 7.215 7.729 7.806 8.158 7.58 7.463z"></path><path fill="#fff" d="M10.56 7.463L11.151 7.033 10.42 7.033 10.194 6.338 9.968 7.033 9.237 7.033 9.828 7.463 9.603 8.158 10.194 7.729 10.785 8.158 10.56 7.463z"></path><path fill="#fff" d="M6.066 9.283L6.658 8.854 5.927 8.854 5.701 8.158 5.475 8.854 4.744 8.854 5.335 9.283 5.109 9.979 5.701 9.549 6.292 9.979 6.066 9.283z"></path><path fill="#fff" d="M9.046 9.283L9.637 8.854 8.906 8.854 8.68 8.158 8.454 8.854 7.723 8.854 8.314 9.283 8.089 9.979 8.68 9.549 9.271 9.979 9.046 9.283z"></path><path fill="#fff" d="M12.025 9.283L12.616 8.854 11.885 8.854 11.659 8.158 11.433 8.854 10.702 8.854 11.294 9.283 11.068 9.979 11.659 9.549 12.251 9.979 12.025 9.283z"></path><path fill="#fff" d="M6.066 12.924L6.658 12.494 5.927 12.494 5.701 11.799 5.475 12.494 4.744 12.494 5.335 12.924 5.109 13.619 5.701 13.19 6.292 13.619 6.066 12.924z"></path><path fill="#fff" d="M9.046 12.924L9.637 12.494 8.906 12.494 8.68 11.799 8.454 12.494 7.723 12.494 8.314 12.924 8.089 13.619 8.68 13.19 9.271 13.619 9.046 12.924z"></path><path fill="#fff" d="M12.025 12.924L12.616 12.494 11.885 12.494 11.659 11.799 11.433 12.494 10.702 12.494 11.294 12.924 11.068 13.619 11.659 13.19 12.251 13.619 12.025 12.924z"></path><path fill="#fff" d="M13.539 7.463L14.13 7.033 13.399 7.033 13.173 6.338 12.947 7.033 12.216 7.033 12.808 7.463 12.582 8.158 13.173 7.729 13.765 8.158 13.539 7.463z"></path><path fill="#fff" d="M4.601 11.104L5.193 10.674 4.462 10.674 4.236 9.979 4.01 10.674 3.279 10.674 3.87 11.104 3.644 11.799 4.236 11.369 4.827 11.799 4.601 11.104z"></path><path fill="#fff" d="M7.58 11.104L8.172 10.674 7.441 10.674 7.215 9.979 6.989 10.674 6.258 10.674 6.849 11.104 6.623 11.799 7.215 11.369 7.806 11.799 7.58 11.104z"></path><path fill="#fff" d="M10.56 11.104L11.151 10.674 10.42 10.674 10.194 9.979 9.968 10.674 9.237 10.674 9.828 11.104 9.603 11.799 10.194 11.369 10.785 11.799 10.56 11.104z"></path><path fill="#fff" d="M13.539 11.104L14.13 10.674 13.399 
10.674 13.173 9.979 12.947 10.674 12.216 10.674 12.808 11.104 12.582 11.799 13.173 11.369 13.765 11.799 13.539 11.104z"></path><path fill="#fff" d="M4.601 14.744L5.193 14.315 4.462 14.315 4.236 13.619 4.01 14.315 3.279 14.315 3.87 14.744 3.644 15.44 4.236 15.01 4.827 15.44 4.601 14.744z"></path><path fill="#fff" d="M7.58 14.744L8.172 14.315 7.441 14.315 7.215 13.619 6.989 14.315 6.258 14.315 6.849 14.744 6.623 15.44 7.215 15.01 7.806 15.44 7.58 14.744z"></path><path fill="#fff" d="M10.56 14.744L11.151 14.315 10.42 14.315 10.194 13.619 9.968 14.315 9.237 14.315 9.828 14.744 9.603 15.44 10.194 15.01 10.785 15.44 10.56 14.744z"></path><path fill="#fff" d="M13.539 14.744L14.13 14.315 13.399 14.315 13.173 13.619 12.947 14.315 12.216 14.315 12.808 14.744 12.582 15.44 13.173 15.01 13.765 15.44 13.539 14.744z"></path></svg>
</div>
</div>
</div>
<div class="container">
<div class="sub-header-container">
<div class="update-info-container">
<label class="update-info-label" id="timeDiff"></label>
</div>
<div class="sort-container">
<label class="sort-label">🔀 <span id="sort-label-text">Сортировка по</span></label>
<select id="sort-dropdown" class="sort-dropdown">
<option value="default">рейтингу</option>
<option value="pub_date">дате публикации</option>
<option value="issue_id">добавлению на HF</option>
</select>
</div>
</div>
<div class="sub-header-container-2">
<div class="category-toggle-container">
<div class="svg-container">
<span id="category-toggle">🏷️ Фильтр</span>
<svg height="3" width="200">
<line x1="0" y1="0" x2="200" y2="0"
stroke="black"
stroke-width="2"
stroke-dasharray="3, 3" />
</svg>
</div>
</div>
<div class="category-option-container" id="category-options">
<label class="pointer" for="filter-logic-or"><input type="radio" id="filter-logic-or" name="filter-logic" value="or"> A∪B</label>
<label class="pointer" for="filter-logic-and"><input type="radio" id="filter-logic-and" name="filter-logic" value="and"> A∩B</label>
</div>
</div>
<div class="category-filters" id="category-filters">
<span class="clear-categories" id="clear-categories">🧹</span>
<!-- Categories -->
</div>
<main id="articles-container">
<!-- Articles -->
</main>
</div>
<footer>
<div class="container">
<p><a style="color:white;" href="https://t.me/doomgrad">doomgrad</a> ✖️ <a style="color:white;" href="https://huggingface.co/papers">hugging face</a></p>
</div>
</footer>
<script>
// Language handling
let currentLang = localStorage.getItem('selectedLang') || 'en';
let feedDate = {'ru': '26 сентября', 'en': 'September 26', 'zh': '9月26日'};
let feedDateNext = {'ru': '27.09', 'en': '09/27', 'zh': '9月27日'};
let feedDatePrev = {'ru': '25.09', 'en': '09/25', 'zh': '9月25日'};
let filterLabel = {'ru': 'Фильтр', 'en': 'Topics', 'zh': '主题筛选'}
let publishedLabel = {'ru': 'статья от ', 'en': 'published on ', 'zh': '发表于'}
let sortLabel = {'ru': 'Сортировка по', 'en': 'Sort by', 'zh': '排序方式'}
let paperLabel = {'ru': 'Статья', 'en': 'Paper', 'zh': '论文'}
let topMonthLabel = {'ru': 'Месяц', 'en': 'Month', 'zh': '月度论文'}
let topDayLabel = {'ru': 'День', 'en': 'Day', 'zh': '日度论文'}
function initializeLanguageFlags() {
const flags = document.querySelectorAll('.flag-svg');
flags.forEach(flag => {
if (flag.dataset.lang === currentLang) {
flag.classList.add('active');
}
flag.addEventListener('click', () => {
flags.forEach(f => f.classList.remove('active'));
flag.classList.add('active');
currentLang = flag.dataset.lang;
localStorage.setItem('selectedLang', currentLang);
updateTimeDiffs();
updateLocalization();
filterAndRenderArticles();
});
});
}
function toggleTheme() {
const body = document.body;
body.classList.toggle('light-theme');
body.classList.toggle('dark-theme');
const isDarkMode = body.classList.contains('dark-theme');
localStorage.setItem('darkMode', isDarkMode);
if (isDarkMode) {
const title = document.getElementById('doomgrad');
title.innerHTML = "hf nightly";
const titleSign = document.getElementById('doomgrad-icon');
titleSign.classList.add('rotate');
} else {
const title = document.getElementById('doomgrad');
title.innerHTML = "hf daily";
const titleSign = document.getElementById('doomgrad-icon');
titleSign.classList.remove('rotate');
}
}
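// Presumably attached to the #theme-toggle checkbox via a change listener elsewhere in this
// script (not shown above): it flips the light/dark body classes, persists the choice in
// localStorage under 'darkMode', and swaps the "hf daily" / "hf nightly" wordmark.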
const articlesData = [{'id': 'https://huggingface.co/papers/2409.17146', 'title': 'Molmo and PixMo: Open Weights and Open Data for State-of-the-Art Multimodal Models', 'url': 'https://huggingface.co/papers/2409.17146', 'abstract': "Today's most advanced multimodal models remain proprietary. The strongest open-weight models rely heavily on synthetic data from proprietary VLMs to achieve good performance, effectively distilling these closed models into open ones. As a result, the community is still missing foundational knowledge about how to build performant VLMs from scratch. We present Molmo, a new family of VLMs that are state-of-the-art in their class of openness. Our key innovation is a novel, highly detailed image caption dataset collected entirely from human annotators using speech-based descriptions. To enable a wide array of user interactions, we also introduce a diverse dataset mixture for fine-tuning that includes in-the-wild Q&A and innovative 2D pointing data. The success of our approach relies on careful choices for the model architecture details, a well-tuned training pipeline, and, most critically, the quality of our newly collected datasets, all of which will be released. The best-in-class 72B model within the Molmo family not only outperforms others in the class of open weight and data models but also compares favorably against proprietary systems like GPT-4o, Claude 3.5, and Gemini 1.5 on both academic benchmarks and human evaluation. We will be releasing all of our model weights, captioning and fine-tuning data, and source code in the near future. Select model weights, inference code, and demo are available at https://molmo.allenai.org.", 'score': 103, 'issue_id': 1, 'pub_date': '2024-09-25', 'pub_date_card': {'ru': '25 сентября', 'en': 'September 25', 'zh': '9月25日'}, 'hash': '3897ddd4f942abd3', 'authors': ['Matt Deitke', 'Christopher Clark', 'Sangho Lee', 'Rohun Tripathi', 'Yue Yang', 'Jae Sung Park', 'Mohammadreza Salehi', 'Niklas Muennighoff', 'Kyle Lo', 'Luca Soldaini', 'Jiasen Lu', 'Taira Anderson', 'Erin Bransom', 'Kiana Ehsani', 'Huong Ngo', 'YenSung Chen', 'Ajay Patel', 'Mark Yatskar', 'Chris Callison-Burch', 'Andrew Head', 'Rose Hendrix', 'Favyen Bastani', 'Eli VanderBilt', 'Nathan Lambert', 'Yvonne Chou', 'Arnavi Chheda', 'Jenna Sparks', 'Sam Skjonsberg', 'Michael Schmitz', 'Aaron Sarnat', 'Byron Bischoff', 'Pete Walsh', 'Chris Newell', 'Piper Wolters', 'Tanmay Gupta', 'Kuo-Hao Zeng', 'Jon Borchardt', 'Dirk Groeneveld', 'Jen Dumas', 'Crystal Nam', 'Sophie Lebrecht', 'Caitlin Wittlif', 'Carissa Schoenick', 'Oscar Michel', 'Ranjay Krishna', 'Luca Weihs', 'Noah A. Smith', 'Hannaneh Hajishirzi', 'Ross Girshick', 'Ali Farhadi', 'Aniruddha Kembhavi'], 'affiliations': ['Allen Institute for AI', 'University of Washington'], 'pdf_title_img': 'assets\\pdf\\title_img\\2409.17146.jpg', 'data': {'categories': ['#audio', '#dataset', '#cv', '#training', '#data', '#benchmark', '#open_source', '#architecture', '#synthetic', '#multimodal'], 'emoji': '🔓', 'ru': {'title': 'Molmo: прорыв в открытых мультимодальных моделях', 'desc': 'Статья представляет новое семейство мультимодальных моделей Molmo, которые являются лучшими в своем классе открытых моделей. Ключевым нововведением является набор данных с подробными описаниями изображений, собранный с помощью речевых аннотаций. Модели обучены на разнообразном наборе данных, включающем вопросы-ответы и инновационные 2D-указания. 
Лучшая модель Molmo с 72 миллиардами параметров превосходит другие открытые модели и сравнима с проприетарными системами вроде GPT-4 и Gemini 1.5 по академическим бенчмаркам и оценкам людей.'}, 'en': {'title': 'Unlocking Open-Weight Vision-Language Models with Molmo', 'desc': "This paper introduces Molmo, a new family of open-weight vision-language models (VLMs) that achieve state-of-the-art performance. The key innovation is a detailed image caption dataset created by human annotators using speech-based descriptions, which enhances the model's understanding of visual content. Additionally, the authors present a diverse mixture of datasets for fine-tuning, including real-world question-and-answer data and 2D pointing interactions. The Molmo models, particularly the 72B variant, outperform existing open models and even compete well against proprietary systems, with plans to release all related resources to the community."}, 'zh': {'title': 'Molmo:开创开放多模态模型的新纪元', 'desc': '本文介绍了一种新的多模态模型家族Molmo,该模型在开放性方面处于领先地位。Molmo的创新之处在于其使用人类注释者收集的详细图像描述数据集。为了支持多种用户交互,研究团队还引入了多样化的微调数据集,包括野外问答和创新的2D指向数据。Molmo的最佳模型在开放权重和数据模型中表现优异,并在学术基准和人类评估中与一些专有系统相媲美。'}}}, {'id': 'https://huggingface.co/papers/2409.17115', 'title': 'Programming Every Example: Lifting Pre-training Data Quality like Experts at Scale', 'url': 'https://huggingface.co/papers/2409.17115', 'abstract': 'Large language model pre-training has traditionally relied on human experts to craft heuristics for improving the corpora quality, resulting in numerous rules developed to date. However, these rules lack the flexibility to address the unique characteristics of individual example effectively. Meanwhile, applying tailored rules to every example is impractical for human experts. In this paper, we demonstrate that even small language models, with as few as 0.3B parameters, can exhibit substantial data refining capabilities comparable to those of human experts. We introduce Programming Every Example (ProX), a novel framework that treats data refinement as a programming task, enabling models to refine corpora by generating and executing fine-grained operations, such as string normalization, for each individual example at scale. Experimental results show that models pre-trained on ProX-curated data outperform either original data or data filtered by other selection methods by more than 2% across various downstream benchmarks. Its effectiveness spans various model sizes and pre-training corpora, including C4, RedPajama-V2, and FineWeb. Furthermore, ProX exhibits significant potential in domain-specific continual pre-training: without domain specific design, models trained on OpenWebMath refined by ProX outperform human-crafted rule-based methods, improving average accuracy by 7.6% over Mistral-7B, with 14.6% for Llama-2-7B and 20.3% for CodeLlama-7B, all within 10B tokens to be comparable to models like Llemma-7B trained on 200B tokens. Further analysis highlights that ProX significantly saves training FLOPs, offering a promising path for efficient LLM pre-training.We are open-sourcing ProX with >100B corpus, models, and sharing all training and implementation details for reproducible research and future innovation. 
Code: https://github.com/GAIR-NLP/ProX', 'score': 59, 'issue_id': 1, 'pub_date': '2024-09-25', 'pub_date_card': {'ru': '25 сентября', 'en': 'September 25', 'zh': '9月25日'}, 'hash': '7949c35f04a3db9d', 'authors': ['Fan Zhou', 'Zengzhi Wang', 'Qian Liu', 'Junlong Li', 'Pengfei Liu'], 'affiliations': ['Generative AI Research Lab (GAIR)', 'Sea AI Lab', 'Shanghai Artificial Intelligence Laboratory', 'Shanghai Jiao Tong University'], 'pdf_title_img': 'assets\\pdf\\title_img\\2409.17115.jpg', 'data': {'categories': ['#science', '#dataset', '#multilingual', '#training', '#data', '#plp', '#optimization', '#benchmark', '#open_source', '#small_models', '#synthetic'], 'emoji': '🧹', 'ru': {'title': 'ProX: Программирование каждого примера для эффективного предобучения языковых моделей', 'desc': 'Статья представляет новый подход к предобучению языковых моделей, называемый Programming Every Example (ProX). Этот метод использует небольшие языковые модели для автоматического улучшения качества обучающих данных, заменяя традиционные эвристики, созданные экспертами. ProX позволяет моделям генерировать и выполнять операции по очистке данных для каждого примера в масштабе. Эксперименты показывают, что модели, предобученные на данных, обработанных ProX, превосходят модели, обученные на исходных данных или данных, отфильтрованных другими методами.'}, 'en': {'title': 'ProX: Empowering Language Models with Tailored Data Refinement', 'desc': 'This paper presents a new approach called Programming Every Example (ProX) for refining training data used in large language models. Instead of relying on rigid human-crafted rules, ProX allows models to generate and execute specific operations for each data example, enhancing flexibility and effectiveness. The results show that even smaller models can achieve data refinement capabilities similar to those of human experts, leading to improved performance on various tasks. ProX not only boosts accuracy but also reduces the computational resources needed for training, making it a promising method for efficient language model pre-training.'}, 'zh': {'title': 'ProX:个性化数据精炼的新方法', 'desc': '本论文提出了一种新的数据精炼框架,称为Programming Every Example(ProX),旨在提高大语言模型的预训练数据质量。ProX通过将数据精炼视为编程任务,使模型能够为每个示例生成和执行细粒度操作,从而实现数据的个性化处理。实验结果表明,使用ProX精炼的数据在多个下游任务中表现优于原始数据和其他筛选方法。该方法在不同模型规模和预训练语料库中均显示出显著的效果,尤其在特定领域的持续预训练中表现出色。'}}}, {'id': 'https://huggingface.co/papers/2409.15127', 'title': 'Boosting Healthcare LLMs Through Retrieved Context', 'url': 'https://huggingface.co/papers/2409.15127', 'abstract': 'Large Language Models (LLMs) have demonstrated remarkable capabilities in natural language processing, and yet, their factual inaccuracies and hallucinations limits their application, particularly in critical domains like healthcare. Context retrieval methods, by introducing relevant information as input, have emerged as a crucial approach for enhancing LLM factuality and reliability. This study explores the boundaries of context retrieval methods within the healthcare domain, optimizing their components and benchmarking their performance against open and closed alternatives. Our findings reveal how open LLMs, when augmented with an optimized retrieval system, can achieve performance comparable to the biggest private solutions on established healthcare benchmarks (multiple-choice question answering). 
Recognizing the lack of realism of including the possible answers within the question (a setup only found in medical exams), and after assessing a strong LLM performance degradation in the absence of those options, we extend the context retrieval system in that direction. In particular, we propose OpenMedPrompt a pipeline that improves the generation of more reliable open-ended answers, moving this technology closer to practical application.', 'score': 19, 'issue_id': 1, 'pub_date': '2024-09-23', 'pub_date_card': {'ru': '23 сентября', 'en': 'September 23', 'zh': '9月23日'}, 'hash': '3a7c5c8e7a8d8071', 'authors': ['Jordi Bayarri-Planas', 'Ashwin Kumar Gururajan', 'Dario Garcia-Gasulla'], 'affiliations': ['Barcelona Supercomputing Center'], 'pdf_title_img': 'assets\\pdf\\title_img\\2409.15127.jpg', 'data': {'categories': ['#science', '#hallucinations', '#long_context', '#rag', '#healthcare', '#benchmark', '#open_source', '#architecture'], 'emoji': '🏥', 'ru': {'title': 'Повышение надежности языковых моделей в медицине с помощью умного извлечения контекста', 'desc': 'Данная статья посвящена исследованию методов извлечения контекста для улучшения фактической точности больших языковых моделей (LLM) в области здравоохранения. Авторы оптимизируют компоненты системы извлечения контекста и сравнивают ее производительность с открытыми и закрытыми альтернативами. Результаты показывают, что открытые LLM с оптимизированной системой извлечения контекста могут достичь производительности, сопоставимой с крупнейшими частными решениями в медицинских тестах. Исследователи также предлагают пайплайн OpenMedPrompt для улучшения генерации более надежных ответов на открытые вопросы.'}, 'en': {'title': 'Enhancing Healthcare LLMs with Context Retrieval for Reliable Answers', 'desc': 'This paper discusses how Large Language Models (LLMs) can struggle with providing accurate information, especially in sensitive areas like healthcare. To improve their reliability, the authors focus on context retrieval methods that supply relevant information to the LLMs. They benchmark these methods against existing solutions and find that optimized retrieval systems can enhance the performance of open LLMs to match that of private models on healthcare tasks. The study introduces OpenMedPrompt, a new pipeline designed to generate more accurate open-ended responses, making LLMs more applicable in real-world healthcare scenarios.'}, 'zh': {'title': '优化检索系统,提升医疗领域LLM的可靠性', 'desc': '大型语言模型(LLMs)在自然语言处理方面表现出色,但在医疗等关键领域的事实准确性和幻觉问题限制了它们的应用。通过引入相关信息作为输入,上下文检索方法成为提高LLM事实性和可靠性的重要手段。本文研究了上下文检索方法在医疗领域的应用,优化其组件并与其他方法进行性能基准测试。我们的研究表明,经过优化的检索系统可以使开放式LLM在医疗基准测试中达到与大型私有解决方案相当的性能,并提出了OpenMedPrompt以生成更可靠的开放式答案。'}}}, {'id': 'https://huggingface.co/papers/2409.17145', 'title': 'DreamWaltz-G: Expressive 3D Gaussian Avatars from Skeleton-Guided 2D Diffusion', 'url': 'https://huggingface.co/papers/2409.17145', 'abstract': 'Leveraging pretrained 2D diffusion models and score distillation sampling (SDS), recent methods have shown promising results for text-to-3D avatar generation. However, generating high-quality 3D avatars capable of expressive animation remains challenging. In this work, we present DreamWaltz-G, a novel learning framework for animatable 3D avatar generation from text. The core of this framework lies in Skeleton-guided Score Distillation and Hybrid 3D Gaussian Avatar representation. 
Specifically, the proposed skeleton-guided score distillation integrates skeleton controls from 3D human templates into 2D diffusion models, enhancing the consistency of SDS supervision in terms of view and human pose. This facilitates the generation of high-quality avatars, mitigating issues such as multiple faces, extra limbs, and blurring. The proposed hybrid 3D Gaussian avatar representation builds on the efficient 3D Gaussians, combining neural implicit fields and parameterized 3D meshes to enable real-time rendering, stable SDS optimization, and expressive animation. Extensive experiments demonstrate that DreamWaltz-G is highly effective in generating and animating 3D avatars, outperforming existing methods in both visual quality and animation expressiveness. Our framework further supports diverse applications, including human video reenactment and multi-subject scene composition.', 'score': 13, 'issue_id': 1, 'pub_date': '2024-09-25', 'pub_date_card': {'ru': '25 сентября', 'en': 'September 25', 'zh': '9月25日'}, 'hash': '629ce97635711d75', 'authors': ['Yukun Huang', 'Jianan Wang', 'Ailing Zeng', 'Zheng-Jun Zha', 'Lei Zhang', 'Xihui Liu'], 'affiliations': [], 'pdf_title_img': 'assets\\pdf\\title_img\\2409.17145.jpg', 'data': {'categories': ['#cv', '#optimization', '#diffusion', '#architecture', '#3d'], 'emoji': '🕺', 'ru': {'title': 'Танцующие аватары: от текста к анимированным 3D-моделям', 'desc': 'DreamWaltz-G - это новый подход к созданию анимируемых 3D-аватаров из текстовых описаний. Он использует управляемую скелетом дистилляцию оценок и гибридное представление 3D-аватара на основе гауссовых функций. Метод интегрирует контроль скелета из 3D-шаблонов человека в 2D-диффузионные модели, что улучшает качество генерации и решает проблемы вроде множественных лиц или лишних конечностей. DreamWaltz-G превосходит существующие методы по визуальному качеству и выразительности анимации.'}, 'en': {'title': 'DreamWaltz-G: Transforming Text to Lively 3D Avatars!', 'desc': 'This paper introduces DreamWaltz-G, a new framework for creating 3D avatars from text that can be animated. It uses a technique called Skeleton-guided Score Distillation (SDS) to improve the quality of the generated avatars by incorporating 3D human skeletons into 2D diffusion models. The framework also employs a Hybrid 3D Gaussian representation, which combines different 3D modeling techniques for better rendering and animation. The results show that DreamWaltz-G produces high-quality, expressive avatars and outperforms previous methods in visual quality and animation capabilities.'}, 'zh': {'title': 'DreamWaltz-G:文本生成可动画3D头像的新框架', 'desc': '本论文提出了一种名为DreamWaltz-G的新框架,用于从文本生成可动画的3D头像。该框架的核心是骨架引导的得分蒸馏和混合3D高斯头像表示,能够提高生成头像的一致性和质量。通过将3D人类模板的骨架控制整合到2D扩散模型中,解决了多面孔、额外肢体和模糊等问题。实验结果表明,DreamWaltz-G在生成和动画3D头像方面表现优异,超越了现有方法。'}}}, {'id': 'https://huggingface.co/papers/2409.15041', 'title': 'AIM 2024 Sparse Neural Rendering Challenge: Dataset and Benchmark', 'url': 'https://huggingface.co/papers/2409.15041', 'abstract': 'Recent developments in differentiable and neural rendering have made impressive breakthroughs in a variety of 2D and 3D tasks, e.g. novel view synthesis, 3D reconstruction. Typically, differentiable rendering relies on a dense viewpoint coverage of the scene, such that the geometry can be disambiguated from appearance observations alone. Several challenges arise when only a few input views are available, often referred to as sparse or few-shot neural rendering. 
As this is an underconstrained problem, most existing approaches introduce the use of regularisation, together with a diversity of learnt and hand-crafted priors. A recurring problem in sparse rendering literature is the lack of an homogeneous, up-to-date, dataset and evaluation protocol. While high-resolution datasets are standard in dense reconstruction literature, sparse rendering methods often evaluate with low-resolution images. Additionally, data splits are inconsistent across different manuscripts, and testing ground-truth images are often publicly available, which may lead to over-fitting. In this work, we propose the Sparse Rendering (SpaRe) dataset and benchmark. We introduce a new dataset that follows the setup of the DTU MVS dataset. The dataset is composed of 97 new scenes based on synthetic, high-quality assets. Each scene has up to 64 camera views and 7 lighting configurations, rendered at 1600x1200 resolution. We release a training split of 82 scenes to foster generalizable approaches, and provide an online evaluation platform for the validation and test sets, whose ground-truth images remain hidden. We propose two different sparse configurations (3 and 9 input images respectively). This provides a powerful and convenient tool for reproducible evaluation, and enable researchers easy access to a public leaderboard with the state-of-the-art performance scores. Available at: https://sparebenchmark.github.io/', 'score': 12, 'issue_id': 1, 'pub_date': '2024-09-23', 'pub_date_card': {'ru': '23 сентября', 'en': 'September 23', 'zh': '9月23日'}, 'hash': '94750a64c54ff82d', 'authors': ['Michal Nazarczuk', 'Thomas Tanay', 'Sibi Catley-Chandar', 'Richard Shaw', 'Radu Timofte', 'Eduardo Pérez-Pellitero'], 'affiliations': ['Huawei Noahs Ark Lab, London, United Kingdom', 'University of Würzburg, Germany'], 'pdf_title_img': 'assets\\pdf\\title_img\\2409.15041.jpg', 'data': {'categories': ['#dataset', '#benchmark', '#open_source', '#synthetic', '#3d'], 'emoji': '🎥', 'ru': {'title': 'SpaRe: Новый стандарт для оценки алгоритмов нейронного рендеринга', 'desc': 'Статья представляет новый набор данных и бенчмарк для задачи рендеринга с малым количеством входных изображений (sparse rendering). Авторы создали датасет SpaRe, содержащий 97 высококачественных синтетических сцен с различными ракурсами камер и конфигурациями освещения. Предложены две конфигурации с 3 и 9 входными изображениями для оценки алгоритмов. Также авторы запустили онлайн-платформу для оценки моделей на скрытом тестовом наборе и публичный лидерборд для сравнения современных подходов.'}, 'en': {'title': 'Advancing Sparse Rendering with the SpaRe Dataset', 'desc': 'This paper presents the Sparse Rendering (SpaRe) dataset and benchmark, addressing the challenges in few-shot neural rendering. It highlights the need for a consistent and high-resolution dataset, as existing methods often rely on low-quality images and inconsistent data splits. The SpaRe dataset includes 97 synthetic scenes with multiple camera views and lighting conditions, designed to facilitate the evaluation of sparse rendering techniques. 
By providing a public leaderboard and an online evaluation platform, this work aims to promote reproducibility and advance research in the field of sparse rendering.'}, 'zh': {'title': '稀疏渲染新数据集,助力神经渲染研究', 'desc': '最近在可微渲染和神经渲染方面取得了显著进展,尤其是在2D和3D任务中,如新视角合成和3D重建。可微渲染通常依赖于场景的密集视角覆盖,以便从外观观察中区分几何形状。然而,当只有少量输入视图可用时,通常会面临稀疏或少样本神经渲染的挑战。为了解决这些问题,本文提出了稀疏渲染(SpaRe)数据集和基准测试,旨在提供一个一致的评估平台,促进可重复的评估和研究。'}}}, {'id': 'https://huggingface.co/papers/2409.17058', 'title': 'Degradation-Guided One-Step Image Super-Resolution with Diffusion Priors', 'url': 'https://huggingface.co/papers/2409.17058', 'abstract': 'Diffusion-based image super-resolution (SR) methods have achieved remarkable success by leveraging large pre-trained text-to-image diffusion models as priors. However, these methods still face two challenges: the requirement for dozens of sampling steps to achieve satisfactory results, which limits efficiency in real scenarios, and the neglect of degradation models, which are critical auxiliary information in solving the SR problem. In this work, we introduced a novel one-step SR model, which significantly addresses the efficiency issue of diffusion-based SR methods. Unlike existing fine-tuning strategies, we designed a degradation-guided Low-Rank Adaptation (LoRA) module specifically for SR, which corrects the model parameters based on the pre-estimated degradation information from low-resolution images. This module not only facilitates a powerful data-dependent or degradation-dependent SR model but also preserves the generative prior of the pre-trained diffusion model as much as possible. Furthermore, we tailor a novel training pipeline by introducing an online negative sample generation strategy. Combined with the classifier-free guidance strategy during inference, it largely improves the perceptual quality of the super-resolution results. Extensive experiments have demonstrated the superior efficiency and effectiveness of the proposed model compared to recent state-of-the-art methods.', 'score': 11, 'issue_id': 1, 'pub_date': '2024-09-25', 'pub_date_card': {'ru': '25 сентября', 'en': 'September 25', 'zh': '9月25日'}, 'hash': 'c52ca2b156d80f27', 'authors': ['Aiping Zhang', 'Zongsheng Yue', 'Renjing Pei', 'Wenqi Ren', 'Xiaochun Cao'], 'affiliations': ['Huawei Noahs Ark Lab', 'S-Lab, Nanyang Technological University, Singapore', 'School of Cyber Science and Technology, Shenzhen Campus of Sun Yat-sen University, Shenzhen, China'], 'pdf_title_img': 'assets\\pdf\\title_img\\2409.17058.jpg', 'data': {'categories': ['#cv', '#training', '#inference', '#optimization', '#diffusion', '#architecture'], 'emoji': '🔍', 'ru': {'title': 'Эффективное сверхразрешение изображений за один шаг с помощью диффузионных моделей', 'desc': 'Авторы представили новую модель для сверхразрешения изображений, основанную на диффузионных моделях. Ключевое новшество - возможность получения результата за один шаг, что значительно повышает эффективность метода. В работе предложен модуль низкоранговой адаптации (LoRA), учитывающий информацию о деградации изображения. Также разработан новый конвейер обучения с онлайн-генерацией отрицательных примеров. Экспериментальные результаты показывают превосходство предложенного метода над современными аналогами.'}, 'en': {'title': 'Efficient Super-Resolution with Degradation-Guided Diffusion Models', 'desc': "This paper presents a new approach to image super-resolution (SR) using diffusion models, focusing on improving efficiency and incorporating degradation models. 
The authors introduce a one-step SR model that reduces the number of sampling steps needed, making it faster for real-world applications. They also propose a Low-Rank Adaptation (LoRA) module that adjusts model parameters based on degradation information from low-resolution images, enhancing the model's performance. The combination of this module with a novel training pipeline and classifier-free guidance leads to better perceptual quality in the generated images, outperforming existing methods."}, 'zh': {'title': '高效超分辨率:一键解决图像退化问题', 'desc': '本文提出了一种基于扩散模型的图像超分辨率(SR)新方法,旨在提高效率并解决现有方法的不足。我们设计了一个低秩适应(LoRA)模块,利用低分辨率图像的退化信息来调整模型参数,从而实现一键超分辨率。该模块不仅增强了模型的适应性,还尽可能保留了预训练扩散模型的生成先验。此外,我们引入了一种在线负样本生成策略,结合无分类器引导策略,显著提升了超分辨率结果的感知质量。'}}}, {'id': 'https://huggingface.co/papers/2409.16629', 'title': 'Synchronize Dual Hands for Physics-Based Dexterous Guitar Playing', 'url': 'https://huggingface.co/papers/2409.16629', 'abstract': 'We present a novel approach to synthesize dexterous motions for physically simulated hands in tasks that require coordination between the control of two hands with high temporal precision. Instead of directly learning a joint policy to control two hands, our approach performs bimanual control through cooperative learning where each hand is treated as an individual agent. The individual policies for each hand are first trained separately, and then synchronized through latent space manipulation in a centralized environment to serve as a joint policy for two-hand control. By doing so, we avoid directly performing policy learning in the joint state-action space of two hands with higher dimensions, greatly improving the overall training efficiency. We demonstrate the effectiveness of our proposed approach in the challenging guitar-playing task. The virtual guitarist trained by our approach can synthesize motions from unstructured reference data of general guitar-playing practice motions, and accurately play diverse rhythms with complex chord pressing and string picking patterns based on the input guitar tabs that do not exist in the references. Along with this paper, we provide the motion capture data that we collected as the reference for policy training. Code is available at: https://pei-xu.github.io/guitar.', 'score': 10, 'issue_id': 1, 'pub_date': '2024-09-25', 'pub_date_card': {'ru': '25 сентября', 'en': 'September 25', 'zh': '9月25日'}, 'hash': '319d5a32d76eb024', 'authors': ['Pei Xu', 'Ruocheng Wang'], 'affiliations': ['Stanford University, USA'], 'pdf_title_img': 'assets\\pdf\\title_img\\2409.16629.jpg', 'data': {'categories': ['#dataset', '#rl', '#agents', '#games', '#open_source', '#robotics'], 'emoji': '🎸', 'ru': {'title': 'Кооперативное обучение для синтеза двуручных движений', 'desc': 'Авторы представляют новый подход к синтезу сложных движений для физически симулируемых рук в задачах, требующих координации между двумя руками с высокой временной точностью. Вместо прямого обучения совместной политики для управления двумя руками, подход использует кооперативное обучение, где каждая рука рассматривается как отдельный агент. Индивидуальные политики для каждой руки сначала обучаются отдельно, а затем синхронизируются через манипуляции в латентном пространстве в централизованной среде, чтобы служить совместной политикой для управления двумя руками. 
Эффективность подхода демонстрируется на сложной задаче игры на гитаре.'}, 'en': {'title': 'Efficient Bimanual Control through Cooperative Learning', 'desc': 'This paper introduces a new method for controlling two simulated hands to perform tasks that require precise coordination, like playing the guitar. Instead of creating a single complex policy for both hands, the authors train each hand as a separate agent and then synchronize their movements using latent space manipulation. This approach simplifies the learning process by avoiding the high-dimensional joint state-action space, leading to more efficient training. The results show that their method allows a virtual guitarist to accurately play various rhythms and complex patterns based on guitar tabs, even when trained on unstructured data.'}, 'zh': {'title': '高效双手控制的创新方法', 'desc': '本文提出了一种新颖的方法,用于合成物理模拟手的灵巧动作,特别是在需要高时间精度的双手协调任务中。我们的方法通过合作学习实现双手控制,将每只手视为独立的智能体,而不是直接学习控制两只手的联合策略。每只手的个体策略首先单独训练,然后通过潜在空间操作在集中环境中同步,以形成双手控制的联合策略。我们在挑战性的吉他演奏任务中验证了该方法的有效性,训练出的虚拟吉他手能够从无结构的参考数据中合成动作,准确演奏复杂的节奏和和弦。'}}}, {'id': 'https://huggingface.co/papers/2409.16493', 'title': 'NoTeeline: Supporting Real-Time Notetaking from Keypoints with Large Language Models', 'url': 'https://huggingface.co/papers/2409.16493', 'abstract': "Video has become a popular media form for information sharing and consumption. However, taking notes while watching a video requires significant time and effort. To address this, we propose a novel interactive system, NoTeeline, for taking real-time, personalized notes. NoTeeline lets users quickly jot down keypoints (micronotes), which are automatically expanded into full-fledged notes that capture the content of the user's micronotes and are consistent with the user's writing style. In a within-subjects study (N=12), we found that NoTeeline helps users create high-quality notes that capture the essence of their micronotes with a higher factual correctness (93.2%) while accurately reflecting their writing style. While using NoTeeline, participants experienced significantly reduced mental effort, captured satisfactory notes while writing 47% less text, and completed notetaking with 43.9% less time compared to a manual notetaking baseline.", 'score': 9, 'issue_id': 1, 'pub_date': '2024-09-24', 'pub_date_card': {'ru': '24 сентября', 'en': 'September 24', 'zh': '9月24日'}, 'hash': '83e7a802e2a7e4e8', 'authors': ['Faria Huq', 'Abdus Samee', 'David Chuan-en Lin', 'Xiaodi Alice Tang', 'Jeffrey P. Bigham'], 'affiliations': ['Bangladesh University of Engineering & Technology, Dhaka, Bangladesh', 'Carnegie Mellon University, Pittsburgh, PA, USA'], 'pdf_title_img': 'assets\\pdf\\title_img\\2409.16493.jpg', 'data': {'categories': ['#video', '#multimodal'], 'emoji': '📝', 'ru': {'title': 'NoTeeline: умный помощник для эффективного конспектирования видео', 'desc': 'Исследователи представили систему NoTeeline для создания персонализированных заметок в реальном времени при просмотре видео. Система позволяет пользователям быстро записывать ключевые моменты, которые автоматически расширяются в полноценные заметки, соответствующие стилю письма пользователя. Эксперимент показал, что NoTeeline помогает создавать качественные заметки с высокой фактической точностью при значительном сокращении затрачиваемого времени и усилий. 
Система продемонстрировала преимущества по сравнению с ручным ведением заметок, включая снижение ментальной нагрузки и объема написанного текста.'}, 'en': {'title': 'Revolutionizing Video Note-Taking with NoTeeline', 'desc': "The paper presents NoTeeline, an innovative interactive system designed to enhance the note-taking process while watching videos. It allows users to create quick, concise notes called micronotes, which are then transformed into comprehensive notes that align with the user's unique writing style. A study involving 12 participants demonstrated that NoTeeline significantly improves the quality and factual accuracy of notes, achieving a correctness rate of 93.2%. Additionally, users reported reduced mental effort, less text written (47% less), and faster completion times (43.9% less) compared to traditional note-taking methods."}, 'zh': {'title': '实时个性化笔记,轻松记录视频精华', 'desc': '本论文提出了一种名为NoTeeline的互动系统,旨在帮助用户在观看视频时实时记录个性化笔记。用户可以快速记录关键点(微笔记),系统会自动将其扩展为完整的笔记,确保内容与用户的写作风格一致。研究结果显示,使用NoTeeline的用户能够以更高的准确性(93.2%)创建高质量的笔记,同时减少了47%的文本输入量和43.9%的时间消耗。该系统显著降低了用户的心理负担,提高了笔记的满意度。'}}}, {'id': 'https://huggingface.co/papers/2409.16299', 'title': 'HyperAgent: Generalist Software Engineering Agents to Solve Coding Tasks at Scale', 'url': 'https://huggingface.co/papers/2409.16299', 'abstract': "Large Language Models (LLMs) have revolutionized software engineering (SE), demonstrating remarkable capabilities in various coding tasks. While recent efforts have produced autonomous software agents based on LLMs for end-to-end development tasks, these systems are typically designed for specific SE tasks. We introduce HyperAgent, a novel generalist multi-agent system designed to address a wide spectrum of SE tasks across different programming languages by mimicking human developers' workflows. Comprising four specialized agents - Planner, Navigator, Code Editor, and Executor. HyperAgent manages the full lifecycle of SE tasks, from initial conception to final verification. Through extensive evaluations, HyperAgent achieves state-of-the-art performance across diverse SE tasks: it attains a 25.01% success rate on SWE-Bench-Lite and 31.40% on SWE-Bench-Verified for GitHub issue resolution, surpassing existing methods. Furthermore, HyperAgent demonstrates SOTA performance in repository-level code generation (RepoExec), and in fault localization and program repair (Defects4J), often outperforming specialized systems. This work represents a significant advancement towards versatile, autonomous agents capable of handling complex, multi-step SE tasks across various domains and languages, potentially transforming AI-assisted software development practices.", 'score': 9, 'issue_id': 1, 'pub_date': '2024-09-09', 'pub_date_card': {'ru': '9 сентября', 'en': 'September 9', 'zh': '9月9日'}, 'hash': 'a713e3f82d512439', 'authors': ['Huy Nhat Phan', 'Tien N. Nguyen', 'Phong X. Nguyen', 'Nghi D. Q. Bui'], 'affiliations': ['FPT Software AI Center, Viet Nam', 'The University of Texas at Dallas, USA'], 'pdf_title_img': 'assets\\pdf\\title_img\\2409.16299.jpg', 'data': {'categories': ['#reasoning', '#multilingual', '#agi', '#optimization', '#plp', '#agents', '#benchmark', '#architecture'], 'emoji': '🤖', 'ru': {'title': 'HyperAgent: Универсальный ИИ-помощник для программистов', 'desc': 'HyperAgent - это новая мультиагентная система, разработанная для решения широкого спектра задач программной инженерии на разных языках программирования. 
Система состоит из четырех специализированных агентов: Планировщика, Навигатора, Редактора кода и Исполнителя, которые имитируют рабочий процесс человека-разработчика. HyperAgent показывает улучшенные результаты в различных задачах, включая разрешение GitHub issues, генерацию кода на уровне репозитория и локализацию ошибок. Это значительный шаг вперед в создании универсальных автономных агентов для сложных многоэтапных задач разработки программного обеспечения.'}, 'en': {'title': 'HyperAgent: Revolutionizing Software Engineering with Multi-Agent Intelligence', 'desc': 'This paper presents HyperAgent, a generalist multi-agent system that enhances software engineering (SE) by mimicking human workflows. It consists of four specialized agents: Planner, Navigator, Code Editor, and Executor, which together manage the entire lifecycle of SE tasks. HyperAgent has shown superior performance in various coding challenges, achieving notable success rates in GitHub issue resolution and repository-level code generation. This innovation marks a significant step towards creating versatile, autonomous agents that can efficiently tackle complex SE tasks across multiple programming languages.'}, 'zh': {'title': 'HyperAgent:通用软件工程的智能代理', 'desc': '大型语言模型(LLMs)在软件工程(SE)领域带来了革命性的变化,展现了在各种编码任务中的卓越能力。我们提出了HyperAgent,这是一种新型的通用多代理系统,旨在通过模拟人类开发者的工作流程,解决不同编程语言中的广泛SE任务。HyperAgent由四个专业代理组成:规划者、导航者、代码编辑器和执行者,能够管理SE任务的整个生命周期,从初步构想到最终验证。经过广泛评估,HyperAgent在多种SE任务中实现了最先进的性能,超越了现有方法,标志着向能够处理复杂多步骤SE任务的自主代理的重要进展。'}}}, {'id': 'https://huggingface.co/papers/2409.16925', 'title': 'Game4Loc: A UAV Geo-Localization Benchmark from Game Data', 'url': 'https://huggingface.co/papers/2409.16925', 'abstract': 'The vision-based geo-localization technology for UAV, serving as a secondary source of GPS information in addition to the global navigation satellite systems (GNSS), can still operate independently in the GPS-denied environment. Recent deep learning based methods attribute this as the task of image matching and retrieval. By retrieving drone-view images in geo-tagged satellite image database, approximate localization information can be obtained. However, due to high costs and privacy concerns, it is usually difficult to obtain large quantities of drone-view images from a continuous area. Existing drone-view datasets are mostly composed of small-scale aerial photography with a strong assumption that there exists a perfect one-to-one aligned reference image for any query, leaving a significant gap from the practical localization scenario. In this work, we construct a large-range contiguous area UAV geo-localization dataset named GTA-UAV, featuring multiple flight altitudes, attitudes, scenes, and targets using modern computer games. Based on this dataset, we introduce a more practical UAV geo-localization task including partial matches of cross-view paired data, and expand the image-level retrieval to the actual localization in terms of distance (meters). For the construction of drone-view and satellite-view pairs, we adopt a weight-based contrastive learning approach, which allows for effective learning while avoiding additional post-processing matching steps. 
Experiments demonstrate the effectiveness of our data and training method for UAV geo-localization, as well as the generalization capabilities to real-world scenarios.', 'score': 6, 'issue_id': 1, 'pub_date': '2024-09-25', 'pub_date_card': {'ru': '25 сентября', 'en': 'September 25', 'zh': '9月25日'}, 'hash': 'bc7c7309053e8db8', 'authors': ['Yuxiang Ji', 'Boyong He', 'Zhuoyue Tan', 'Liaoni Wu'], 'affiliations': ['Institute of Artificial Intelligence, Xiamen University', 'School of Aerospace Engineering, Xiamen University'], 'pdf_title_img': 'assets\\pdf\\title_img\\2409.16925.jpg', 'data': {'categories': ['#dataset', '#cv', '#training', '#graphs', '#optimization', '#games', '#synthetic', '#3d'], 'emoji': '🛰️', 'ru': {'title': 'Геолокация БПЛА без GPS: новый взгляд с высоты птичьего полета', 'desc': 'Статья представляет новый подход к геолокации беспилотных летательных аппаратов (БПЛА) с использованием компьютерного зрения как альтернативы GPS. Авторы создали масштабный набор данных GTA-UAV, симулирующий различные условия полета БПЛА. Предложенный метод основан на сопоставлении изображений с БПЛА и спутниковых снимков с использованием контрастного обучения. Эксперименты показывают эффективность подхода и его применимость в реальных сценариях.'}, 'en': {'title': 'Revolutionizing UAV Localization with GTA-UAV Dataset', 'desc': 'This paper presents a new dataset called GTA-UAV for vision-based geo-localization of UAVs, which can function without GPS in areas where satellite signals are unavailable. The authors address the limitations of existing datasets that assume perfect image alignment, which is often unrealistic in practical situations. They propose a novel task that includes partial matches between drone-view and satellite-view images, enhancing the localization process by measuring actual distances. The study employs a weight-based contrastive learning method to improve the learning process and demonstrates the effectiveness of their approach through experiments that show good performance in real-world applications.'}, 'zh': {'title': '无人机地理定位的新突破', 'desc': '本文介绍了一种基于视觉的无人机地理定位技术,作为全球导航卫星系统(GNSS)的辅助信息源,能够在没有GPS信号的环境中独立工作。我们构建了一个名为GTA-UAV的大范围连续区域无人机地理定位数据集,包含多种飞行高度、姿态、场景和目标。通过采用基于权重的对比学习方法,我们实现了无人机视角与卫星视角图像对的有效匹配,避免了额外的后处理步骤。实验结果表明,我们的方法在无人机地理定位任务中具有良好的效果和实际场景的泛化能力。'}}}, {'id': 'https://huggingface.co/papers/2409.16288', 'title': 'Self-Supervised Any-Point Tracking by Contrastive Random Walks', 'url': 'https://huggingface.co/papers/2409.16288', 'abstract': 'We present a simple, self-supervised approach to the Tracking Any Point (TAP) problem. We train a global matching transformer to find cycle consistent tracks through video via contrastive random walks, using the transformer\'s attention-based global matching to define the transition matrices for a random walk on a space-time graph. The ability to perform "all pairs" comparisons between points allows the model to obtain high spatial precision and to obtain a strong contrastive learning signal, while avoiding many of the complexities of recent approaches (such as coarse-to-fine matching). To do this, we propose a number of design decisions that allow global matching architectures to be trained through self-supervision using cycle consistency. For example, we identify that transformer-based methods are sensitive to shortcut solutions, and propose a data augmentation scheme to address them. 
Our method achieves strong performance on the TapVid benchmarks, outperforming previous self-supervised tracking methods, such as DIFT, and is competitive with several supervised methods.', 'score': 5, 'issue_id': 1, 'pub_date': '2024-09-24', 'pub_date_card': {'ru': '24 сентября', 'en': 'September 24', 'zh': '9月24日'}, 'hash': '9d0502c19cafc49e', 'authors': ['Ayush Shrivastava', 'Andrew Owens'], 'affiliations': ['University of Michigan'], 'pdf_title_img': 'assets\\pdf\\title_img\\2409.16288.jpg', 'data': {'categories': ['#video', '#cv', '#training', '#graphs', '#optimization', '#benchmark', '#games', '#architecture'], 'emoji': '🎯', 'ru': {'title': 'Глобальное сопоставление для точного отслеживания объектов в видео', 'desc': 'Статья представляет простой самоконтролируемый подход к задаче отслеживания любой точки (TAP). Авторы обучают глобальный трансформер сопоставления для поиска циклически согласованных треков в видео с помощью контрастивных случайных блужданий. Модель использует глобальное сопоставление на основе внимания трансформера для определения матриц перехода при случайном блуждании по пространственно-временному графу. Метод достигает высоких результатов на бенчмарках TapVid, превосходя предыдущие самоконтролируемые методы отслеживания.'}, 'en': {'title': 'Revolutionizing Video Tracking with Self-Supervised Learning', 'desc': 'This paper introduces a self-supervised method for the Tracking Any Point (TAP) problem using a global matching transformer. The approach leverages contrastive random walks to establish cycle consistent tracks in video data, enhancing spatial precision through all pairs comparisons. By focusing on self-supervision and cycle consistency, the model simplifies the training process while effectively avoiding common pitfalls in tracking methods. The proposed design choices, including a data augmentation strategy, lead to superior performance on the TapVid benchmarks compared to existing self-supervised and some supervised tracking techniques.'}, 'zh': {'title': '自监督任意点跟踪的新方法', 'desc': '本文提出了一种简单的自监督方法来解决任意点跟踪(TAP)问题。我们训练了一个全局匹配变换器,通过对比随机游走在视频中找到循环一致的轨迹。该方法利用变换器的注意力机制进行全局匹配,从而定义时空图上的随机游走转移矩阵。我们的模型在TapVid基准测试中表现出色,超越了之前的自监督跟踪方法,并与一些监督方法具有竞争力。'}}}, {'id': 'https://huggingface.co/papers/2409.16666', 'title': 'TalkinNeRF: Animatable Neural Fields for Full-Body Talking Humans', 'url': 'https://huggingface.co/papers/2409.16666', 'abstract': 'We introduce a novel framework that learns a dynamic neural radiance field (NeRF) for full-body talking humans from monocular videos. Prior work represents only the body pose or the face. However, humans communicate with their full body, combining body pose, hand gestures, as well as facial expressions. In this work, we propose TalkinNeRF, a unified NeRF-based network that represents the holistic 4D human motion. Given a monocular video of a subject, we learn corresponding modules for the body, face, and hands, that are combined together to generate the final result. To capture complex finger articulation, we learn an additional deformation field for the hands. Our multi-identity representation enables simultaneous training for multiple subjects, as well as robust animation under completely unseen poses. It can also generalize to novel identities, given only a short video as input. 
We demonstrate state-of-the-art performance for animating full-body talking humans, with fine-grained hand articulation and facial expressions.', 'score': 5, 'issue_id': 1, 'pub_date': '2024-09-25', 'pub_date_card': {'ru': '25 сентября', 'en': 'September 25', 'zh': '9月25日'}, 'hash': '95c442e9c5d9f23c', 'authors': ['Aggelina Chatziagapi', 'Bindita Chaudhuri', 'Amit Kumar', 'Rakesh Ranjan', 'Dimitris Samaras', 'Nikolaos Sarafianos'], 'affiliations': ['Flawless AI', 'Meta Reality Labs', 'Stony Brook University'], 'pdf_title_img': 'assets\\pdf\\title_img\\2409.16666.jpg', 'data': {'categories': ['#video', '#architecture', '#cv', '#3d'], 'emoji': '🗣️', 'ru': {'title': 'TalkinNeRF: Реалистичная анимация говорящих людей с помощью NeRF', 'desc': 'Статья представляет новую систему для создания динамических нейронных полей излучения (NeRF) полноразмерных говорящих людей на основе монокулярных видео. TalkinNeRF - это унифицированная NeRF-сеть, которая представляет целостное 4D-движение человека, включая позу тела, жесты рук и мимику. Система использует отдельные модули для тела, лица и рук, а также дополнительное поле деформации для сложной артикуляции пальцев. Многоидентичное представление позволяет одновременно обучать модель на нескольких субъектах и обобщать ее на новые личности.'}, 'en': {'title': 'Animating Full-Body Talking Humans with TalkinNeRF', 'desc': 'This paper presents TalkinNeRF, a new framework that learns to create dynamic neural radiance fields (NeRF) for animating full-body talking humans using just monocular videos. Unlike previous methods that focused only on body pose or facial expressions, TalkinNeRF integrates body motion, hand gestures, and facial expressions into a single model. It includes specialized modules for the body, face, and hands, and introduces a deformation field to accurately capture complex hand movements. The framework allows for training on multiple identities and can generate animations for unseen poses, demonstrating advanced capabilities in human motion representation and animation.'}, 'zh': {'title': '全身说话的动态神经辐射场', 'desc': '我们提出了一种新颖的框架,能够从单目视频中学习动态神经辐射场(NeRF),用于全身说话的人类。以往的研究仅表示身体姿势或面部表情,而我们的方法结合了身体姿势、手势和面部表情,全面捕捉人类的交流方式。我们提出的TalkinNeRF网络能够同时处理身体、面部和手部的运动,并生成最终结果。该方法支持多身份表示,能够在未见过的姿势下进行鲁棒动画,并且可以根据短视频输入生成新的身份。'}}}];
const articlesContainer = document.getElementById('articles-container');
const sortDropdown = document.getElementById('sort-dropdown');
const categoryFiltersContainer = document.getElementById('category-filters');
const categoryFiltersLogicOptions = document.getElementById('category-options');
const categoryToggle = document.getElementById('category-toggle');
const clearCategoriesButton = document.getElementById('clear-categories');
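// Page state: selected category filters, the articles currently matching them, the sort key, and filter-related flags.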
let selectedCategories = [];
let selectedArticles = [];
let sortBy = 'issue_id';
let showLimitHint = false;
let filterLogicIsAnd = false;
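// Read the comma-separated "cat" query parameter and return its entries as "#"-prefixed category tags.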
function getUrlParameters() {
const urlParams = new URLSearchParams(window.location.search);
const categoriesParam = urlParams.get('cat');
let categories = categoriesParam ? categoriesParam.split(',') : [];
categories = categories.map(element => `#${element}`);
return categories;
}
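// Write the current selection back into the URL as "?cat=..." (without the "#" prefixes), or drop the parameter when nothing is selected.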
function updateUrlWithCategories() {
let cleanedCategories = selectedCategories.map(element => element.replace(/^#/, ''));
const newUrl = cleanedCategories.length > 0
? `${window.location.pathname}?cat=${cleanedCategories.join(',')}`
: window.location.pathname;
console.log("cleanedCategories", cleanedCategories)
window.history.pushState({}, '', newUrl);
}
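// Restore persisted settings from localStorage: dark mode, sort order, and AND/OR filter logic.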
function loadSettings() {
const themeToggle = document.getElementById('theme-toggle');
const sortDropdown = document.getElementById('sort-dropdown');
const isDarkMode = localStorage.getItem('darkMode') === 'true';
let settingSortBy = localStorage.getItem('sort_by');
filterLogicIsAnd = localStorage.getItem('filter_logic_is_and') === 'true';
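// Dark mode also swaps the page title to "hf nightly" and rotates its icon.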
if (isDarkMode) {
document.body.classList.remove('light-theme');
document.body.classList.add('dark-theme');
themeToggle.checked = true;
const title = document.getElementById('doomgrad');
title.innerHTML = "hf nightly";
const titleSign = document.getElementById('doomgrad-icon');
titleSign.classList.add('rotate');
}
if ((!settingSortBy) || (settingSortBy === 'null')) {
settingSortBy = 'issue_id';
}
if (filterLogicIsAnd) {
document.getElementById('filter-logic-and').checked = true;
} else {
document.getElementById('filter-logic-or').checked = true;
}
sortDropdown.value = settingSortBy;
sortBy = settingSortBy;
}
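// Wire up the theme switch and the AND/OR filter-logic controls.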
document.getElementById('theme-toggle').addEventListener('change', toggleTheme);
document.getElementById('filter-logic-and').addEventListener('change', () => {
filterLogicIsAnd = true;
localStorage.setItem('filter_logic_is_and', 'true');
filterAndRenderArticles();
updateSelectedArticlesTitle();
});
document.getElementById('filter-logic-or').addEventListener('change', () => {
filterLogicIsAnd = false;
localStorage.setItem('filter_logic_is_and', 'false');
filterAndRenderArticles();
updateSelectedArticlesTitle();
});
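// Collect the sorted, de-duplicated set of category tags used by the given articles.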
function getUniqueCategories(articles) {
const categories = new Set();
articles.forEach(article => {
if (article.data && article.data.categories) {
article.data.categories.forEach(cat => categories.add(cat));
}
});
let res = Array.from(categories);
res.sort();
return res;
}
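// Render one clickable filter button per category tag; labels include this issue's article counts, and tags without a count are marked inactive.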
function createCategoryButtons() {
//const categories = getUniqueCategories(articlesData);
const categories = ['#3d (4)', '#agents (2)', '#agi (1)', '#alignment', '#architecture (7)', '#audio (1)', '#benchmark (6)', '#cv (6)', '#data (2)', '#dataset (5)', '#diffusion (2)', '#ethics', '#games (3)', '#graphs (2)', '#hallucinations (1)', '#healthcare (1)', '#inference (1)', '#interpretability', '#leakage', '#long_context (1)', '#low_resource', '#machine_translation', '#math', '#multilingual (2)', '#multimodal (2)', '#open_source (5)', '#optimization (6)', '#plp (2)', '#rag (1)', '#reasoning (1)', '#rl (1)', '#rlhf', '#robotics (1)', '#science (2)', '#security', '#small_models (1)', '#story_generation', '#survey', '#synthetic (4)', '#training (5)', '#transfer_learning', '#video (3)'];
categories.forEach(category => {
let catNameSplitted = category.split(/(\s+)/);
let catName = catNameSplitted[0];
const button = document.createElement('span');
button.textContent = catName;
button.className = 'category-button';
if (catNameSplitted.length < 2) {
button.classList.add('inactive');
}
button.onclick = () => toggleCategory(catName, button);
categoryFiltersContainer.appendChild(button);
});
}
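// Toggle a category in the current selection, then re-render the articles and sync localStorage, the header count, the URL, and the filter-option visibility.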
function toggleCategory(category, button) {
const index = selectedCategories.indexOf(category);
if (index === -1) {
selectedCategories.push(category);
button.classList.add('active');
} else {
selectedCategories.splice(index, 1);
button.classList.remove('active');
}
filterAndRenderArticles();
saveCategorySelection();
updateSelectedArticlesTitle();
updateUrlWithCategories();
setFilterOptionsVisibility();
}
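// Persist the current category selection.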
function saveCategorySelection() {
localStorage.setItem('selectedCategories', JSON.stringify(selectedCategories));
}
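// Show how many articles match the active filters in the filter toggle label; hide the count when no filter is applied.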
function updateSelectedArticlesTitle() {
if ((selectedArticles.length === articlesData.length) && (selectedCategories.length === 0)) {
categoryToggle.textContent = `🏷️ ${filterLabel[currentLang]}`;
} else {
categoryToggle.textContent = `🏷️ ${filterLabel[currentLang]} (${formatArticlesTitle(selectedArticles.length, currentLang)})`;
}
}
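// Reset the persisted category selection to an empty list.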
function cleanCategorySelection() {
localStorage.setItem('selectedCategories', JSON.stringify([]));
}