From 4d0c4f37d6cd137aab2db19da71b5791e3e7e77d Mon Sep 17 00:00:00 2001 From: Xingyao Wang Date: Wed, 3 Jul 2024 04:30:38 +0800 Subject: [PATCH] [Evaluation] fix SWE-Bench docker image name (#2751) * fix double underscore * remove unused script --- .../all-swebench-lite-instance-images.txt | 600 +++++++++--------- .../swe_bench/scripts/eval/prep_eval.sh | 16 - 2 files changed, 300 insertions(+), 316 deletions(-) delete mode 100755 evaluation/swe_bench/scripts/eval/prep_eval.sh diff --git a/evaluation/swe_bench/scripts/docker/all-swebench-lite-instance-images.txt b/evaluation/swe_bench/scripts/docker/all-swebench-lite-instance-images.txt index 845f4ddcf479..90f5719add07 100644 --- a/evaluation/swe_bench/scripts/docker/all-swebench-lite-instance-images.txt +++ b/evaluation/swe_bench/scripts/docker/all-swebench-lite-instance-images.txt @@ -34,303 +34,303 @@ sweb.env.x86_64.e3afd7f04b325a4de4982d:latest sweb.env.x86_64.e5bb89bf78258a7d14c34b:latest sweb.env.x86_64.e83e37f52c09532c62acfb:latest sweb.env.x86_64.efa6065ed5bf204410fd53:latest -sweb.eval.x86_64.astropy__astropy-12907:latest -sweb.eval.x86_64.astropy__astropy-14182:latest -sweb.eval.x86_64.astropy__astropy-14365:latest -sweb.eval.x86_64.astropy__astropy-14995:latest -sweb.eval.x86_64.astropy__astropy-6938:latest -sweb.eval.x86_64.astropy__astropy-7746:latest -sweb.eval.x86_64.django__django-10914:latest -sweb.eval.x86_64.django__django-10924:latest -sweb.eval.x86_64.django__django-11001:latest -sweb.eval.x86_64.django__django-11019:latest -sweb.eval.x86_64.django__django-11039:latest -sweb.eval.x86_64.django__django-11049:latest -sweb.eval.x86_64.django__django-11099:latest -sweb.eval.x86_64.django__django-11133:latest -sweb.eval.x86_64.django__django-11179:latest -sweb.eval.x86_64.django__django-11283:latest -sweb.eval.x86_64.django__django-11422:latest -sweb.eval.x86_64.django__django-11564:latest -sweb.eval.x86_64.django__django-11583:latest -sweb.eval.x86_64.django__django-11620:latest -sweb.eval.x86_64.django__django-11630:latest -sweb.eval.x86_64.django__django-11742:latest -sweb.eval.x86_64.django__django-11797:latest -sweb.eval.x86_64.django__django-11815:latest -sweb.eval.x86_64.django__django-11848:latest -sweb.eval.x86_64.django__django-11905:latest -sweb.eval.x86_64.django__django-11910:latest -sweb.eval.x86_64.django__django-11964:latest -sweb.eval.x86_64.django__django-11999:latest -sweb.eval.x86_64.django__django-12113:latest -sweb.eval.x86_64.django__django-12125:latest -sweb.eval.x86_64.django__django-12184:latest -sweb.eval.x86_64.django__django-12284:latest -sweb.eval.x86_64.django__django-12286:latest -sweb.eval.x86_64.django__django-12308:latest -sweb.eval.x86_64.django__django-12453:latest -sweb.eval.x86_64.django__django-12470:latest -sweb.eval.x86_64.django__django-12497:latest -sweb.eval.x86_64.django__django-12589:latest -sweb.eval.x86_64.django__django-12700:latest -sweb.eval.x86_64.django__django-12708:latest -sweb.eval.x86_64.django__django-12747:latest -sweb.eval.x86_64.django__django-12856:latest -sweb.eval.x86_64.django__django-12908:latest -sweb.eval.x86_64.django__django-12915:latest -sweb.eval.x86_64.django__django-12983:latest -sweb.eval.x86_64.django__django-13028:latest -sweb.eval.x86_64.django__django-13033:latest -sweb.eval.x86_64.django__django-13158:latest -sweb.eval.x86_64.django__django-13220:latest -sweb.eval.x86_64.django__django-13230:latest -sweb.eval.x86_64.django__django-13265:latest -sweb.eval.x86_64.django__django-13315:latest -sweb.eval.x86_64.django__django-13321:latest -sweb.eval.x86_64.django__django-13401:latest -sweb.eval.x86_64.django__django-13447:latest -sweb.eval.x86_64.django__django-13448:latest -sweb.eval.x86_64.django__django-13551:latest -sweb.eval.x86_64.django__django-13590:latest -sweb.eval.x86_64.django__django-13658:latest -sweb.eval.x86_64.django__django-13660:latest -sweb.eval.x86_64.django__django-13710:latest -sweb.eval.x86_64.django__django-13757:latest -sweb.eval.x86_64.django__django-13768:latest -sweb.eval.x86_64.django__django-13925:latest -sweb.eval.x86_64.django__django-13933:latest -sweb.eval.x86_64.django__django-13964:latest -sweb.eval.x86_64.django__django-14016:latest -sweb.eval.x86_64.django__django-14017:latest -sweb.eval.x86_64.django__django-14155:latest -sweb.eval.x86_64.django__django-14238:latest -sweb.eval.x86_64.django__django-14382:latest -sweb.eval.x86_64.django__django-14411:latest -sweb.eval.x86_64.django__django-14534:latest -sweb.eval.x86_64.django__django-14580:latest -sweb.eval.x86_64.django__django-14608:latest -sweb.eval.x86_64.django__django-14667:latest -sweb.eval.x86_64.django__django-14672:latest -sweb.eval.x86_64.django__django-14730:latest -sweb.eval.x86_64.django__django-14752:latest -sweb.eval.x86_64.django__django-14787:latest -sweb.eval.x86_64.django__django-14855:latest -sweb.eval.x86_64.django__django-14915:latest -sweb.eval.x86_64.django__django-14997:latest -sweb.eval.x86_64.django__django-14999:latest -sweb.eval.x86_64.django__django-15061:latest -sweb.eval.x86_64.django__django-15202:latest -sweb.eval.x86_64.django__django-15213:latest -sweb.eval.x86_64.django__django-15252:latest -sweb.eval.x86_64.django__django-15320:latest -sweb.eval.x86_64.django__django-15347:latest -sweb.eval.x86_64.django__django-15388:latest -sweb.eval.x86_64.django__django-15400:latest -sweb.eval.x86_64.django__django-15498:latest -sweb.eval.x86_64.django__django-15695:latest -sweb.eval.x86_64.django__django-15738:latest -sweb.eval.x86_64.django__django-15781:latest -sweb.eval.x86_64.django__django-15789:latest -sweb.eval.x86_64.django__django-15790:latest -sweb.eval.x86_64.django__django-15814:latest -sweb.eval.x86_64.django__django-15819:latest -sweb.eval.x86_64.django__django-15851:latest -sweb.eval.x86_64.django__django-15902:latest -sweb.eval.x86_64.django__django-15996:latest -sweb.eval.x86_64.django__django-16041:latest -sweb.eval.x86_64.django__django-16046:latest -sweb.eval.x86_64.django__django-16139:latest -sweb.eval.x86_64.django__django-16229:latest -sweb.eval.x86_64.django__django-16255:latest -sweb.eval.x86_64.django__django-16379:latest -sweb.eval.x86_64.django__django-16400:latest -sweb.eval.x86_64.django__django-16408:latest -sweb.eval.x86_64.django__django-16527:latest -sweb.eval.x86_64.django__django-16595:latest -sweb.eval.x86_64.django__django-16816:latest -sweb.eval.x86_64.django__django-16820:latest -sweb.eval.x86_64.django__django-16873:latest -sweb.eval.x86_64.django__django-16910:latest -sweb.eval.x86_64.django__django-17051:latest -sweb.eval.x86_64.django__django-17087:latest -sweb.eval.x86_64.matplotlib__matplotlib-18869:latest -sweb.eval.x86_64.matplotlib__matplotlib-22711:latest -sweb.eval.x86_64.matplotlib__matplotlib-22835:latest -sweb.eval.x86_64.matplotlib__matplotlib-23299:latest -sweb.eval.x86_64.matplotlib__matplotlib-23314:latest -sweb.eval.x86_64.matplotlib__matplotlib-23476:latest -sweb.eval.x86_64.matplotlib__matplotlib-23562:latest -sweb.eval.x86_64.matplotlib__matplotlib-23563:latest -sweb.eval.x86_64.matplotlib__matplotlib-23913:latest -sweb.eval.x86_64.matplotlib__matplotlib-23964:latest -sweb.eval.x86_64.matplotlib__matplotlib-23987:latest -sweb.eval.x86_64.matplotlib__matplotlib-24149:latest -sweb.eval.x86_64.matplotlib__matplotlib-24265:latest -sweb.eval.x86_64.matplotlib__matplotlib-24334:latest -sweb.eval.x86_64.matplotlib__matplotlib-24970:latest -sweb.eval.x86_64.matplotlib__matplotlib-25079:latest -sweb.eval.x86_64.matplotlib__matplotlib-25311:latest -sweb.eval.x86_64.matplotlib__matplotlib-25332:latest -sweb.eval.x86_64.matplotlib__matplotlib-25433:latest -sweb.eval.x86_64.matplotlib__matplotlib-25442:latest -sweb.eval.x86_64.matplotlib__matplotlib-25498:latest -sweb.eval.x86_64.matplotlib__matplotlib-26011:latest -sweb.eval.x86_64.matplotlib__matplotlib-26020:latest -sweb.eval.x86_64.mwaskom__seaborn-2848:latest -sweb.eval.x86_64.mwaskom__seaborn-3010:latest -sweb.eval.x86_64.mwaskom__seaborn-3190:latest -sweb.eval.x86_64.mwaskom__seaborn-3407:latest -sweb.eval.x86_64.pallets__flask-4045:latest -sweb.eval.x86_64.pallets__flask-4992:latest -sweb.eval.x86_64.pallets__flask-5063:latest -sweb.eval.x86_64.psf__requests-1963:latest -sweb.eval.x86_64.psf__requests-2148:latest -sweb.eval.x86_64.psf__requests-2317:latest -sweb.eval.x86_64.psf__requests-2674:latest -sweb.eval.x86_64.psf__requests-3362:latest -sweb.eval.x86_64.psf__requests-863:latest -sweb.eval.x86_64.pydata__xarray-3364:latest -sweb.eval.x86_64.pydata__xarray-4094:latest -sweb.eval.x86_64.pydata__xarray-4248:latest -sweb.eval.x86_64.pydata__xarray-4493:latest -sweb.eval.x86_64.pydata__xarray-5131:latest -sweb.eval.x86_64.pylint-dev__pylint-5859:latest -sweb.eval.x86_64.pylint-dev__pylint-6506:latest -sweb.eval.x86_64.pylint-dev__pylint-7080:latest -sweb.eval.x86_64.pylint-dev__pylint-7114:latest -sweb.eval.x86_64.pylint-dev__pylint-7228:latest -sweb.eval.x86_64.pylint-dev__pylint-7993:latest -sweb.eval.x86_64.pytest-dev__pytest-11143:latest -sweb.eval.x86_64.pytest-dev__pytest-11148:latest -sweb.eval.x86_64.pytest-dev__pytest-5103:latest -sweb.eval.x86_64.pytest-dev__pytest-5221:latest -sweb.eval.x86_64.pytest-dev__pytest-5227:latest -sweb.eval.x86_64.pytest-dev__pytest-5413:latest -sweb.eval.x86_64.pytest-dev__pytest-5495:latest -sweb.eval.x86_64.pytest-dev__pytest-5692:latest -sweb.eval.x86_64.pytest-dev__pytest-6116:latest -sweb.eval.x86_64.pytest-dev__pytest-7168:latest -sweb.eval.x86_64.pytest-dev__pytest-7220:latest -sweb.eval.x86_64.pytest-dev__pytest-7373:latest -sweb.eval.x86_64.pytest-dev__pytest-7432:latest -sweb.eval.x86_64.pytest-dev__pytest-7490:latest -sweb.eval.x86_64.pytest-dev__pytest-8365:latest -sweb.eval.x86_64.pytest-dev__pytest-8906:latest -sweb.eval.x86_64.pytest-dev__pytest-9359:latest -sweb.eval.x86_64.scikit-learn__scikit-learn-10297:latest -sweb.eval.x86_64.scikit-learn__scikit-learn-10508:latest -sweb.eval.x86_64.scikit-learn__scikit-learn-10949:latest -sweb.eval.x86_64.scikit-learn__scikit-learn-11040:latest -sweb.eval.x86_64.scikit-learn__scikit-learn-11281:latest -sweb.eval.x86_64.scikit-learn__scikit-learn-12471:latest -sweb.eval.x86_64.scikit-learn__scikit-learn-13142:latest -sweb.eval.x86_64.scikit-learn__scikit-learn-13241:latest -sweb.eval.x86_64.scikit-learn__scikit-learn-13439:latest -sweb.eval.x86_64.scikit-learn__scikit-learn-13496:latest -sweb.eval.x86_64.scikit-learn__scikit-learn-13497:latest -sweb.eval.x86_64.scikit-learn__scikit-learn-13584:latest -sweb.eval.x86_64.scikit-learn__scikit-learn-13779:latest -sweb.eval.x86_64.scikit-learn__scikit-learn-14087:latest -sweb.eval.x86_64.scikit-learn__scikit-learn-14092:latest -sweb.eval.x86_64.scikit-learn__scikit-learn-14894:latest -sweb.eval.x86_64.scikit-learn__scikit-learn-14983:latest -sweb.eval.x86_64.scikit-learn__scikit-learn-15512:latest -sweb.eval.x86_64.scikit-learn__scikit-learn-15535:latest -sweb.eval.x86_64.scikit-learn__scikit-learn-25500:latest -sweb.eval.x86_64.scikit-learn__scikit-learn-25570:latest -sweb.eval.x86_64.scikit-learn__scikit-learn-25638:latest -sweb.eval.x86_64.scikit-learn__scikit-learn-25747:latest -sweb.eval.x86_64.sphinx-doc__sphinx-10325:latest -sweb.eval.x86_64.sphinx-doc__sphinx-10451:latest -sweb.eval.x86_64.sphinx-doc__sphinx-11445:latest -sweb.eval.x86_64.sphinx-doc__sphinx-7686:latest -sweb.eval.x86_64.sphinx-doc__sphinx-7738:latest -sweb.eval.x86_64.sphinx-doc__sphinx-7975:latest -sweb.eval.x86_64.sphinx-doc__sphinx-8273:latest -sweb.eval.x86_64.sphinx-doc__sphinx-8282:latest -sweb.eval.x86_64.sphinx-doc__sphinx-8435:latest -sweb.eval.x86_64.sphinx-doc__sphinx-8474:latest -sweb.eval.x86_64.sphinx-doc__sphinx-8506:latest -sweb.eval.x86_64.sphinx-doc__sphinx-8595:latest -sweb.eval.x86_64.sphinx-doc__sphinx-8627:latest -sweb.eval.x86_64.sphinx-doc__sphinx-8713:latest -sweb.eval.x86_64.sphinx-doc__sphinx-8721:latest -sweb.eval.x86_64.sphinx-doc__sphinx-8801:latest -sweb.eval.x86_64.sympy__sympy-11400:latest -sweb.eval.x86_64.sympy__sympy-11870:latest -sweb.eval.x86_64.sympy__sympy-11897:latest -sweb.eval.x86_64.sympy__sympy-12171:latest -sweb.eval.x86_64.sympy__sympy-12236:latest -sweb.eval.x86_64.sympy__sympy-12419:latest -sweb.eval.x86_64.sympy__sympy-12454:latest -sweb.eval.x86_64.sympy__sympy-12481:latest -sweb.eval.x86_64.sympy__sympy-13031:latest -sweb.eval.x86_64.sympy__sympy-13043:latest -sweb.eval.x86_64.sympy__sympy-13146:latest -sweb.eval.x86_64.sympy__sympy-13177:latest -sweb.eval.x86_64.sympy__sympy-13437:latest -sweb.eval.x86_64.sympy__sympy-13471:latest -sweb.eval.x86_64.sympy__sympy-13480:latest -sweb.eval.x86_64.sympy__sympy-13647:latest -sweb.eval.x86_64.sympy__sympy-13773:latest -sweb.eval.x86_64.sympy__sympy-13895:latest -sweb.eval.x86_64.sympy__sympy-13915:latest -sweb.eval.x86_64.sympy__sympy-13971:latest -sweb.eval.x86_64.sympy__sympy-14024:latest -sweb.eval.x86_64.sympy__sympy-14308:latest -sweb.eval.x86_64.sympy__sympy-14317:latest -sweb.eval.x86_64.sympy__sympy-14396:latest -sweb.eval.x86_64.sympy__sympy-14774:latest -sweb.eval.x86_64.sympy__sympy-14817:latest -sweb.eval.x86_64.sympy__sympy-15011:latest -sweb.eval.x86_64.sympy__sympy-15308:latest -sweb.eval.x86_64.sympy__sympy-15345:latest -sweb.eval.x86_64.sympy__sympy-15346:latest -sweb.eval.x86_64.sympy__sympy-15609:latest -sweb.eval.x86_64.sympy__sympy-15678:latest -sweb.eval.x86_64.sympy__sympy-16106:latest -sweb.eval.x86_64.sympy__sympy-16281:latest -sweb.eval.x86_64.sympy__sympy-16503:latest -sweb.eval.x86_64.sympy__sympy-16792:latest -sweb.eval.x86_64.sympy__sympy-16988:latest -sweb.eval.x86_64.sympy__sympy-17022:latest -sweb.eval.x86_64.sympy__sympy-17139:latest -sweb.eval.x86_64.sympy__sympy-17630:latest -sweb.eval.x86_64.sympy__sympy-17655:latest -sweb.eval.x86_64.sympy__sympy-18057:latest -sweb.eval.x86_64.sympy__sympy-18087:latest -sweb.eval.x86_64.sympy__sympy-18189:latest -sweb.eval.x86_64.sympy__sympy-18199:latest -sweb.eval.x86_64.sympy__sympy-18532:latest -sweb.eval.x86_64.sympy__sympy-18621:latest -sweb.eval.x86_64.sympy__sympy-18698:latest -sweb.eval.x86_64.sympy__sympy-18835:latest -sweb.eval.x86_64.sympy__sympy-19007:latest -sweb.eval.x86_64.sympy__sympy-19254:latest -sweb.eval.x86_64.sympy__sympy-19487:latest -sweb.eval.x86_64.sympy__sympy-20049:latest -sweb.eval.x86_64.sympy__sympy-20154:latest -sweb.eval.x86_64.sympy__sympy-20212:latest -sweb.eval.x86_64.sympy__sympy-20322:latest -sweb.eval.x86_64.sympy__sympy-20442:latest -sweb.eval.x86_64.sympy__sympy-20590:latest -sweb.eval.x86_64.sympy__sympy-20639:latest -sweb.eval.x86_64.sympy__sympy-21055:latest -sweb.eval.x86_64.sympy__sympy-21171:latest -sweb.eval.x86_64.sympy__sympy-21379:latest -sweb.eval.x86_64.sympy__sympy-21612:latest -sweb.eval.x86_64.sympy__sympy-21614:latest -sweb.eval.x86_64.sympy__sympy-21627:latest -sweb.eval.x86_64.sympy__sympy-21847:latest -sweb.eval.x86_64.sympy__sympy-22005:latest -sweb.eval.x86_64.sympy__sympy-22714:latest -sweb.eval.x86_64.sympy__sympy-22840:latest -sweb.eval.x86_64.sympy__sympy-23117:latest -sweb.eval.x86_64.sympy__sympy-23191:latest -sweb.eval.x86_64.sympy__sympy-23262:latest -sweb.eval.x86_64.sympy__sympy-24066:latest -sweb.eval.x86_64.sympy__sympy-24102:latest -sweb.eval.x86_64.sympy__sympy-24152:latest -sweb.eval.x86_64.sympy__sympy-24213:latest -sweb.eval.x86_64.sympy__sympy-24909:latest +sweb.eval.x86_64.astropy_s_astropy-12907:latest +sweb.eval.x86_64.astropy_s_astropy-14182:latest +sweb.eval.x86_64.astropy_s_astropy-14365:latest +sweb.eval.x86_64.astropy_s_astropy-14995:latest +sweb.eval.x86_64.astropy_s_astropy-6938:latest +sweb.eval.x86_64.astropy_s_astropy-7746:latest +sweb.eval.x86_64.django_s_django-10914:latest +sweb.eval.x86_64.django_s_django-10924:latest +sweb.eval.x86_64.django_s_django-11001:latest +sweb.eval.x86_64.django_s_django-11019:latest +sweb.eval.x86_64.django_s_django-11039:latest +sweb.eval.x86_64.django_s_django-11049:latest +sweb.eval.x86_64.django_s_django-11099:latest +sweb.eval.x86_64.django_s_django-11133:latest +sweb.eval.x86_64.django_s_django-11179:latest +sweb.eval.x86_64.django_s_django-11283:latest +sweb.eval.x86_64.django_s_django-11422:latest +sweb.eval.x86_64.django_s_django-11564:latest +sweb.eval.x86_64.django_s_django-11583:latest +sweb.eval.x86_64.django_s_django-11620:latest +sweb.eval.x86_64.django_s_django-11630:latest +sweb.eval.x86_64.django_s_django-11742:latest +sweb.eval.x86_64.django_s_django-11797:latest +sweb.eval.x86_64.django_s_django-11815:latest +sweb.eval.x86_64.django_s_django-11848:latest +sweb.eval.x86_64.django_s_django-11905:latest +sweb.eval.x86_64.django_s_django-11910:latest +sweb.eval.x86_64.django_s_django-11964:latest +sweb.eval.x86_64.django_s_django-11999:latest +sweb.eval.x86_64.django_s_django-12113:latest +sweb.eval.x86_64.django_s_django-12125:latest +sweb.eval.x86_64.django_s_django-12184:latest +sweb.eval.x86_64.django_s_django-12284:latest +sweb.eval.x86_64.django_s_django-12286:latest +sweb.eval.x86_64.django_s_django-12308:latest +sweb.eval.x86_64.django_s_django-12453:latest +sweb.eval.x86_64.django_s_django-12470:latest +sweb.eval.x86_64.django_s_django-12497:latest +sweb.eval.x86_64.django_s_django-12589:latest +sweb.eval.x86_64.django_s_django-12700:latest +sweb.eval.x86_64.django_s_django-12708:latest +sweb.eval.x86_64.django_s_django-12747:latest +sweb.eval.x86_64.django_s_django-12856:latest +sweb.eval.x86_64.django_s_django-12908:latest +sweb.eval.x86_64.django_s_django-12915:latest +sweb.eval.x86_64.django_s_django-12983:latest +sweb.eval.x86_64.django_s_django-13028:latest +sweb.eval.x86_64.django_s_django-13033:latest +sweb.eval.x86_64.django_s_django-13158:latest +sweb.eval.x86_64.django_s_django-13220:latest +sweb.eval.x86_64.django_s_django-13230:latest +sweb.eval.x86_64.django_s_django-13265:latest +sweb.eval.x86_64.django_s_django-13315:latest +sweb.eval.x86_64.django_s_django-13321:latest +sweb.eval.x86_64.django_s_django-13401:latest +sweb.eval.x86_64.django_s_django-13447:latest +sweb.eval.x86_64.django_s_django-13448:latest +sweb.eval.x86_64.django_s_django-13551:latest +sweb.eval.x86_64.django_s_django-13590:latest +sweb.eval.x86_64.django_s_django-13658:latest +sweb.eval.x86_64.django_s_django-13660:latest +sweb.eval.x86_64.django_s_django-13710:latest +sweb.eval.x86_64.django_s_django-13757:latest +sweb.eval.x86_64.django_s_django-13768:latest +sweb.eval.x86_64.django_s_django-13925:latest +sweb.eval.x86_64.django_s_django-13933:latest +sweb.eval.x86_64.django_s_django-13964:latest +sweb.eval.x86_64.django_s_django-14016:latest +sweb.eval.x86_64.django_s_django-14017:latest +sweb.eval.x86_64.django_s_django-14155:latest +sweb.eval.x86_64.django_s_django-14238:latest +sweb.eval.x86_64.django_s_django-14382:latest +sweb.eval.x86_64.django_s_django-14411:latest +sweb.eval.x86_64.django_s_django-14534:latest +sweb.eval.x86_64.django_s_django-14580:latest +sweb.eval.x86_64.django_s_django-14608:latest +sweb.eval.x86_64.django_s_django-14667:latest +sweb.eval.x86_64.django_s_django-14672:latest +sweb.eval.x86_64.django_s_django-14730:latest +sweb.eval.x86_64.django_s_django-14752:latest +sweb.eval.x86_64.django_s_django-14787:latest +sweb.eval.x86_64.django_s_django-14855:latest +sweb.eval.x86_64.django_s_django-14915:latest +sweb.eval.x86_64.django_s_django-14997:latest +sweb.eval.x86_64.django_s_django-14999:latest +sweb.eval.x86_64.django_s_django-15061:latest +sweb.eval.x86_64.django_s_django-15202:latest +sweb.eval.x86_64.django_s_django-15213:latest +sweb.eval.x86_64.django_s_django-15252:latest +sweb.eval.x86_64.django_s_django-15320:latest +sweb.eval.x86_64.django_s_django-15347:latest +sweb.eval.x86_64.django_s_django-15388:latest +sweb.eval.x86_64.django_s_django-15400:latest +sweb.eval.x86_64.django_s_django-15498:latest +sweb.eval.x86_64.django_s_django-15695:latest +sweb.eval.x86_64.django_s_django-15738:latest +sweb.eval.x86_64.django_s_django-15781:latest +sweb.eval.x86_64.django_s_django-15789:latest +sweb.eval.x86_64.django_s_django-15790:latest +sweb.eval.x86_64.django_s_django-15814:latest +sweb.eval.x86_64.django_s_django-15819:latest +sweb.eval.x86_64.django_s_django-15851:latest +sweb.eval.x86_64.django_s_django-15902:latest +sweb.eval.x86_64.django_s_django-15996:latest +sweb.eval.x86_64.django_s_django-16041:latest +sweb.eval.x86_64.django_s_django-16046:latest +sweb.eval.x86_64.django_s_django-16139:latest +sweb.eval.x86_64.django_s_django-16229:latest +sweb.eval.x86_64.django_s_django-16255:latest +sweb.eval.x86_64.django_s_django-16379:latest +sweb.eval.x86_64.django_s_django-16400:latest +sweb.eval.x86_64.django_s_django-16408:latest +sweb.eval.x86_64.django_s_django-16527:latest +sweb.eval.x86_64.django_s_django-16595:latest +sweb.eval.x86_64.django_s_django-16816:latest +sweb.eval.x86_64.django_s_django-16820:latest +sweb.eval.x86_64.django_s_django-16873:latest +sweb.eval.x86_64.django_s_django-16910:latest +sweb.eval.x86_64.django_s_django-17051:latest +sweb.eval.x86_64.django_s_django-17087:latest +sweb.eval.x86_64.matplotlib_s_matplotlib-18869:latest +sweb.eval.x86_64.matplotlib_s_matplotlib-22711:latest +sweb.eval.x86_64.matplotlib_s_matplotlib-22835:latest +sweb.eval.x86_64.matplotlib_s_matplotlib-23299:latest +sweb.eval.x86_64.matplotlib_s_matplotlib-23314:latest +sweb.eval.x86_64.matplotlib_s_matplotlib-23476:latest +sweb.eval.x86_64.matplotlib_s_matplotlib-23562:latest +sweb.eval.x86_64.matplotlib_s_matplotlib-23563:latest +sweb.eval.x86_64.matplotlib_s_matplotlib-23913:latest +sweb.eval.x86_64.matplotlib_s_matplotlib-23964:latest +sweb.eval.x86_64.matplotlib_s_matplotlib-23987:latest +sweb.eval.x86_64.matplotlib_s_matplotlib-24149:latest +sweb.eval.x86_64.matplotlib_s_matplotlib-24265:latest +sweb.eval.x86_64.matplotlib_s_matplotlib-24334:latest +sweb.eval.x86_64.matplotlib_s_matplotlib-24970:latest +sweb.eval.x86_64.matplotlib_s_matplotlib-25079:latest +sweb.eval.x86_64.matplotlib_s_matplotlib-25311:latest +sweb.eval.x86_64.matplotlib_s_matplotlib-25332:latest +sweb.eval.x86_64.matplotlib_s_matplotlib-25433:latest +sweb.eval.x86_64.matplotlib_s_matplotlib-25442:latest +sweb.eval.x86_64.matplotlib_s_matplotlib-25498:latest +sweb.eval.x86_64.matplotlib_s_matplotlib-26011:latest +sweb.eval.x86_64.matplotlib_s_matplotlib-26020:latest +sweb.eval.x86_64.mwaskom_s_seaborn-2848:latest +sweb.eval.x86_64.mwaskom_s_seaborn-3010:latest +sweb.eval.x86_64.mwaskom_s_seaborn-3190:latest +sweb.eval.x86_64.mwaskom_s_seaborn-3407:latest +sweb.eval.x86_64.pallets_s_flask-4045:latest +sweb.eval.x86_64.pallets_s_flask-4992:latest +sweb.eval.x86_64.pallets_s_flask-5063:latest +sweb.eval.x86_64.psf_s_requests-1963:latest +sweb.eval.x86_64.psf_s_requests-2148:latest +sweb.eval.x86_64.psf_s_requests-2317:latest +sweb.eval.x86_64.psf_s_requests-2674:latest +sweb.eval.x86_64.psf_s_requests-3362:latest +sweb.eval.x86_64.psf_s_requests-863:latest +sweb.eval.x86_64.pydata_s_xarray-3364:latest +sweb.eval.x86_64.pydata_s_xarray-4094:latest +sweb.eval.x86_64.pydata_s_xarray-4248:latest +sweb.eval.x86_64.pydata_s_xarray-4493:latest +sweb.eval.x86_64.pydata_s_xarray-5131:latest +sweb.eval.x86_64.pylint-dev_s_pylint-5859:latest +sweb.eval.x86_64.pylint-dev_s_pylint-6506:latest +sweb.eval.x86_64.pylint-dev_s_pylint-7080:latest +sweb.eval.x86_64.pylint-dev_s_pylint-7114:latest +sweb.eval.x86_64.pylint-dev_s_pylint-7228:latest +sweb.eval.x86_64.pylint-dev_s_pylint-7993:latest +sweb.eval.x86_64.pytest-dev_s_pytest-11143:latest +sweb.eval.x86_64.pytest-dev_s_pytest-11148:latest +sweb.eval.x86_64.pytest-dev_s_pytest-5103:latest +sweb.eval.x86_64.pytest-dev_s_pytest-5221:latest +sweb.eval.x86_64.pytest-dev_s_pytest-5227:latest +sweb.eval.x86_64.pytest-dev_s_pytest-5413:latest +sweb.eval.x86_64.pytest-dev_s_pytest-5495:latest +sweb.eval.x86_64.pytest-dev_s_pytest-5692:latest +sweb.eval.x86_64.pytest-dev_s_pytest-6116:latest +sweb.eval.x86_64.pytest-dev_s_pytest-7168:latest +sweb.eval.x86_64.pytest-dev_s_pytest-7220:latest +sweb.eval.x86_64.pytest-dev_s_pytest-7373:latest +sweb.eval.x86_64.pytest-dev_s_pytest-7432:latest +sweb.eval.x86_64.pytest-dev_s_pytest-7490:latest +sweb.eval.x86_64.pytest-dev_s_pytest-8365:latest +sweb.eval.x86_64.pytest-dev_s_pytest-8906:latest +sweb.eval.x86_64.pytest-dev_s_pytest-9359:latest +sweb.eval.x86_64.scikit-learn_s_scikit-learn-10297:latest +sweb.eval.x86_64.scikit-learn_s_scikit-learn-10508:latest +sweb.eval.x86_64.scikit-learn_s_scikit-learn-10949:latest +sweb.eval.x86_64.scikit-learn_s_scikit-learn-11040:latest +sweb.eval.x86_64.scikit-learn_s_scikit-learn-11281:latest +sweb.eval.x86_64.scikit-learn_s_scikit-learn-12471:latest +sweb.eval.x86_64.scikit-learn_s_scikit-learn-13142:latest +sweb.eval.x86_64.scikit-learn_s_scikit-learn-13241:latest +sweb.eval.x86_64.scikit-learn_s_scikit-learn-13439:latest +sweb.eval.x86_64.scikit-learn_s_scikit-learn-13496:latest +sweb.eval.x86_64.scikit-learn_s_scikit-learn-13497:latest +sweb.eval.x86_64.scikit-learn_s_scikit-learn-13584:latest +sweb.eval.x86_64.scikit-learn_s_scikit-learn-13779:latest +sweb.eval.x86_64.scikit-learn_s_scikit-learn-14087:latest +sweb.eval.x86_64.scikit-learn_s_scikit-learn-14092:latest +sweb.eval.x86_64.scikit-learn_s_scikit-learn-14894:latest +sweb.eval.x86_64.scikit-learn_s_scikit-learn-14983:latest +sweb.eval.x86_64.scikit-learn_s_scikit-learn-15512:latest +sweb.eval.x86_64.scikit-learn_s_scikit-learn-15535:latest +sweb.eval.x86_64.scikit-learn_s_scikit-learn-25500:latest +sweb.eval.x86_64.scikit-learn_s_scikit-learn-25570:latest +sweb.eval.x86_64.scikit-learn_s_scikit-learn-25638:latest +sweb.eval.x86_64.scikit-learn_s_scikit-learn-25747:latest +sweb.eval.x86_64.sphinx-doc_s_sphinx-10325:latest +sweb.eval.x86_64.sphinx-doc_s_sphinx-10451:latest +sweb.eval.x86_64.sphinx-doc_s_sphinx-11445:latest +sweb.eval.x86_64.sphinx-doc_s_sphinx-7686:latest +sweb.eval.x86_64.sphinx-doc_s_sphinx-7738:latest +sweb.eval.x86_64.sphinx-doc_s_sphinx-7975:latest +sweb.eval.x86_64.sphinx-doc_s_sphinx-8273:latest +sweb.eval.x86_64.sphinx-doc_s_sphinx-8282:latest +sweb.eval.x86_64.sphinx-doc_s_sphinx-8435:latest +sweb.eval.x86_64.sphinx-doc_s_sphinx-8474:latest +sweb.eval.x86_64.sphinx-doc_s_sphinx-8506:latest +sweb.eval.x86_64.sphinx-doc_s_sphinx-8595:latest +sweb.eval.x86_64.sphinx-doc_s_sphinx-8627:latest +sweb.eval.x86_64.sphinx-doc_s_sphinx-8713:latest +sweb.eval.x86_64.sphinx-doc_s_sphinx-8721:latest +sweb.eval.x86_64.sphinx-doc_s_sphinx-8801:latest +sweb.eval.x86_64.sympy_s_sympy-11400:latest +sweb.eval.x86_64.sympy_s_sympy-11870:latest +sweb.eval.x86_64.sympy_s_sympy-11897:latest +sweb.eval.x86_64.sympy_s_sympy-12171:latest +sweb.eval.x86_64.sympy_s_sympy-12236:latest +sweb.eval.x86_64.sympy_s_sympy-12419:latest +sweb.eval.x86_64.sympy_s_sympy-12454:latest +sweb.eval.x86_64.sympy_s_sympy-12481:latest +sweb.eval.x86_64.sympy_s_sympy-13031:latest +sweb.eval.x86_64.sympy_s_sympy-13043:latest +sweb.eval.x86_64.sympy_s_sympy-13146:latest +sweb.eval.x86_64.sympy_s_sympy-13177:latest +sweb.eval.x86_64.sympy_s_sympy-13437:latest +sweb.eval.x86_64.sympy_s_sympy-13471:latest +sweb.eval.x86_64.sympy_s_sympy-13480:latest +sweb.eval.x86_64.sympy_s_sympy-13647:latest +sweb.eval.x86_64.sympy_s_sympy-13773:latest +sweb.eval.x86_64.sympy_s_sympy-13895:latest +sweb.eval.x86_64.sympy_s_sympy-13915:latest +sweb.eval.x86_64.sympy_s_sympy-13971:latest +sweb.eval.x86_64.sympy_s_sympy-14024:latest +sweb.eval.x86_64.sympy_s_sympy-14308:latest +sweb.eval.x86_64.sympy_s_sympy-14317:latest +sweb.eval.x86_64.sympy_s_sympy-14396:latest +sweb.eval.x86_64.sympy_s_sympy-14774:latest +sweb.eval.x86_64.sympy_s_sympy-14817:latest +sweb.eval.x86_64.sympy_s_sympy-15011:latest +sweb.eval.x86_64.sympy_s_sympy-15308:latest +sweb.eval.x86_64.sympy_s_sympy-15345:latest +sweb.eval.x86_64.sympy_s_sympy-15346:latest +sweb.eval.x86_64.sympy_s_sympy-15609:latest +sweb.eval.x86_64.sympy_s_sympy-15678:latest +sweb.eval.x86_64.sympy_s_sympy-16106:latest +sweb.eval.x86_64.sympy_s_sympy-16281:latest +sweb.eval.x86_64.sympy_s_sympy-16503:latest +sweb.eval.x86_64.sympy_s_sympy-16792:latest +sweb.eval.x86_64.sympy_s_sympy-16988:latest +sweb.eval.x86_64.sympy_s_sympy-17022:latest +sweb.eval.x86_64.sympy_s_sympy-17139:latest +sweb.eval.x86_64.sympy_s_sympy-17630:latest +sweb.eval.x86_64.sympy_s_sympy-17655:latest +sweb.eval.x86_64.sympy_s_sympy-18057:latest +sweb.eval.x86_64.sympy_s_sympy-18087:latest +sweb.eval.x86_64.sympy_s_sympy-18189:latest +sweb.eval.x86_64.sympy_s_sympy-18199:latest +sweb.eval.x86_64.sympy_s_sympy-18532:latest +sweb.eval.x86_64.sympy_s_sympy-18621:latest +sweb.eval.x86_64.sympy_s_sympy-18698:latest +sweb.eval.x86_64.sympy_s_sympy-18835:latest +sweb.eval.x86_64.sympy_s_sympy-19007:latest +sweb.eval.x86_64.sympy_s_sympy-19254:latest +sweb.eval.x86_64.sympy_s_sympy-19487:latest +sweb.eval.x86_64.sympy_s_sympy-20049:latest +sweb.eval.x86_64.sympy_s_sympy-20154:latest +sweb.eval.x86_64.sympy_s_sympy-20212:latest +sweb.eval.x86_64.sympy_s_sympy-20322:latest +sweb.eval.x86_64.sympy_s_sympy-20442:latest +sweb.eval.x86_64.sympy_s_sympy-20590:latest +sweb.eval.x86_64.sympy_s_sympy-20639:latest +sweb.eval.x86_64.sympy_s_sympy-21055:latest +sweb.eval.x86_64.sympy_s_sympy-21171:latest +sweb.eval.x86_64.sympy_s_sympy-21379:latest +sweb.eval.x86_64.sympy_s_sympy-21612:latest +sweb.eval.x86_64.sympy_s_sympy-21614:latest +sweb.eval.x86_64.sympy_s_sympy-21627:latest +sweb.eval.x86_64.sympy_s_sympy-21847:latest +sweb.eval.x86_64.sympy_s_sympy-22005:latest +sweb.eval.x86_64.sympy_s_sympy-22714:latest +sweb.eval.x86_64.sympy_s_sympy-22840:latest +sweb.eval.x86_64.sympy_s_sympy-23117:latest +sweb.eval.x86_64.sympy_s_sympy-23191:latest +sweb.eval.x86_64.sympy_s_sympy-23262:latest +sweb.eval.x86_64.sympy_s_sympy-24066:latest +sweb.eval.x86_64.sympy_s_sympy-24102:latest +sweb.eval.x86_64.sympy_s_sympy-24152:latest +sweb.eval.x86_64.sympy_s_sympy-24213:latest +sweb.eval.x86_64.sympy_s_sympy-24909:latest diff --git a/evaluation/swe_bench/scripts/eval/prep_eval.sh b/evaluation/swe_bench/scripts/eval/prep_eval.sh deleted file mode 100755 index 81e03d08826e..000000000000 --- a/evaluation/swe_bench/scripts/eval/prep_eval.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash - -echo "Cloning OpenDevin SWE-Bench Fork" -git clone https://github.com/OpenDevin/SWE-bench.git evaluation/swe_bench/eval_workspace/SWE-bench - -# checkout to main-old -pushd evaluation/swe_bench/eval_workspace/SWE-bench -git checkout main-old -popd - -echo "Pulling all evaluation dockers..." -evaluation/swe_bench/scripts/docker/pull_all_eval_docker.sh - -echo "Downloading SWE-bench data..." -mkdir -p evaluation/swe_bench/eval_workspace/eval_data/instances -poetry run python3 evaluation/swe_bench/scripts/eval/download_swe_bench_data.py evaluation/swe_bench/eval_workspace/eval_data/instances