-
Notifications
You must be signed in to change notification settings - Fork 20
/
Copy pathtable.py
executable file
·96 lines (71 loc) · 2.2 KB
/
table.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#!/usr/bin/env python
import random
import sys
from collections import Counter, defaultdict
from dump import dump
from utils import choose_predictions
# Everything after the script name on the command line is handed to
# choose_predictions unchanged (presumably prediction directory names -- confirm
# against utils.choose_predictions).
devin_only = False
dnames = sys.argv[1:]

# Display label for each model identifier a prediction may carry. "n/a" is the
# fallback identifier used for predictions that have no "model" field.
name = {
    "gpt-4o": "Aider with GPT-4o",
    "openrouter/anthropic/claude-3-opus": "Aider with Opus",
    "n/a": "Aider with GPT-4o",
}

# choose_predictions returns a mapping; its (key, value) pairs are materialised
# into a list so they can be shuffled in place.
preds = choose_predictions(dnames, devin_only=devin_only)
items = [*preds.items()]
random.shuffle(items)

# NOTE(review): dump is a project helper; its call text is left untouched in case
# it echoes the argument expression.
num_instances = len(items)
dump(num_instances)
# Tallies filled in by a single pass over the predictions:
#   proposed / resolved         -- one (attempt, model label) pair per prediction
#   model_proposed / _resolved  -- per-label counts
#   resolved_instances          -- keys of the predictions flagged as resolved
proposed = []
resolved = []
model_proposed = defaultdict(int)
model_resolved = defaultdict(int)
resolved_instances = set()

for inst, pred in items:
    # Field reads stay in this order: resolved flag, model id, attempt number.
    did_resolve = pred["resolved"]
    model_id = pred.get("model", "n/a")
    attempt_no = pred["try"]
    # Raises KeyError for a model id that has no entry in the label map.
    label = name[model_id]

    pair = (attempt_no, label)
    proposed.append(pair)
    model_proposed[label] += 1

    if not did_resolve:
        continue

    resolved.append(pair)
    model_resolved[label] += 1
    resolved_instances.add(inst)

# NOTE(review): dump is a project helper; its call text is left untouched in case
# it echoes the argument expression.
dump(len(resolved_instances))
dump(sorted(resolved_instances))

num_proposed = len(proposed)
dump(num_proposed)
num_resolved = len(resolved)
dump(num_resolved)
# First table: one row per (attempt, model label) pair. Rows are sorted on that
# tuple, so they come out ordered by attempt and then by label.
counts_proposed = Counter(proposed)
counts_resolved = Counter(resolved)

for num, (key, count_p) in enumerate(sorted(counts_proposed.items()), start=1):
    # Counter returns 0 for a pair that proposed predictions but resolved none.
    count_r = counts_resolved[key]
    # The attempt number only influences row order; it is not printed.
    attempt, model = key

    # This row's share of all proposed predictions. num_proposed is non-zero
    # whenever the loop body runs, because counts_proposed is built from proposed.
    pct_p = count_p * 100 / num_proposed
    # A run may resolve nothing at all; report 0% instead of dividing by zero.
    pct_r = count_r * 100 / num_resolved if num_resolved else 0.0
    # This row's resolved predictions as a share of every instance.
    pct_of_all = count_r / num_instances * 100

    print(
        f"| {num} | {model:20} | {count_p:3d} | {pct_p:4.1f}% | {count_r:2d} | {pct_r:4.1f}% |"
        f" {pct_of_all:4.1f}% |"
    )

# With no predictions loaded num_instances is 0, so guard the overall rate too.
pct_of_all = num_resolved / num_instances * 100 if num_instances else 0.0
print(
    f"| **Total** | | **{num_proposed}** | **100%** | **{num_resolved}** | **100%** |"
    f" **{pct_of_all:4.1f}%** | "
)
# Second table: resolve rate per model label. The bare print() emits the blank
# line that separates it from the preceding output.
print()
for model in sorted(model_proposed):
    count_p = model_proposed[model]
    # model_resolved is a defaultdict(int): a label with no resolved predictions
    # reads as 0.
    count_r = model_resolved[model]
    # count_p is at least 1: a label only appears in model_proposed once it has
    # been incremented.
    pct = count_r * 100 / count_p
    print(f"| {model:20} | {count_p:3d} | {count_r:2d} |{pct:4.1f}% |")

# With no predictions at all there is nothing to divide by; report 0% instead of
# raising ZeroDivisionError.
pct = num_resolved * 100 / num_proposed if num_proposed else 0.0
print(f"| **Total** | **{num_proposed}** | **{num_resolved}** |**{pct:4.1f}%** |")