[metadata]
name = crfm-helm
version = 0.2.2
author = Stanford CRFM
author_email = [email protected]
description = Benchmark for language models
long_description = Benchmark for language models
keywords = language models benchmarking
license = Apache License 2.0
classifiers =
    Programming Language :: Python :: 3 :: Only
    Programming Language :: Python :: 3.8
    License :: OSI Approved :: Apache Software License
url = https://github.com/stanford-crfm/helm

[options]
python_requires = ~=3.8
package_dir =
    =src
packages = find:
zip_safe = False
include_package_data = True
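# The `~=` pins below are PEP 440 "compatible release" specifiers: the last
# listed version component may float upward, so `tqdm~=4.64.1` means
# `>=4.64.1, ==4.64.*` (patch upgrades only), while the two-component
# `python_requires = ~=3.8` above allows any Python 3.x from 3.8 on.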
install_requires =
    # Common
    zstandard~=0.18.0
    tqdm~=4.64.1
    pyhocon~=0.3.59
    dacite~=1.6.0
    # Proxy
    aleph-alpha-client~=2.14.0
    anthropic~=0.2.5
    bottle~=0.12.23
    gunicorn~=20.1.0
    Mako~=1.2.3
    # sqlitedict==2.0.0 is slow! https://github.com/RaRe-Technologies/sqlitedict/issues/152
    # Keep the version at 1.7.0.
    sqlitedict~=1.7.0
    pymongo~=4.2.0
    retrying~=1.3.3
    websocket-client~=1.3.2 # For Anthropic (Legacy stanford-online-all-v4-s3)
    openai~=0.27.0
    tiktoken~=0.3.3
    transformers~=4.28.1
    tokenizers~=0.13.2
    icetk~=0.0.4
    protobuf~=3.20.2 # Can't use 4.21.0 due to backward incompatibility
    google-api-python-client~=2.64.0
    revChatGPT~=0.1.1
    # Scenarios
    datasets~=2.5.2 # For math scenarios
    gdown~=4.4.0
    jsonlines~=3.1.0 # Not really needed
    sympy~=1.11.1 # For math scenarios
    sentencepiece~=0.1.97
    numba~=0.56.4
    cattrs~=22.2.0
    xlrd~=2.0.1 # Used by pandas.read_excel in ice_scenario
    # Metrics
    importlib-resources~=5.10.0
    nltk~=3.7
    scipy~=1.9.1
    uncertainty-calibration~=0.1.3
    rouge-score~=0.1.2
    pyext~=0.7
    pytrec_eval==0.5
    sacrebleu~=2.2.1
    # Work around https://github.com/p-lambda/verified_calibration/issues/11
    # TODO: Remove after this issue is resolved
    scikit-learn~=1.1.2
    spacy~=3.2.4
    summ-eval~=0.892
    surge-api~=1.1.0
    # End users should install a CUDA version of PyTorch manually if needed
    torch~=1.12.1 # Summarization metrics
    torchvision~=0.13.1
    # Plotting
    colorcet~=3.0.1
    matplotlib~=3.6.0
    numpy~=1.23.3
    seaborn~=0.11.0

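# As noted above, a CUDA-enabled PyTorch build may need to be installed
# manually. One common approach for this torch version (the cu116 tag is
# just an example; match it to the local CUDA toolkit):
#   pip install torch==1.12.1+cu116 torchvision==0.13.1+cu116 \
#       --extra-index-url https://download.pytorch.org/whl/cu116
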
[options.entry_points]
console_scripts =
    helm-run = helm.benchmark.run:main
    helm-summarize = helm.benchmark.presentation.summarize:main
    helm-server = helm.benchmark.server:main
    helm-create-plots = helm.benchmark.presentation.create_plots:main
    crfm-proxy-server = helm.proxy.server:main
    crfm-proxy-cli = helm.proxy.cli:main
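# Each console_scripts entry maps a command name to a `package.module:function`
# entry point; installing the package (e.g. `pip install -e .`) places these
# executables on PATH, so running `helm-run` dispatches to
# helm.benchmark.run:main.
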
[options.packages.find]
where = src
exclude =
    tests*
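# Together with `package_dir = =src` under [options], this is the standard
# src-layout: setuptools discovers packages under src/ and keeps test
# packages out of built distributions.
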
# Settings for Flake8: Tool For Style Guide Enforcement
[flake8]
max-line-length = 120
exclude = venv/*
# Ignore completely:
# E203 - White space before ':', (conflicts with black)
# E231 - Missing whitespace after ',', ';', or ':'
# E731 - do not assign a lambda expression, use a def
# W503 - line break before binary operator, (conflicts with black)
# W605 - invalid escape sequence '\', (causes failures)
ignore = E203,E231,E731,W503,W605

# Settings for Mypy: static type checker for Python 3
[mypy]
ignore_missing_imports = True
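# ignore_missing_imports silences "Cannot find implementation or library stub"
# errors for third-party dependencies that ship without type stubs.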