[metadata]
name = crfm-helm
version = 0.2.2
author = Stanford CRFM
author_email = [email protected]
description = Benchmark for language models
long_description = Benchmark for language models
keywords = language models benchmarking
license = Apache License 2.0
classifiers =
    Programming Language :: Python :: 3 :: Only
    Programming Language :: Python :: 3.8
    License :: OSI Approved :: Apache Software License
url = https://github.com/stanford-crfm/helm

[options]
python_requires = ~=3.8
package_dir =
    =src
packages = find:
zip_safe = False
include_package_data = True
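# The `~=` pins below are PEP 440 "compatible release" specifiers: the last
# listed version component may float upward, so `tqdm~=4.64.1` means
# `>=4.64.1, ==4.64.*` (patch upgrades only), while the two-component
# `python_requires = ~=3.8` above allows any Python 3.x from 3.8 on.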
install_requires =
    # Common
    zstandard~=0.18.0
    tqdm~=4.64.1
    pyhocon~=0.3.59
    dacite~=1.6.0
    # Proxy
    aleph-alpha-client~=2.14.0
    anthropic~=0.2.5
    bottle~=0.12.23
    gunicorn~=20.1.0
    Mako~=1.2.3
    # sqlitedict==2.0.0 is slow! https://github.com/RaRe-Technologies/sqlitedict/issues/152
    # Keep the version at 1.7.0.
    sqlitedict~=1.7.0
    pymongo~=4.2.0
    retrying~=1.3.3
    websocket-client~=1.3.2 # For Anthropic (Legacy stanford-online-all-v4-s3)
    openai~=0.27.0
    tiktoken~=0.3.3
    transformers~=4.28.1
    tokenizers~=0.13.2
    icetk~=0.0.4
    protobuf~=3.20.2 # Can't use 4.21.0 due to backward incompatibility
    google-api-python-client~=2.64.0
    revChatGPT~=0.1.1
    # Scenarios
    datasets~=2.5.2 # For math scenarios
    gdown~=4.4.0
    jsonlines~=3.1.0 # Not really needed
    sympy~=1.11.1 # For math scenarios
    sentencepiece~=0.1.97
    numba~=0.56.4
    cattrs~=22.2.0
    xlrd~=2.0.1 # Used by pandas.read_excel in ice_scenario
    # Metrics
    importlib-resources~=5.10.0
    nltk~=3.7
    scipy~=1.9.1
    uncertainty-calibration~=0.1.3
    rouge-score~=0.1.2
    pyext~=0.7
    pytrec_eval==0.5
    sacrebleu~=2.2.1
    # Work around https://github.com/p-lambda/verified_calibration/issues/11
    # TODO: Remove after this issue is resolved
    scikit-learn~=1.1.2
    spacy~=3.2.4
    summ-eval~=0.892
    surge-api~=1.1.0
    # End users should install a CUDA version of PyTorch manually if needed
    torch~=1.12.1 # Summarization metrics
    torchvision~=0.13.1
    # Plotting
    colorcet~=3.0.1
    matplotlib~=3.6.0
    numpy~=1.23.3
    seaborn~=0.11.0

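# As noted above, a CUDA-enabled PyTorch build may need to be installed
# manually. One common approach for this torch version (the cu116 tag is
# just an example; match it to the local CUDA toolkit):
#   pip install torch==1.12.1+cu116 torchvision==0.13.1+cu116 \
#       --extra-index-url https://download.pytorch.org/whl/cu116
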
[options.entry_points]
console_scripts =
    helm-run = helm.benchmark.run:main
    helm-summarize = helm.benchmark.presentation.summarize:main
    helm-server = helm.benchmark.server:main
    helm-create-plots = helm.benchmark.presentation.create_plots:main
    crfm-proxy-server = helm.proxy.server:main
    crfm-proxy-cli = helm.proxy.cli:main
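# Each console_scripts entry maps a command name to a `package.module:function`
# entry point; installing the package (e.g. `pip install -e .`) places these
# executables on PATH, so running `helm-run` dispatches to
# helm.benchmark.run:main.
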
[options.packages.find]
where = src
exclude =
    tests*
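# Together with `package_dir = =src` under [options], this is the standard
# src-layout: setuptools discovers packages under src/ and keeps test
# packages out of built distributions.
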
# Settings for Flake8: Tool For Style Guide Enforcement
[flake8]
max-line-length = 120
exclude = venv/*
# Ignore completely:
# E203 - White space before ':', (conflicts with black)
# E231 - Missing whitespace after ',', ';', or ':'
# E731 - do not assign a lambda expression, use a def
# W503 - line break before binary operator, (conflicts with black)
# W605 - invalid escape sequence '\', (causes failures)
ignore = E203,E231,E731,W503,W605

# Settings for Mypy: static type checker for Python 3
[mypy]
ignore_missing_imports = True
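# ignore_missing_imports silences "Cannot find implementation or library stub"
# errors for third-party dependencies that ship without type stubs.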