setup.cfg
[metadata]
name = farm-haystack
version = file: VERSION.txt
url = https://github.com/deepset-ai/haystack
project_urls =
    Docs: RTD = https://haystack.deepset.ai/overview/intro
    CI: GitHub = https://github.com/deepset-ai/haystack/actions
    GitHub: issues = https://github.com/deepset-ai/haystack/issues
    GitHub: repo = https://github.com/deepset-ai/haystack
description = Neural Question Answering & Semantic Search at Scale. Use modern transformer-based models like BERT to find answers in large document collections
long_description = file: README.md
long_description_content_type = text/markdown
keywords =
    QA
    Question-Answering
    Reader
    Retriever
    semantic-search
    search
    BERT
    roberta
    albert
    squad
    mrc
    transfer-learning
    language-model
    transformer
author = deepset.ai
author_email = [email protected]
license = Apache License 2.0
license_file = LICENSE
platforms = any
classifiers =
    Development Status :: 5 - Production/Stable
    Intended Audience :: Science/Research
    License :: Freely Distributable
    License :: OSI Approved :: Apache Software License
    Topic :: Scientific/Engineering :: Artificial Intelligence
    Operating System :: OS Independent
    Programming Language :: Python
    Programming Language :: Python :: 3
    Programming Language :: Python :: 3.7
    Programming Language :: Python :: 3.8
    Programming Language :: Python :: 3.9
    Programming Language :: Python :: 3.10
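
# [options] below carries the core packaging configuration read by setuptools:
# the supported Python range and the dependencies installed with the base package.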
[options]
use_scm_version = True
python_requires = >=3.7, <4
packages = find:
setup_requires =
    setuptools
    wheel
install_requires =
    importlib-metadata; python_version < '3.8'
    torch>1.9,<1.13
    requests
    pydantic
    transformers==4.20.1
    nltk
    pandas

    # Utils
    dill  # pickle extension for (de-)serialization
    tqdm  # progress bars in model download and training scripts
    networkx  # graphs library
    mmh3  # fast hashing function (murmurhash3)
    quantulum3  # quantities extraction from text
    posthog  # telemetry

    azure-ai-formrecognizer==3.2.0b2  # forms reader
    # azure-core is a dependency of azure-ai-formrecognizer.
    # To stop malicious pip backtracking during `pip install farm-haystack[all]`,
    # documented in https://github.com/deepset-ai/haystack/issues/2280,
    # we have to resolve a dependency version conflict ourselves:
    # azure-core>=1.23 conflicts with pydoc-markdown's dependency on databind>=1.5.0,
    # which itself requires typing-extensions<4.0.0, while azure-core>=1.23 needs
    # typing-extensions>=4.0.1. pip unfortunately backtracks into the databind
    # direction and ultimately gets lost.
    azure-core<1.23

    # audio's espnet-model-zoo requires huggingface-hub<0.8, while we need >=0.5
    # to be able to use create_repo in FARMReader
    huggingface-hub<0.8.0,>=0.5.0

    # Preprocessing
    more_itertools  # for windowing
    python-docx
    langdetect  # for PDF conversions
    tika  # Apache Tika (text & metadata extractor)

    # See haystack/nodes/retriever/_embedding_encoder.py, _SentenceTransformersEmbeddingEncoder
    sentence-transformers>=2.2.0

    # for stats in run_classifier
    scipy>=1.3.2
    scikit-learn>=1.0.0

    # Metrics and logging
    seqeval
    mlflow

    # Elasticsearch
    elasticsearch>=7.7,<7.11
    elastic-apm

    # context matching
    rapidfuzz>=2.0.15,<3

    # Schema validation
    jsonschema
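
# `packages = find:` above discovers packages automatically; the exclude patterns
# below keep the REST API, tests, tutorials and UI out of the built distribution.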
[options.packages.find]
exclude =
    rest_api*
    test*
    tutorials*
    ui*

[options.package_data]
haystack =
    json-schemas/*.schema.json
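
# Each key below defines an optional dependency group ("extra") installable with
# e.g. `pip install farm-haystack[faiss]`. Extras may reference other extras of
# this package, so groups like `docstores` and `all` simply aggregate them.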
[options.extras_require]
sql =
    sqlalchemy>=1.4.2,<2
    sqlalchemy_utils
    psycopg2-binary; platform_system != 'Windows'
only-faiss =
    faiss-cpu>=1.6.3,<2
faiss =
    farm-haystack[sql,only-faiss]
only-faiss-gpu =
    faiss-gpu>=1.6.3,<2
faiss-gpu =
    farm-haystack[sql,only-faiss-gpu]
only-milvus1 =
    pymilvus<2.0.0  # Refer to the Milvus version support matrix at https://github.com/milvus-io/pymilvus#install-pymilvus
milvus1 =
    farm-haystack[sql,only-milvus1]
only-milvus =
    pymilvus>=2.0.0,<3  # Refer to the Milvus version support matrix at https://github.com/milvus-io/pymilvus#install-pymilvus
milvus =
    farm-haystack[sql,only-milvus]
weaviate =
    weaviate-client==3.3.3
only-pinecone =
    pinecone-client
pinecone =
    farm-haystack[sql,only-pinecone]
graphdb =
    SPARQLWrapper
inmemorygraph =
    SPARQLWrapper
docstores =
    farm-haystack[faiss,milvus,weaviate,graphdb,inmemorygraph,pinecone]
docstores-gpu =
    farm-haystack[faiss-gpu,milvus,weaviate,graphdb,inmemorygraph,pinecone]
audio =
    espnet
    espnet-model-zoo
    pydub
beir =
    beir; platform_system != 'Windows'
crawler =
    selenium!=4.1.4  # due to https://github.com/SeleniumHQ/selenium/issues/10612
    webdriver-manager
preprocessing =
    beautifulsoup4
    markdown
    python-magic; platform_system != 'Windows'  # Depends on libmagic: https://pypi.org/project/python-magic/
    python-magic-bin; platform_system == 'Windows'  # Needs to be installed without python-magic, otherwise Windows CI gets stuck.
ocr =
    pytesseract==0.3.7
    pillow
    pdf2image==1.14.0
onnx =
    onnxruntime
    onnxruntime_tools
onnx-gpu =
    onnxruntime-gpu
    onnxruntime_tools
ray =
    ray>=1.9.1,<2; platform_system != 'Windows'
    ray>=1.9.1,<2,!=1.12.0; platform_system == 'Windows'  # Avoid 1.12.0 due to https://github.com/ray-project/ray/issues/24169 (fails on Windows)
    aiorwlock>=1.3.0,<2
colab =
    grpcio==1.43.0
dev =
    # Type check
    mypy
    typing_extensions; python_version < '3.8'
    # Test
    pytest
    pytest-custom_exit_code  # used in the CI
    responses
    tox
    coverage
    python-multipart
    psutil
    # Linting
    pylint
    # Code formatting
    black[jupyter]
    # Documentation
    pydoc-markdown==4.5.1  # FIXME Unpin!
    mkdocs
    jupytercontrib
    watchdog  #==1.0.2
    requests-cache
test =
    farm-haystack[docstores,audio,crawler,preprocessing,ocr,ray,dev]
all =
    farm-haystack[docstores,audio,crawler,preprocessing,ocr,ray,dev,onnx,beir]
all-gpu =
    farm-haystack[docstores-gpu,audio,crawler,preprocessing,ocr,ray,dev,onnx-gpu,beir]
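
# The remaining sections configure development tooling (pytest, mypy, tox)
# rather than packaging.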
[tool:pytest]
testpaths =
    test
    rest_api/test
    ui/test
python_files =
    test_*.py
addopts =
    -vv

[mypy]
warn_return_any = false
warn_unused_configs = true
ignore_missing_imports = true
plugins = pydantic.mypy
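
# tox runs the test suite in isolated virtualenvs; [testenv] below runs pytest
# under coverage and writes its reports to test-reports/.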
[tox]
requires =
    tox-venv
    setuptools >= 30.0.0
envlist = py36,py37

[testenv]
changedir = test
deps =
    coverage
    pytest
    pandas
setenv =
    COVERAGE_FILE = test-reports/.coverage
    PYTEST_ADDOPTS = --junitxml=test-reports/{envname}/junit.xml -vv
commands =
    coverage run --source haystack --parallel-mode -m pytest {posargs}
    coverage combine
    coverage report -m
    coverage html -d test-reports/coverage-html
    coverage xml -o test-reports/coverage.xml