Skip to content

Commit

Permalink
Update with latest available transforms
Browse files Browse the repository at this point in the history
Signed-off-by: Maroun Touma <[email protected]>
  • Loading branch information
touma-I committed Aug 28, 2024
1 parent 3c57682 commit 35c7e60
Show file tree
Hide file tree
Showing 5 changed files with 46 additions and 5 deletions.
6 changes: 4 additions & 2 deletions .make.versions
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ DPK_MINOR_VERSION=2
DPK_MICRO_VERSION=1
# The suffix is normally set in the main/development branch and is only nulled out when creating release branches.
# It can be manually incremented, for example, to allow publishing a new intermediate version wheel to pypi.
DPK_VERSION_SUFFIX=.dev0
DPK_VERSION_SUFFIX=.dev2

DPK_VERSION=$(DPK_MAJOR_VERSION).$(DPK_MINOR_VERSION).$(DPK_MICRO_VERSION)$(DPK_VERSION_SUFFIX)

Expand Down Expand Up @@ -101,6 +101,8 @@ HEADER_CLEANSER_RAY_VERSION=$(DPK_VERSION)

PII_REDACTOR_PYTHON_VERSION=$(DPK_VERSION)

DPK_TRNASFORM_REV=$(DPK_VERSION)

################## ################## ################## ################## ################## ##################
# Begin versions that the repo depends on.

Expand All @@ -114,4 +116,4 @@ ifeq ($(KFPv2), 1)
WORKFLOW_SUPPORT_LIB=kfp_v2_workflow_support
else
WORKFLOW_SUPPORT_LIB=kfp_v1_workflow_support
endif
endif
2 changes: 1 addition & 1 deletion data-processing-lib/python/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "data_prep_toolkit"
version = "0.2.1.dev0"
version = "0.2.1.dev2"
requires-python = ">=3.10"
keywords = ["data", "data preprocessing", "data preparation", "llm", "generative", "ai", "fine-tuning", "llmapps" ]
description = "Data Preparation Toolkit Library"
Expand Down
4 changes: 2 additions & 2 deletions data-processing-lib/ray/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "data_prep_toolkit_ray"
version = "0.2.1.dev0"
version = "0.2.1.dev2"
keywords = ["data", "data preprocessing", "data preparation", "llm", "generative", "ai", "fine-tuning", "llmapps" ]
requires-python = ">=3.10"
description = "Data Preparation Toolkit Library for Ray"
Expand All @@ -11,7 +11,7 @@ authors = [
{ name = "Boris Lublinsky", email = "[email protected]" },
]
dependencies = [
"data-prep-toolkit==0.2.1.dev0",
"data-prep-toolkit==0.2.1.dev2",
"ray[default]==2.24.0",
# These two are to fix security issues identified by quay.io
"fastapi>=0.110.2",
Expand Down
24 changes: 24 additions & 0 deletions transforms/packaging/python/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Dependencies for the packaged Python transforms.
#
# The data-prep-toolkit pin matches the version declared in
# data-processing-lib/python/pyproject.toml; bump both together.
data-prep-toolkit==0.2.1.dev2
# NOTE: argparse is intentionally not listed. It is part of the Python
# standard library and the toolkit requires Python >=3.10, so the PyPI
# backport would only be shadowed by the stdlib module.
boto3>=1.34.69
bs4==0.0.2
clamd==1.0.2
docling[ocr]==1.1.2
duckdb==0.10.1
fasttext==0.9.2
filetype >=1.2.0, <2.0.0
huggingface-hub >= 0.21.4, <1.0.0
langcodes==3.3.0
mmh3==4.1.0
numpy==1.26.4
pandas
parameterized
pyarrow==16.1.0
python-dateutil>=2.8.2
pytz>=2020.1
quackling==0.1.0
# The environment marker skips this package on macOS (platform_system == 'Darwin').
scancode-toolkit==32.1.0 ; platform_system != 'Darwin'
sentence-transformers==3.0.1
transformers==4.38.2
tzdata>=2022.7
xxhash==3.4.1
15 changes: 15 additions & 0 deletions transforms/packaging/ray/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Dependencies for the packaged Ray transforms.
#
# The data-prep-toolkit-ray pin matches the version declared in
# data-processing-lib/ray/pyproject.toml; bump both together.
data-prep-toolkit-ray==0.2.1.dev2
data_prep_toolkit_transforms==0.2.1.dev2
# The environment marker skips this package on macOS (platform_system == 'Darwin').
scancode-toolkit==32.1.0 ; platform_system != 'Darwin'
parameterized
# tqdm is listed once; the original file repeated this exact pin.
tqdm==4.66.3
mmh3==4.1.0
xxhash==3.4.1
scipy==1.12.0
networkx==3.3
colorlog==6.8.2
func-timeout==4.3.5
pandas==2.2.2
emerge-viz==2.0.0

0 comments on commit 35c7e60

Please sign in to comment.