Skip to content

Commit

Permalink
Allow latest pyarrow version (huggingface#2490)
Browse files Browse the repository at this point in the history
* Allow latest pyarrow version

* Fix test only valid for pyarrow < 4

* Add test for pyarrow >= 4
  • Loading branch information
albertvillanova authored Jun 14, 2021
1 parent 5ba1497 commit 39666f9
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 5 deletions.
4 changes: 2 additions & 2 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
- run: source venv/bin/activate
- run: pip install .[tests]
- run: pip install -r additional-tests-requirements.txt --no-deps
- run: pip install pyarrow==3.0.0
- run: pip install pyarrow --upgrade
- run: HF_SCRIPTS_VERSION=master python -m pytest -sv ./tests/

run_dataset_script_tests_pyarrow_1:
Expand Down Expand Up @@ -46,7 +46,7 @@ jobs:
- run: "& venv/Scripts/activate.ps1"
- run: pip install .[tests]
- run: pip install -r additional-tests-requirements.txt --no-deps
- run: pip install pyarrow==3.0.0
- run: pip install pyarrow --upgrade
- run: $env:HF_SCRIPTS_VERSION="master"
- run: python -m pytest -sv ./tests/

Expand Down
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,8 @@
"numpy>=1.17",
# Backend and serialization.
# Minimum 1.0.0 to avoid permission errors on Windows when using the compute layer on memory-mapped data
"pyarrow>=1.0.0,<4.0.0",
# pyarrow 4.0.0 introduced a segfault bug, see: https://github.com/huggingface/datasets/pull/2268
"pyarrow>=1.0.0,!=4.0.0",
# For smart caching dataset processing
"dill",
# For performance gains with apache arrow
Expand Down
9 changes: 7 additions & 2 deletions tests/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -745,8 +745,13 @@ def test_concatenation_table_cast(
for k, v in zip(in_memory_pa_table.schema.names, in_memory_pa_table.schema.types)
}
)
with pytest.raises(pa.ArrowNotImplementedError):
ConcatenationTable.from_blocks(blocks).cast(schema)
if pa.__version__ < "4":
with pytest.raises(pa.ArrowNotImplementedError):
ConcatenationTable.from_blocks(blocks).cast(schema)
else:
table = ConcatenationTable.from_blocks(blocks).cast(schema)
assert table.table == in_memory_pa_table.cast(schema)
assert isinstance(table, ConcatenationTable)
schema = pa.schema(
{
k: v if v != pa.int64() else pa.int32()
Expand Down

0 comments on commit 39666f9

Please sign in to comment.