Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(ux): include basename of path in generated table names in read_*() #10522

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
chore: clean up registered tables after every test
  • Loading branch information
NickCrews committed Dec 6, 2024
commit f7cb256ffdcd6df34fd3a0553801db8ab1b5e8d2
27 changes: 27 additions & 0 deletions ibis/backends/tests/test_register.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,19 @@ def pushd(new_dir):
os.chdir(previous_dir)


def drop(table: ibis.Table):
    """Drop the table or view that backs ``table`` from its backend.

    Test helper used to clean up whatever a ``register``/``read_*`` call
    created, so registered objects are removed after every test.

    Parameters
    ----------
    table
        Table expression whose backing object should be removed. Its backend
        is looked up with ``_find_backend()`` and its name with
        ``get_name()``.

    Raises
    ------
    Exception
        The error raised by ``drop_table`` when its message does not mention
        a view, or when the ``drop_view`` fallback fails as well (in that
        case the ``drop_view`` error is attached as ``__cause__``).
    """
    backend = table._find_backend()
    name = table.get_name()
    # Review note on PR #10522 (NickCrews): this is one example use case that
    # may inform the API designed in #8382; a singular
    # con.drop(table_or_view) would be nice.
    try:
        backend.drop_table(name)
    except Exception as table_error:
        # This is a lazy way to check if the error is due to the table being
        # a view.
        if "view" not in str(table_error).lower():
            raise
        try:
            backend.drop_view(name)
        except Exception as view_error:
            # The substring check can misfire, e.g. when the error message
            # quotes a table name that itself contains "view". Surface the
            # original drop_table failure instead of masking it with the
            # follow-up drop_view failure.
            raise table_error from view_error


@pytest.fixture
def gzip_csv(data_dir, tmp_path):
basename = "diamonds.csv"
Expand Down Expand Up @@ -113,6 +126,7 @@ def test_register_csv(con, data_dir, fname, table_name):

if con.name != "datafusion":
table.count().execute()
drop(table)


# TODO: rewrite or delete test when register api is removed
Expand All @@ -139,6 +153,7 @@ def test_register_csv_gz(con, data_dir, gzip_csv):
table = con.register(gzip_csv)

assert table.count().execute()
drop(table)


# TODO: rewrite or delete test when register api is removed
Expand Down Expand Up @@ -168,6 +183,7 @@ def test_register_with_dotted_name(con, data_dir, tmp_path):

if con.name != "datafusion":
table.count().execute()
drop(table)


def read_table(path: Path) -> Iterator[tuple[str, pa.Table]]:
Expand Down Expand Up @@ -226,6 +242,7 @@ def test_register_parquet(con, tmp_path, data_dir, fname, table_name):
assert new_tables.pop() == table_name
if con.name != "datafusion":
table.count().execute()
drop(table)


# TODO: rewrite or delete test when register api is removed
Expand Down Expand Up @@ -270,6 +287,7 @@ def test_register_iterator_parquet(
new_tables = set(con.list_tables()) - tables_before
assert len(new_tables) == 1
assert table.count().execute()
drop(table)


# TODO: remove entirely when `register` is removed
Expand Down Expand Up @@ -299,11 +317,13 @@ def test_register_pandas(con):
with pytest.warns(FutureWarning, match="v9.1"):
t = con.register(df)
assert t.x.sum().execute() == 6
drop(t)

with pytest.warns(FutureWarning, match="v9.1"):
t = con.register(df, "my_table")
assert t.op().name == "my_table"
assert t.x.sum().execute() == 6
drop(t)


# TODO: remove entirely when `register` is removed
Expand Down Expand Up @@ -333,6 +353,7 @@ def test_register_pyarrow_tables(con):
with pytest.warns(FutureWarning, match="v9.1"):
t = con.register(pa_t)
assert t.x.sum().execute() == 6
drop(t)


@pytest.mark.notyet(
Expand Down Expand Up @@ -371,6 +392,7 @@ def test_csv_reregister_schema(con, tmp_path):
assert result_schema["cola"].is_integer()
assert result_schema["colb"].is_float64()
assert result_schema["colc"].is_string()
drop(foo_table)


@pytest.mark.notimpl(
Expand Down Expand Up @@ -433,6 +455,7 @@ def test_read_parquet(con, tmp_path, data_dir, fname, in_table_name):
if in_table_name is not None:
assert table.op().name == in_table_name
assert table.count().execute()
drop(table)


@pytest.fixture(scope="module")
Expand Down Expand Up @@ -469,6 +492,7 @@ def test_read_parquet_glob(con, tmp_path, ft_data):
table = con.read_parquet(tmp_path / f"*.{ext}")

assert table.count().execute() == nrows * ntables
drop(table)


@pytest.mark.notyet(
Expand Down Expand Up @@ -497,6 +521,7 @@ def test_read_csv_glob(con, tmp_path, ft_data):
table = con.read_csv(tmp_path / f"*.{ext}")

assert table.count().execute() == nrows * ntables
drop(table)


@pytest.mark.notyet(
Expand Down Expand Up @@ -532,6 +557,7 @@ def test_read_json_glob(con, tmp_path, ft_data):
table = con.read_json(tmp_path / f"*.{ext}")

assert table.count().execute() == nrows * ntables
drop(table)


@pytest.fixture(scope="module")
Expand Down Expand Up @@ -592,3 +618,4 @@ def test_read_csv(con, data_dir, in_table_name, num_diamonds):
}
)
assert table.count().execute() == num_diamonds
drop(table)
4 changes: 1 addition & 3 deletions ibis/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -530,9 +530,7 @@ def gen_name_from_path(path: str | Path) -> str:
Examples
--------
>>> gen_name_from_path("s3://path/to/myfile.csv") # doctest: +ELLIPSIS
'ibis_read_s3__path__to__myfile__csv...'
>>> gen_name_from_path("s3://long_long_long_path/to/myfile.csv") # doctest: +ELLIPSIS
'ibis_read_s3__myfile__csv...'
'ibis_read_myfile_csv...'
"""
basename = os.path.basename(path)
basename = re.sub(r"[^a-zA-Z0-9_]", "_", basename)
Expand Down
Loading