Skip to content

Commit

Permalink
Limit and format number of displayed dimensions in repr (pydata#5662)
Browse files Browse the repository at this point in the history
* Truncate dims

* better name and no typing

* Use limited formatting on dataarrays

* limit unindexed dims, code cleanup

* typing

* typing

* typing

* typing

* typing

* handle hashables

* Add test for element formatter

* Update test_formatting.py

* remove the trailing whitespace

* Remove trailing whitespaces

* Update whats-new.rst

* Update whats-new.rst

* Move to breaking changes instead

* Add typing to tests.

* With OPTIONS typed we can add more typing

* Fix errors in tests

* Update whats-new.rst
  • Loading branch information
Illviljan authored Jan 3, 2022
1 parent b88c65a commit be4b980
Show file tree
Hide file tree
Showing 3 changed files with 139 additions and 19 deletions.
3 changes: 3 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ New Features

Breaking changes
~~~~~~~~~~~~~~~~
- Improve repr readability when there are a large number of dimensions in datasets or dataarrays by
wrapping the text once the maximum display width has been exceeded. (:issue:`5546`, :pull:`5662`)
By `Jimmy Westling <https://github.com/illviljan>`_.


Deprecations
Expand Down
105 changes: 94 additions & 11 deletions xarray/core/formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import functools
from datetime import datetime, timedelta
from itertools import chain, zip_longest
from typing import Hashable
from typing import Collection, Hashable, Optional

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -97,6 +97,16 @@ def last_item(array):
return np.ravel(np.asarray(array[indexer])).tolist()


def calc_max_rows_first(max_rows: int) -> int:
    """Return the number of leading rows to keep within ``max_rows``.

    The leading part gets the extra row when ``max_rows`` is odd.
    """
    half, remainder = divmod(max_rows, 2)
    return half + remainder


def calc_max_rows_last(max_rows: int) -> int:
    """Return the number of trailing rows to keep within ``max_rows``.

    This is ``max_rows`` halved and rounded down.
    """
    half, _ = divmod(max_rows, 2)
    return half


def format_timestamp(t):
"""Cast given object to a Timestamp and return a nicely formatted string"""
# Timestamp is only valid for 1678 to 2262
Expand Down Expand Up @@ -384,11 +394,11 @@ def _mapping_repr(
summary = [f"{summary[0]} ({len_mapping})"]
elif max_rows is not None and len_mapping > max_rows:
summary = [f"{summary[0]} ({max_rows}/{len_mapping})"]
first_rows = max_rows // 2 + max_rows % 2
first_rows = calc_max_rows_first(max_rows)
keys = list(mapping.keys())
summary += [summarizer(k, mapping[k], col_width) for k in keys[:first_rows]]
if max_rows > 1:
last_rows = max_rows // 2
last_rows = calc_max_rows_last(max_rows)
summary += [pretty_print(" ...", col_width) + " ..."]
summary += [
summarizer(k, mapping[k], col_width) for k in keys[-last_rows:]
Expand Down Expand Up @@ -441,11 +451,74 @@ def dim_summary(obj):
return ", ".join(elements)


def unindexed_dims_repr(dims, coords):
def _element_formatter(
    elements: Collection[Hashable],
    col_width: int,
    max_rows: Optional[int] = None,
    delimiter: str = ", ",
) -> str:
    """
    Join elements into a single string, wrapping and truncating rows.

    Elements are appended to the current row until ``col_width`` plus the
    row length would exceed ``OPTIONS["display_width"]``. The element that
    overflows starts a new row prefixed with ``pretty_print("", col_width)``.
    If more than ``max_rows`` rows were produced, the middle rows are
    replaced by a single ``...`` row.

    Parameters
    ----------
    elements : Collection of hashable
        Elements to join together.
    col_width : int
        The width to indent to if a newline has been made.
    max_rows : int, optional
        The maximum number of allowed rows. The default is None, which
        disables the row limit.
    delimiter : str, optional
        Delimiter to use between each element. The default is ", ".

    Returns
    -------
    str
        The rows concatenated; every row after the first starts with a
        newline.
    """
    final_index = len(elements) - 1
    rows = [""]
    row_length = 0
    for index, element in enumerate(elements):
        # The last element is not followed by a delimiter.
        suffix = delimiter if index < final_index else ""
        piece = f"{element}{suffix}"
        piece_length = len(piece)
        row_length += piece_length

        # The piece still fits on the current row: append it and move on.
        if col_width + row_length <= OPTIONS["display_width"]:
            rows[-1] += piece
            continue

        # Otherwise close the current row (dropping the delimiter's trailing
        # whitespace) and start a new one with this piece.
        rows[-1] = rows[-1].rstrip()
        rows.append("\n" + pretty_print("", col_width) + piece)
        row_length = piece_length

    # No row limit, or the limit is not exceeded: return every row.
    if not max_rows or len(rows) <= max_rows:
        return "".join(rows)

    # Too many rows: keep the first and last ones around a "..." row.
    first_rows = calc_max_rows_first(max_rows)
    last_rows = calc_max_rows_last(max_rows)
    head = rows[:first_rows]
    ellipsis_row = "\n" + pretty_print("", col_width) + "..."
    tail = rows[-last_rows:] if max_rows > 1 else []
    return "".join([*head, ellipsis_row, *tail])


def dim_summary_limited(obj, col_width: int, max_rows: Optional[int] = None) -> str:
    """Format the ``name: size`` pairs of ``obj.sizes`` via ``_element_formatter``.

    ``col_width`` and ``max_rows`` are forwarded unchanged to
    ``_element_formatter``.
    """
    size_entries = [f"{name}: {size}" for name, size in obj.sizes.items()]
    return _element_formatter(size_entries, col_width, max_rows)


def unindexed_dims_repr(dims, coords, max_rows: Optional[int] = None) -> Optional[str]:
    """Summarize the dimensions that have no matching entry in ``coords``.

    Parameters
    ----------
    dims : iterable
        Dimension names to inspect.
    coords : container
        Anything supporting ``in``; dimensions found in it are skipped.
    max_rows : int, optional
        Row limit forwarded to ``_element_formatter``. The default is None.

    Returns
    -------
    str or None
        ``"Dimensions without coordinates: "`` followed by the formatted
        dimension names, or None when every dimension is in ``coords``.
    """
    unindexed_dims = [d for d in dims if d not in coords]
    if not unindexed_dims:
        return None

    dims_start = "Dimensions without coordinates: "
    # Indent wrapped rows to the header width so they line up under the
    # first dimension name.
    dims_str = _element_formatter(
        unindexed_dims, col_width=len(dims_start), max_rows=max_rows
    )
    return dims_start + dims_str

Expand Down Expand Up @@ -505,6 +578,8 @@ def short_data_repr(array):
def array_repr(arr):
from .variable import Variable

max_rows = OPTIONS["display_max_rows"]

# used for DataArray, Variable and IndexVariable
if hasattr(arr, "name") and arr.name is not None:
name_str = f"{arr.name!r} "
Expand All @@ -520,16 +595,23 @@ def array_repr(arr):
else:
data_repr = inline_variable_array_repr(arr.variable, OPTIONS["display_width"])

start = f"<xarray.{type(arr).__name__} {name_str}"
dims = dim_summary_limited(arr, col_width=len(start) + 1, max_rows=max_rows)
summary = [
"<xarray.{} {}({})>".format(type(arr).__name__, name_str, dim_summary(arr)),
f"{start}({dims})>",
data_repr,
]

if hasattr(arr, "coords"):
if arr.coords:
summary.append(repr(arr.coords))
col_width = _calculate_col_width(_get_col_items(arr.coords))
summary.append(
coords_repr(arr.coords, col_width=col_width, max_rows=max_rows)
)

unindexed_dims_str = unindexed_dims_repr(arr.dims, arr.coords)
unindexed_dims_str = unindexed_dims_repr(
arr.dims, arr.coords, max_rows=max_rows
)
if unindexed_dims_str:
summary.append(unindexed_dims_str)

Expand All @@ -546,12 +628,13 @@ def dataset_repr(ds):
max_rows = OPTIONS["display_max_rows"]

dims_start = pretty_print("Dimensions:", col_width)
summary.append("{}({})".format(dims_start, dim_summary(ds)))
dims_values = dim_summary_limited(ds, col_width=col_width + 1, max_rows=max_rows)
summary.append(f"{dims_start}({dims_values})")

if ds.coords:
summary.append(coords_repr(ds.coords, col_width=col_width, max_rows=max_rows))

unindexed_dims_str = unindexed_dims_repr(ds.dims, ds.coords)
unindexed_dims_str = unindexed_dims_repr(ds.dims, ds.coords, max_rows=max_rows)
if unindexed_dims_str:
summary.append(unindexed_dims_str)

Expand Down
50 changes: 42 additions & 8 deletions xarray/tests/test_formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -552,18 +552,52 @@ def test__mapping_repr(display_max_rows, n_vars, n_attr) -> None:
assert len_summary == n_vars

with xr.set_options(
display_max_rows=display_max_rows,
display_expand_coords=False,
display_expand_data_vars=False,
display_expand_attrs=False,
):
actual = formatting.dataset_repr(ds)
coord_s = ", ".join([f"{c}: {len(v)}" for c, v in coords.items()])
expected = dedent(
f"""\
<xarray.Dataset>
Dimensions: ({coord_s})
Coordinates: ({n_vars})
Data variables: ({n_vars})
Attributes: ({n_attr})"""
col_width = formatting._calculate_col_width(
formatting._get_col_items(ds.variables)
)
dims_start = formatting.pretty_print("Dimensions:", col_width)
dims_values = formatting.dim_summary_limited(
ds, col_width=col_width + 1, max_rows=display_max_rows
)
expected = f"""\
<xarray.Dataset>
{dims_start}({dims_values})
Coordinates: ({n_vars})
Data variables: ({n_vars})
Attributes: ({n_attr})"""
expected = dedent(expected)
assert actual == expected


def test__element_formatter(n_elements: int = 100) -> None:
    """Check that ``_element_formatter`` wraps and truncates a long element list.

    With 100 ``dim_<i>: 3`` elements, a 32-character intro and ``max_rows=12``,
    the result should keep the first six rows and the last six rows around a
    single ``...`` row.
    """
    # Wrapped rows are prefixed with pretty_print("", col_width), so they are
    # expected to align under the first element.
    # NOTE(review): assumes pretty_print pads with spaces up to col_width.
    expected = """\
    Dimensions without coordinates: dim_0: 3, dim_1: 3, dim_2: 3, dim_3: 3,
                                    dim_4: 3, dim_5: 3, dim_6: 3, dim_7: 3,
                                    dim_8: 3, dim_9: 3, dim_10: 3, dim_11: 3,
                                    dim_12: 3, dim_13: 3, dim_14: 3, dim_15: 3,
                                    dim_16: 3, dim_17: 3, dim_18: 3, dim_19: 3,
                                    dim_20: 3, dim_21: 3, dim_22: 3, dim_23: 3,
                                    ...
                                    dim_76: 3, dim_77: 3, dim_78: 3, dim_79: 3,
                                    dim_80: 3, dim_81: 3, dim_82: 3, dim_83: 3,
                                    dim_84: 3, dim_85: 3, dim_86: 3, dim_87: 3,
                                    dim_88: 3, dim_89: 3, dim_90: 3, dim_91: 3,
                                    dim_92: 3, dim_93: 3, dim_94: 3, dim_95: 3,
                                    dim_96: 3, dim_97: 3, dim_98: 3, dim_99: 3"""
    expected = dedent(expected)

    intro = "Dimensions without coordinates: "
    elements = [f"dim_{k}: 3" for k in range(n_elements)]

    # Pin the width the expected wrapping was computed for, so the test does
    # not depend on the ambient display options.
    with xr.set_options(display_width=80):
        values = formatting._element_formatter(
            elements, col_width=len(intro), max_rows=12
        )
    actual = intro + values
    assert expected == actual

0 comments on commit be4b980

Please sign in to comment.