Skip to content

Commit

Permalink
Align DataArrays based on coords in Dataset constructor (pydata#1826)
Browse files Browse the repository at this point in the history
* Align da based on explicit coords

* add tags to gitignore

* random formatting spot

* initial tests

* apply the test to the right degree of freedom - the coords of the variable added in

* couple more for gitignore

* @stickler-ci doesn't like `range`

* more tests

* more gitignores

* whats new

* raise

* message=

* Add all testmon files to gitignore

* cast single dim tuples to indexes

* test on different dataset coords types

* updated whatsnew

* version from Stephan's feedback; works but not clean

* I think much cleaner version

* formatting
  • Loading branch information
max-sixty authored and shoyer committed May 31, 2018
1 parent 9c80059 commit 7036eb5
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 2 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ nosetests.xml
.ropeproject/
.tags*
.testmon*
.tmontmp/
.pytest_cache
dask-worker-space/

Expand Down
5 changes: 5 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,11 @@ Enhancements
to manage its version strings. (:issue:`1300`).
By `Joe Hamman <https://github.com/jhamman>`_.

- :py:class:`Dataset`\s align :py:class:`DataArray`\s to coords that are explicitly
passed into the constructor, where previously an error would be raised.
(:issue:`674`)
By `Maximilian Roos <https://github.com/maxim-lian>`_.

- :py:meth:`~DataArray.sel`, :py:meth:`~DataArray.isel` & :py:meth:`~DataArray.reindex`,
(and their :py:class:`Dataset` counterparts) now support supplying a ``dict``
as a first argument, as an alternative to the existing approach
Expand Down
12 changes: 11 additions & 1 deletion xarray/core/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,7 +360,17 @@ def merge_data_and_coords(data, coords, compat='broadcast_equals',
"""Used in Dataset.__init__."""
objs = [data, coords]
explicit_coords = coords.keys()
return merge_core(objs, compat, join, explicit_coords=explicit_coords)
indexes = dict(extract_indexes(coords))
return merge_core(objs, compat, join, explicit_coords=explicit_coords,
indexes=indexes)


def extract_indexes(coords):
    """Yield ``(name, index)`` pairs for the index-like entries of ``coords``.

    Every value in the mapping is first converted with ``as_variable``.
    An entry is yielded only when the resulting variable's ``dims`` is
    exactly ``(name,)``; the yielded index is that variable's
    ``to_index()`` result. All other entries are skipped.
    """
    for key, raw in coords.items():
        candidate = as_variable(raw, name=key)
        if candidate.dims != (key,):
            # Dimensions differ from the entry's own name: not an index.
            continue
        yield key, candidate.to_index()


def assert_valid_explicit_coords(variables, dims, explicit_coords):
Expand Down
41 changes: 40 additions & 1 deletion xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -4193,6 +4193,45 @@ def test_isin_dataset():
ds.isin(ds)


@pytest.mark.parametrize('unaligned_coords', (
    {'x': [2, 1, 0]},
    {'x': (['x'], np.asarray([2, 1, 0]))},
    {'x': (['x'], np.asarray([1, 2, 0]))},
    {'x': pd.Index([2, 1, 0])},
    {'x': Variable(dims='x', data=[0, 2, 1])},
    {'x': IndexVariable(dims='x', data=[0, 1, 2])},
    {'y': 42},
    {'y': ('x', [2, 1, 0])},
    {'y': ('x', np.asarray([2, 1, 0]))},
    {'y': (['x'], np.asarray([2, 1, 0]))},
))
@pytest.mark.parametrize('coords', (
    {'x': ('x', [0, 1, 2])},
    {'x': [0, 1, 2]},
))
def test_dataset_constructor_aligns_to_explicit_coords(
        unaligned_coords, coords):
    """Check the Dataset constructor against item assignment.

    A DataArray carrying ``unaligned_coords`` is passed to the Dataset
    constructor together with explicit ``coords``. The result must equal
    the Dataset obtained by creating a Dataset from ``coords`` alone and
    then assigning the same DataArray under the key ``'a'``.
    """
    data_array = xr.DataArray(
        [1, 2, 3], dims=['x'], coords=unaligned_coords)

    # Reference: build from the explicit coords, then assign the array.
    via_setitem = xr.Dataset(coords=coords)
    via_setitem['a'] = data_array

    # Under test: hand the array and the coords to the constructor at once.
    via_constructor = xr.Dataset({'a': data_array}, coords=coords)

    assert_equal(via_setitem, via_constructor)


@pytest.mark.parametrize('unaligned_coords', (
    {'y': ('b', np.asarray([2, 1, 0]))},
))
def test_constructor_raises_with_invalid_coords(unaligned_coords):
    """Check that DataArray rejects a coord on a dimension it lacks.

    The coord ``'y'`` is defined along dimension ``'b'`` while the array
    only has dimension ``'x'``, so construction is expected to raise
    ``ValueError``.
    """
    # ``match`` asserts on the text of the raised exception. The former
    # ``message`` keyword only customised the failure message shown when
    # nothing was raised, so it never verified the error text.
    with pytest.raises(ValueError,
                       match='not a subset of the DataArray dimensions'):
        xr.DataArray([1, 2, 3], dims=['x'], coords=unaligned_coords)


def test_dir_expected_attrs(data_set):

some_expected_attrs = {'pipe', 'mean', 'isnull', 'var1',
Expand All @@ -4205,7 +4244,7 @@ def test_dir_non_string(data_set):
# add a numbered key to ensure this doesn't break dir
data_set[5] = 'foo'
result = dir(data_set)
assert not (5 in result)
assert 5 not in result

# GH2172
sample_data = np.random.uniform(size=[2, 2000, 10000])
Expand Down

0 comments on commit 7036eb5

Please sign in to comment.