Skip to content

Commit

Permalink
More API reorganization
Browse files Browse the repository at this point in the history
Renamed: index -> as_index, labeled_by -> labeled, indexed_by -> indexed

Also, cleaned up the behavior of Dataset.select() as described in the README
(yes, that probably should have been a separate commit).
  • Loading branch information
shoyer committed Apr 14, 2014
1 parent 3753832 commit 5e997e9
Show file tree
Hide file tree
Showing 13 changed files with 246 additions and 233 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -188,10 +188,10 @@ Aspects of the API that we currently intend to change:
- The constructor for `DataArray` objects will change, so that it is possible
to create new `DataArray` objects without putting them into a `Dataset`
first.
- We currently check `var.attributes['coordinates']` for figuring out which
- ~~We currently check `var.attributes['coordinates']` for figuring out which
variables to select with `Dataset.select`. This will probably be removed:
we don't want users to rely on attribute metadata that is not necessarily
maintained by array operations.
maintained by array operations.~~
- Array reduction methods like `mean` may change to NA skipping versions
(like pandas).
- Array indexing will be made lazy, instead of immediately creating an
Expand Down
6 changes: 3 additions & 3 deletions test/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,12 +85,12 @@ def test_orthogonal_indexing(self):
in_memory = create_test_data()
on_disk = self.roundtrip(in_memory)
indexers = {'dim1': range(3), 'dim2': range(4), 'dim3': range(5)}
expected = in_memory.indexed_by(**indexers)
actual = on_disk.indexed_by(**indexers)
expected = in_memory.indexed(**indexers)
actual = on_disk.indexed(**indexers)
self.assertDatasetAllClose(expected, actual)
# do it twice, to make sure we're switched from orthogonal -> numpy
# when we cached the values
actual = on_disk.indexed_by(**indexers)
actual = on_disk.indexed(**indexers)
self.assertDatasetAllClose(expected, actual)


Expand Down
22 changes: 11 additions & 11 deletions test/test_data_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,18 +65,18 @@ def test_items(self):
self.assertEqual(self.dv[0, 0].dataset,
Dataset({'foo': self.dv.variable[0, 0]}))

def test_indexed_by(self):
self.assertEqual(self.dv[0].dataset, self.ds.indexed_by(x=0))
def test_indexed(self):
self.assertEqual(self.dv[0].dataset, self.ds.indexed(x=0))
self.assertEqual(self.dv[:3, :5].dataset,
self.ds.indexed_by(x=slice(3), y=slice(5)))
self.assertDataArrayEqual(self.dv, self.dv.indexed_by(x=slice(None)))
self.assertDataArrayEqual(self.dv[:3], self.dv.indexed_by(x=slice(3)))
self.ds.indexed(x=slice(3), y=slice(5)))
self.assertDataArrayEqual(self.dv, self.dv.indexed(x=slice(None)))
self.assertDataArrayEqual(self.dv[:3], self.dv.indexed(x=slice(3)))

def test_labeled_by(self):
def test_labeled(self):
self.ds['x'] = ('x', np.array(list('abcdefghij')))
self.assertDataArrayEqual(self.dv, self.dv.labeled_by(x=slice(None)))
self.assertDataArrayEqual(self.dv[1], self.dv.labeled_by(x='b'))
self.assertDataArrayEqual(self.dv[:3], self.dv.labeled_by(x=slice('c')))
self.assertDataArrayEqual(self.dv, self.dv.labeled(x=slice(None)))
self.assertDataArrayEqual(self.dv[1], self.dv.labeled(x='b'))
self.assertDataArrayEqual(self.dv[:3], self.dv.labeled(x=slice('c')))

def test_loc(self):
self.ds['x'] = ('x', np.array(list('abcdefghij')))
Expand Down Expand Up @@ -238,8 +238,8 @@ def test_groupby(self):
grouped = self.dv.groupby('abc', squeeze=True)
expected_sum_all = Dataset(
{'foo': Variable(['abc'], np.array([self.x[:, :9].sum(),
self.x[:, 10:].sum(),
self.x[:, 9:10].sum()]).T,
self.x[:, 10:].sum(),
self.x[:, 9:10].sum()]).T,
{'cell_methods': 'x: y: sum'}),
'abc': Variable(['abc'], np.array(['a', 'b', 'c']))})['foo']
self.assertDataArrayAllClose(
Expand Down
76 changes: 36 additions & 40 deletions test/test_dataset.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,13 @@
import mock
import unittest

from collections import OrderedDict
from copy import deepcopy
from textwrap import dedent

import numpy as np
import pandas as pd

from xray import (Dataset, DataArray, Variable, backends, open_dataset, utils,
align, conventions)
from xray import Dataset, DataArray, Variable, backends, utils, align

from . import TestCase, requires_netCDF4
from . import TestCase


_dims = {'dim1': 100, 'dim2': 50, 'dim3': 10}
Expand Down Expand Up @@ -135,7 +131,7 @@ def test_coordinate(self):
attributes = {'foo': 'bar'}
a['x'] = ('x', vec, attributes)
self.assertTrue('x' in a.coordinates)
self.assertIsInstance(a.coordinates['x'].index, pd.Index)
self.assertIsInstance(a.coordinates['x'].as_index, pd.Index)
self.assertVariableEqual(a.coordinates['x'], a.variables['x'])
b = Dataset()
b['x'] = ('x', vec, attributes)
Expand All @@ -155,10 +151,10 @@ def test_coordinate(self):
a['y'] = ('y', scal)
self.assertTrue('y' not in a.dimensions)

def test_indexed_by(self):
def test_indexed(self):
data = create_test_data()
slicers = {'dim1': slice(None, None, 2), 'dim2': slice(0, 2)}
ret = data.indexed_by(**slicers)
ret = data.indexed(**slicers)

# Verify that only the specified dimension was altered
self.assertItemsEqual(data.dimensions, ret.dimensions)
Expand All @@ -183,47 +179,47 @@ def test_indexed_by(self):
np.testing.assert_array_equal(expected, actual)

with self.assertRaises(ValueError):
data.indexed_by(not_a_dim=slice(0, 2))
data.indexed(not_a_dim=slice(0, 2))

ret = data.indexed_by(dim1=0)
ret = data.indexed(dim1=0)
self.assertEqual({'time': 20, 'dim2': 50, 'dim3': 10}, ret.dimensions)
self.assertItemsEqual(list(data.noncoordinates) + ['dim1'],
ret.noncoordinates)

ret = data.indexed_by(time=slice(2), dim1=0, dim2=slice(5))
ret = data.indexed(time=slice(2), dim1=0, dim2=slice(5))
self.assertEqual({'time': 2, 'dim2': 5, 'dim3': 10}, ret.dimensions)
self.assertItemsEqual(list(data.noncoordinates) + ['dim1'],
ret.noncoordinates)

ret = data.indexed_by(time=0, dim1=0, dim2=slice(5))
ret = data.indexed(time=0, dim1=0, dim2=slice(5))
self.assertItemsEqual({'dim2': 5, 'dim3': 10}, ret.dimensions)
self.assertItemsEqual(list(data.noncoordinates) + ['dim1', 'time'],
ret.noncoordinates)

def test_labeled_by(self):
def test_labeled(self):
data = create_test_data()
int_slicers = {'dim1': slice(None, None, 2),
'dim2': slice(2),
'dim3': slice(3)}
loc_slicers = {'dim1': slice(None, None, 2),
'dim2': slice(0, 0.5),
'dim3': slice('a', 'c')}
self.assertEqual(data.indexed_by(**int_slicers),
data.labeled_by(**loc_slicers))
self.assertEqual(data.indexed(**int_slicers),
data.labeled(**loc_slicers))
data['time'] = ('time', pd.date_range('2000-01-01', periods=20))
self.assertEqual(data.indexed_by(time=0),
data.labeled_by(time='2000-01-01'))
self.assertEqual(data.indexed_by(time=slice(10)),
data.labeled_by(time=slice('2000-01-01',
self.assertEqual(data.indexed(time=0),
data.labeled(time='2000-01-01'))
self.assertEqual(data.indexed(time=slice(10)),
data.labeled(time=slice('2000-01-01',
'2000-01-10')))
self.assertEqual(data, data.labeled_by(time=slice('1999', '2005')))
self.assertEqual(data.indexed_by(time=slice(3)),
data.labeled_by(
self.assertEqual(data, data.labeled(time=slice('1999', '2005')))
self.assertEqual(data.indexed(time=slice(3)),
data.labeled(
time=pd.date_range('2000-01-01', periods=3)))

def test_reindex_like(self):
data = create_test_data()
expected = data.indexed_by(dim1=slice(10), time=slice(13))
expected = data.indexed(dim1=slice(10), time=slice(13))
actual = data.reindex_like(expected)
self.assertDatasetEqual(actual, expected)

Expand All @@ -236,14 +232,14 @@ def test_reindex_like(self):

def test_reindex(self):
data = create_test_data()
expected = data.indexed_by(dim1=slice(10))
expected = data.indexed(dim1=slice(10))
actual = data.reindex(dim1=data['dim1'][:10])
self.assertDatasetEqual(actual, expected)

actual = data.reindex(dim1=data['dim1'][:10].values)
self.assertDatasetEqual(actual, expected)

actual = data.reindex(dim1=data['dim1'][:10].index)
actual = data.reindex(dim1=data['dim1'][:10].as_index)
self.assertDatasetEqual(actual, expected)

def test_align(self):
Expand All @@ -263,23 +259,23 @@ def test_align(self):
left2, right2 = align(left, right, join='outer')
self.assertVariableEqual(left2['dim3'], right2['dim3'])
self.assertArrayEqual(left2['dim3'], union)
self.assertDatasetEqual(left2.labeled_by(dim3=intersection),
right2.labeled_by(dim3=intersection))
self.assertDatasetEqual(left2.labeled(dim3=intersection),
right2.labeled(dim3=intersection))
self.assertTrue(np.isnan(left2['var3'][-2:]).all())
self.assertTrue(np.isnan(right2['var3'][:2]).all())

left2, right2 = align(left, right, join='left')
self.assertVariableEqual(left2['dim3'], right2['dim3'])
self.assertVariableEqual(left2['dim3'], left['dim3'])
self.assertDatasetEqual(left2.labeled_by(dim3=intersection),
right2.labeled_by(dim3=intersection))
self.assertDatasetEqual(left2.labeled(dim3=intersection),
right2.labeled(dim3=intersection))
self.assertTrue(np.isnan(right2['var3'][:2]).all())

left2, right2 = align(left, right, join='right')
self.assertVariableEqual(left2['dim3'], right2['dim3'])
self.assertVariableEqual(left2['dim3'], right['dim3'])
self.assertDatasetEqual(left2.labeled_by(dim3=intersection),
right2.labeled_by(dim3=intersection))
self.assertDatasetEqual(left2.labeled(dim3=intersection),
right2.labeled(dim3=intersection))
self.assertTrue(np.isnan(left2['var3'][-2:]).all())

def test_variable_indexing(self):
Expand Down Expand Up @@ -389,7 +385,7 @@ def test_merge(self):
actual = ds1.merge(ds2)
self.assertEqual(expected, actual)
with self.assertRaises(ValueError):
ds1.merge(ds2.indexed_by(dim1=slice(2)))
ds1.merge(ds2.indexed(dim1=slice(2)))
with self.assertRaises(ValueError):
ds1.merge(ds2.rename({'var3': 'var1'}))

Expand All @@ -405,14 +401,14 @@ def test_virtual_variables(self):
self.assertVariableEqual(data['time.dayofyear'],
Variable('time', 1 + np.arange(20)))
self.assertArrayEqual(data['time.month'].values,
data.variables['time'].index.month)
data.variables['time'].as_index.month)
# test virtual variable math
self.assertArrayEqual(data['time.dayofyear'] + 1, 2 + np.arange(20))
self.assertArrayEqual(np.sin(data['time.dayofyear']),
np.sin(1 + np.arange(20)))
# test slicing the virtual variable -- it should still be virtual
actual = data['time.dayofyear'][:10].dataset
expected = data.indexed_by(time=slice(10))
expected = data.indexed(time=slice(10))
self.assertDatasetEqual(expected, actual)

def test_setitem(self):
Expand Down Expand Up @@ -467,9 +463,9 @@ def test_groupby(self):
self.assertEqual(len(groupby), 3)
expected_groups = {'a': 0, 'b': 1, 'c': 2}
self.assertEqual(groupby.groups, expected_groups)
expected_items = [('a', data.indexed_by(x=0)),
('b', data.indexed_by(x=1)),
('c', data.indexed_by(x=2))]
expected_items = [('a', data.indexed(x=0)),
('b', data.indexed(x=1)),
('c', data.indexed(x=2))]
self.assertEqual(list(groupby), expected_items)

identity = lambda x: x
Expand All @@ -487,8 +483,8 @@ def test_groupby(self):
def test_concat(self):
data = create_test_data()

split_data = [data.indexed_by(dim1=slice(10)),
data.indexed_by(dim1=slice(10, None))]
split_data = [data.indexed(dim1=slice(10)),
data.indexed(dim1=slice(10, None))]
self.assertDatasetEqual(data, Dataset.concat(split_data, 'dim1'))

def rectify_dim_order(dataset):
Expand Down
8 changes: 4 additions & 4 deletions test/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import numpy as np
import pandas as pd

from xray import utils, Variable
from xray import utils, Dataset, Variable
from . import TestCase, ReturnItem, requires_netCDF4


Expand Down Expand Up @@ -60,10 +60,10 @@ def test_orthogonal_indexer(self):
with self.assertRaisesRegexp(ValueError, 'only supports 1d'):
utils.orthogonal_indexer(x > 0, x.shape)

def test_remap_loc_indexers(self):
def test_remap_label_indexers(self):
# TODO: fill in more tests!
indices = {'x': Variable(['x'], pd.Index([1, 2, 3]))}
test_indexer = lambda x: utils.remap_loc_indexers(indices, {'x': x})
data = Dataset({'x': ('x', [1, 2, 3])})
test_indexer = lambda x: utils.remap_label_indexers(data, {'x': x})
self.assertEqual({'x': 0}, test_indexer(1))
self.assertEqual({'x': 0}, test_indexer(np.int32(1)))
self.assertEqual({'x': 0}, test_indexer(Variable([], 1)))
Expand Down
16 changes: 9 additions & 7 deletions test/test_variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def test_properties(self):
v = Variable(['time'], data, {'foo': 'bar'})
self.assertEqual(v.dimensions, ('time',))
self.assertArrayEqual(v.values, data)
self.assertTrue(pd.Index(data).equals(v.index))
self.assertTrue(pd.Index(data).equals(v.as_index))
self.assertEqual(v.dtype, float)
self.assertEqual(v.shape, (10,))
self.assertEqual(v.size, 10)
Expand Down Expand Up @@ -253,18 +253,20 @@ def test_items(self):
# test iteration
for n, item in enumerate(v):
self.assertVariableEqual(Variable(['y'], data[n]), item)
with self.assertRaisesRegexp(TypeError, 'iteration over a 0-d'):
iter(Variable([], 0))
# test setting
v.values[:] = 0
self.assertTrue(np.all(v.values == 0))

def test_indexed_by(self):
def test_indexed(self):
v = Variable(['time', 'x'], self.d)
self.assertVariableEqual(v.indexed_by(time=slice(None)), v)
self.assertVariableEqual(v.indexed_by(time=0), v[0])
self.assertVariableEqual(v.indexed_by(time=slice(0, 3)), v[:3])
self.assertVariableEqual(v.indexed_by(x=0), v[:, 0])
self.assertVariableEqual(v.indexed(time=slice(None)), v)
self.assertVariableEqual(v.indexed(time=0), v[0])
self.assertVariableEqual(v.indexed(time=slice(0, 3)), v[:3])
self.assertVariableEqual(v.indexed(x=0), v[:, 0])
with self.assertRaisesRegexp(ValueError, 'do not exist'):
v.indexed_by(not_a_dim=0)
v.indexed(not_a_dim=0)

def test_transpose(self):
v = Variable(['time', 'x'], self.d)
Expand Down
2 changes: 1 addition & 1 deletion xray/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from .variable import Variable
from .variable import Variable, CoordVariable
from .dataset import Dataset, open_dataset
from .data_array import DataArray, align
from .utils import class_alias as _class_alias
Expand Down
9 changes: 9 additions & 0 deletions xray/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,15 @@ def __array__(self, dtype=None):
def __repr__(self):
return array_repr(self)

def _iter(self):
for n in range(len(self)):
yield self[n]

def __iter__(self):
if self.ndim == 0:
raise TypeError('iteration over a 0-d array')
return self._iter()

@property
def T(self):
return self.transpose()
Expand Down
Loading

0 comments on commit 5e997e9

Please sign in to comment.