Skip to content

Commit

Permalink
add fields selection examples
Browse files Browse the repository at this point in the history
  • Loading branch information
alimanfoo committed Nov 10, 2017
1 parent 3b0966a commit 92a0b5c
Show file tree
Hide file tree
Showing 2 changed files with 307 additions and 7 deletions.
302 changes: 301 additions & 1 deletion notebooks/advanced_indexing.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
{
"data": {
"text/plain": [
"'2.1.5.dev113'"
"'2.1.5.dev118+dirty'"
]
},
"execution_count": 1,
Expand Down Expand Up @@ -967,6 +967,306 @@
"za[:]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Selecting fields from arrays with a structured dtype\n",
"\n",
"All ``get/set_selection_...()`` methods support a ``fields`` argument which allows retrieving/replacing data for a specific field or fields. Also h5py-like API is supported where fields can be provided within ``__getitem__``, ``.oindex[]`` and ``.vindex[]``."
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([(b'aaa', 1, 4.2), (b'bbb', 2, 8.4), (b'ccc', 3, 12.6)],\n",
" dtype=[('foo', 'S3'), ('bar', '<i4'), ('baz', '<f8')])"
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a = np.array([(b'aaa', 1, 4.2),\n",
" (b'bbb', 2, 8.4),\n",
" (b'ccc', 3, 12.6)], \n",
" dtype=[('foo', 'S3'), ('bar', 'i4'), ('baz', 'f8')])\n",
"za = zarr.array(a, chunks=2)\n",
"za[:]"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([b'aaa', b'bbb', b'ccc'],\n",
" dtype='|S3')"
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"za['foo']"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([(b'aaa', 4.2), (b'bbb', 8.4), (b'ccc', 12.6)],\n",
" dtype=[('foo', 'S3'), ('baz', '<f8')])"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"za['foo', 'baz']"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([b'aaa', b'bbb'],\n",
" dtype='|S3')"
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"za[:2, 'foo']"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([(b'aaa', 4.2), (b'bbb', 8.4)],\n",
" dtype=[('foo', 'S3'), ('baz', '<f8')])"
]
},
"execution_count": 53,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"za[:2, 'foo', 'baz']"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([b'aaa', b'ccc'],\n",
" dtype='|S3')"
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"za.oindex[[0, 2], 'foo']"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([b'aaa', b'ccc'],\n",
" dtype='|S3')"
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"za.vindex[[0, 2], 'foo']"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([(b'aaa', 42, 4.2), (b'bbb', 42, 8.4), (b'ccc', 42, 12.6)],\n",
" dtype=[('foo', 'S3'), ('bar', '<i4'), ('baz', '<f8')])"
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"za['bar'] = 42\n",
"za[:]"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([(b'aaa', 84, 4.2), (b'bbb', 84, 8.4), (b'ccc', 42, 12.6)],\n",
" dtype=[('foo', 'S3'), ('bar', '<i4'), ('baz', '<f8')])"
]
},
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"za[:2, 'bar'] = 84\n",
"za[:]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note that this API differs from numpy when selecting multiple fields. E.g.:"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {},
"outputs": [
{
"ename": "IndexError",
"evalue": "only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-62-ad64b4a52de3>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0ma\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'foo'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'baz'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mIndexError\u001b[0m: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices"
]
}
],
"source": [
"a['foo', 'baz']"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([(b'aaa', 4.2), (b'bbb', 8.4), (b'ccc', 12.6)],\n",
" dtype=[('foo', 'S3'), ('baz', '<f8')])"
]
},
"execution_count": 59,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a[['foo', 'baz']]"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([(b'aaa', 4.2), (b'bbb', 8.4), (b'ccc', 12.6)],\n",
" dtype=[('foo', 'S3'), ('baz', '<f8')])"
]
},
"execution_count": 60,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"za['foo', 'baz']"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {},
"outputs": [
{
"ename": "IndexError",
"evalue": "unsupported selection type; expected integer or contiguous slice, got ['foo', 'baz']",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-61-2394dac3f711>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mza\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'foo'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'baz'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m~/src/github/alimanfoo/zarr/zarr/core.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, selection)\u001b[0m\n\u001b[1;32m 470\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 471\u001b[0m \u001b[0mfields\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mselection\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpop_fields\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mselection\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 472\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_basic_selection\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mselection\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfields\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfields\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 473\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 474\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mget_basic_selection\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mselection\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfields\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/src/github/alimanfoo/zarr/zarr/core.py\u001b[0m in \u001b[0;36mget_basic_selection\u001b[0;34m(self, selection, out, fields)\u001b[0m\n\u001b[1;32m 483\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_basic_selection_zd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mselection\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mselection\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mout\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfields\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfields\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 484\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 485\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_basic_selection_nd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mselection\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mselection\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mout\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfields\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfields\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 486\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 487\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_get_basic_selection_zd\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mselection\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfields\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/src/github/alimanfoo/zarr/zarr/core.py\u001b[0m in \u001b[0;36m_get_basic_selection_nd\u001b[0;34m(self, selection, out, fields)\u001b[0m\n\u001b[1;32m 525\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 526\u001b[0m \u001b[0;31m# setup indexer\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 527\u001b[0;31m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mBasicIndexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mselection\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 528\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 529\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_selection\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mout\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfields\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mfields\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m~/src/github/alimanfoo/zarr/zarr/indexing.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, selection, array)\u001b[0m\n\u001b[1;32m 224\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 225\u001b[0m raise IndexError('unsupported selection type; expected integer or contiguous '\n\u001b[0;32m--> 226\u001b[0;31m 'slice, got {!r}'.format(dim_sel))\n\u001b[0m\u001b[1;32m 227\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 228\u001b[0m \u001b[0mdim_indexers\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdim_indexer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mIndexError\u001b[0m: unsupported selection type; expected integer or contiguous slice, got ['foo', 'baz']"
]
}
],
"source": [
"za[['foo', 'baz']]"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down
12 changes: 6 additions & 6 deletions zarr/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,16 @@
import numpy as np


from zarr.util import is_total_slice, human_readable_size, normalize_resize_args, \
normalize_storage_path, normalize_shape, normalize_chunks, InfoReporter
from zarr.util import (is_total_slice, human_readable_size, normalize_resize_args,
normalize_storage_path, normalize_shape, normalize_chunks, InfoReporter)
from zarr.storage import array_meta_key, attrs_key, listdir, getsize
from zarr.meta import decode_array_metadata, encode_array_metadata
from zarr.attrs import Attributes
from zarr.errors import PermissionError, err_read_only, err_array_not_found
from zarr.compat import reduce
from zarr.codecs import AsType, get_codec
from zarr.indexing import OIndex, OrthogonalIndexer, BasicIndexer, VIndex, CoordinateIndexer, \
MaskIndexer, check_fields, pop_fields, ensure_tuple
from zarr.indexing import (OIndex, OrthogonalIndexer, BasicIndexer, VIndex, CoordinateIndexer,
MaskIndexer, check_fields, pop_fields, ensure_tuple)


def is_scalar(value, dtype):
Expand Down Expand Up @@ -1130,8 +1130,8 @@ def bytestr(n):
return items

def __getstate__(self):
return self._store, self._path, self._read_only, self._chunk_store, self._synchronizer, \
self._cache_metadata
return (self._store, self._path, self._read_only, self._chunk_store, self._synchronizer,
self._cache_metadata)

def __setstate__(self, state):
self.__init__(*state)
Expand Down

0 comments on commit 92a0b5c

Please sign in to comment.