ENH: Add DataSet.get_column.
Adds a new `get_column` method on `zipline.pipeline.data.DataSet` for looking
up pipeline columns by name.
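A quick sketch of the new API (illustrative only, not part of the commit; it uses the built-in USEquityPricing dataset, but any DataSet works the same way):

from zipline.pipeline.data import USEquityPricing

# get_column resolves a name to the same BoundColumn that attribute
# access returns, so the two lookups below are interchangeable.
close = USEquityPricing.get_column('close')
assert close is USEquityPricing.close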
Scott Sanderson committed Apr 8, 2019
1 parent 2af3bc0 commit 9befcec
Showing 3 changed files with 128 additions and 0 deletions.
75 changes: 75 additions & 0 deletions tests/pipeline/test_dataset.py
@@ -0,0 +1,75 @@
"""Tests for the zipline.pipeline.data.DataSet and related functionality.
"""
from textwrap import dedent

from zipline.pipeline.data.dataset import Column, DataSet
from zipline.testing import chrange, ZiplineTestCase
from zipline.testing.predicates import assert_messages_equal


class SomeDataSet(DataSet):
    a = Column(dtype=float)
    b = Column(dtype=object)
    c = Column(dtype=int, missing_value=-1)


# A DataSet with lots of columns.
class LargeDataSet(DataSet):
    locals().update({
        name: Column(dtype=float)
        for name in chrange('a', 'z')
    })


class GetColumnTestCase(ZiplineTestCase):

    def test_get_column_success(self):
        a = SomeDataSet.a
        b = SomeDataSet.b
        c = SomeDataSet.c

        # Run multiple times to validate caching of descriptor return values.
        for _ in range(3):
            self.assertIs(SomeDataSet.get_column('a'), a)
            self.assertIs(SomeDataSet.get_column('b'), b)
            self.assertIs(SomeDataSet.get_column('c'), c)

    def test_get_column_failure(self):
        with self.assertRaises(AttributeError) as e:
            SomeDataSet.get_column('arglebargle')

        result = str(e.exception)
        expected = dedent(
            """\
            SomeDataSet has no column 'arglebargle':

            Possible choices are:
              - a
              - b
              - c"""
        )
        assert_messages_equal(result, expected)

    def test_get_column_failure_truncate_error_message(self):
        with self.assertRaises(AttributeError) as e:
            LargeDataSet.get_column('arglebargle')

        result = str(e.exception)
        expected = dedent(
            """\
            LargeDataSet has no column 'arglebargle':

            Possible choices are:
              - a
              - b
              - c
              - d
              - e
              - f
              - g
              - h
              - i
              - ...
              - z"""
        )
        assert_messages_equal(result, expected)
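An aside on the `locals().update(...)` trick in LargeDataSet above: inside a class body, `locals()` is the namespace being built for the class, so updating it defines class attributes programmatically. A minimal sketch of the same idiom (CPython-specific behavior; Point is a made-up example, not part of this commit):

class Point:
    # Define x, y, and z as class attributes without writing them out.
    locals().update({name: 0.0 for name in ('x', 'y', 'z')})

assert Point.x == Point.y == Point.z == 0.0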
42 changes: 42 additions & 0 deletions zipline/pipeline/data/dataset.py
@@ -25,6 +25,7 @@
from zipline.utils.input_validation import ensure_dtype, expect_types
from zipline.utils.numpy_utils import NoDefaultMissingValue
from zipline.utils.preprocess import preprocess
from zipline.utils.string_formatting import bulleted_list


IsSpecialization = sentinel('IsSpecialization')
@@ -497,6 +498,47 @@ class CompanyMetadata(DataSet):
    domain = GENERIC
    ndim = 2

    @classmethod
    def get_column(cls, name):
        """Look up a column by name.

        Parameters
        ----------
        name : str
            Name of the column to look up.

        Returns
        -------
        column : zipline.pipeline.data.BoundColumn
            Column with the given name.

        Raises
        ------
        AttributeError
            If no column with the given name exists.
        """
        clsdict = vars(cls)
        try:
            maybe_column = clsdict[name]
            if not isinstance(maybe_column, _BoundColumnDescr):
                raise KeyError(name)
        except KeyError:
            raise AttributeError(
                "{dset} has no column {colname!r}:\n\n"
                "Possible choices are:\n"
                "{choices}".format(
                    dset=cls.qualname,
                    colname=name,
                    choices=bulleted_list(
                        sorted(cls._column_names),
                        max_count=10,
                    ),
                )
            )

        # Resolve column descriptor into a BoundColumn.
        return maybe_column.__get__(None, cls)


# This attribute is set by DataSetMeta to mark that a class is the root of a
# family of datasets with different domains. We don't want that behavior for the
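A note on the final line of get_column: `_BoundColumnDescr` is a descriptor, so calling its `__get__` with the class resolves it to the same cached BoundColumn that ordinary attribute access yields, which is what test_get_column_success verifies. A rough sketch of the equivalence (using SomeDataSet from the tests above):

descr = vars(SomeDataSet)['a']              # raw entry in the class dict
column = descr.__get__(None, SomeDataSet)   # descriptor protocol, no instance
assert column is SomeDataSet.a              # same object as attribute access
assert column is SomeDataSet.get_column('a')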
11 changes: 11 additions & 0 deletions zipline/utils/string_formatting.py
@@ -0,0 +1,11 @@
def bulleted_list(items, max_count=None, indent=2):
    """Format a bulleted list of values.
    """
    if max_count is not None and len(items) > max_count:
        item_list = list(items)
        items = item_list[:max_count - 1]
        items.append('...')
        items.append(item_list[-1])

    line_template = (" " * indent) + "- {}"
    return "\n".join(map(line_template.format, items))
