From b7728c188cdbd1afa89798ea94069effc7fdc146 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Wed, 20 Jun 2012 12:56:18 -0400 Subject: [PATCH] TST: resample test coverage etc. #1245 --- pandas/tests/test_series.py | 4 ++ pandas/tseries/period.py | 79 +++++++++------------------ pandas/tseries/resample.py | 44 ++++----------- pandas/tseries/tests/test_period.py | 51 ++++++++++++++++- pandas/tseries/tests/test_resample.py | 28 +++++++++- pandas/tseries/tools.py | 3 +- test.sh | 2 +- 7 files changed, 122 insertions(+), 89 deletions(-) diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 32ce0d4c44649..930881038cb1b 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -2707,6 +2707,10 @@ def test_asfreq(self): monthly_ts = daily_ts.asfreq(datetools.bmonthEnd) self.assert_(np.array_equal(monthly_ts, ts)) + result = ts[:0].asfreq('M') + self.assert_(len(result) == 0) + self.assert_(result is not ts) + def test_interpolate(self): ts = Series(np.arange(len(self.ts), dtype=float), self.ts.index) diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 1d2c7ea18c589..0c526031cd479 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -21,44 +21,20 @@ # Period logic -def _period_field_accessor(name, alias=None): - if alias is None: - alias = name +def _period_field_accessor(name, alias): def f(self): base, mult = _gfc(self.freq) return plib.get_period_field(alias, self.ordinal, base) f.__name__ = name return property(f) -def _field_accessor(name, alias=None): - if alias is None: - alias = name +def _field_accessor(name, alias): def f(self): base, mult = _gfc(self.freq) return plib.get_period_field_arr(alias, self.values, base) f.__name__ = name return property(f) -def to_period(arg, freq=None): - """ Attempts to convert arg to timestamp """ - if arg is None: - return arg - - if type(arg) == float: - raise TypeError("Cannot convert a float to period") - - return Period(arg, freq=freq) - -def _to_quarterly(year, month, freq='Q-DEC'): - fmonth = _freq_mod._month_numbers[_freq_mod._get_rule_month(freq)] + 1 - print fmonth - mdiff = (month - fmonth) % 12 - if month >= fmonth: - mdiff += 12 - - ordin = 1 + (year - 1) * 4 + (mdiff - 1) / 3 - return Period(ordin, freq=freq) - class Period(object): __slots__ = ['freq', 'ordinal'] @@ -106,9 +82,6 @@ def __init__(self, value=None, freq=None, ordinal=None, if freq is None: raise ValueError("If value is None, freq cannot be None") - if year is None: - raise ValueError("If value is None, year cannot be None") - self.ordinal = _ordinal_from_fields(year, month, quarter, day, hour, minute, second, freq) @@ -172,13 +145,16 @@ def __sub__(self, other): else: # pragma: no cover raise TypeError(other) - def asfreq(self, freq=None, how='E'): + def asfreq(self, freq, how='E'): """ + Convert Period to desired frequency, either at the start or end of the + interval Parameters ---------- - freq : - how : + freq : string + how : {'E', 'S', 'end', 'start'}, default 'end' + Start or end of the timespan Returns ------- @@ -220,7 +196,6 @@ def to_timestamp(self, freq=None, how='S'): ------- Timestamp """ - # how = _validate_end_alias(how) if freq is None: base, mult = _gfc(self.freq) new_val = self @@ -228,9 +203,6 @@ def to_timestamp(self, freq=None, how='S'): base, mult = _gfc(freq) new_val = self.asfreq(freq, how) - if mult != 1: - raise ValueError('Only mult == 1 supported') - dt64 = plib.period_ordinal_to_dt64(new_val.ordinal, base) ts_freq = _period_rule_to_timestamp_rule(new_val.freq, how=how) return Timestamp(dt64, offset=to_offset(ts_freq)) @@ -424,7 +396,7 @@ def _get_date_and_freq(value, freq): elif reso == 'second': freq = 'S' else: - raise ValueError("Could not infer frequency for period") + raise ValueError("Invalid frequency or could not infer: %s" % reso) return dt, freq @@ -444,11 +416,6 @@ def _period_unbox_array(arr, check=None): unboxer = np.frompyfunc(lambda x: _period_unbox(x, check=check), 1, 1) return unboxer(arr) -def _period_box_array(arr, freq): - boxfunc = lambda x: Period(ordinal=x, freq=freq) - boxer = np.frompyfunc(boxfunc, 1, 1) - return boxer(arr) - def dt64arr_to_periodarr(data, freq): if data.dtype != np.dtype('M8[ns]'): raise ValueError('Wrong dtype: %s' % data.dtype) @@ -479,7 +446,10 @@ def wrapper(self, other): return result return wrapper + _INT64_DTYPE = np.dtype(np.int64) +_NS_DTYPE = np.dtype('M8[ns]') + class PeriodIndex(Int64Index): """ @@ -730,12 +700,18 @@ def map(self, f): try: return f(self) except: - values = np.asarray(list(self), dtype=object) + values = self._get_object_array() return _algos.arrmap_object(values, f) + def _get_object_array(self): + freq = self.freq + boxfunc = lambda x: Period(ordinal=x, freq=freq) + boxer = np.frompyfunc(boxfunc, 1, 1) + return boxer(self.values) + def _mpl_repr(self): # how to represent ourselves to matplotlib - return _period_box_array(self, self.freq) + return self._get_object_array() def to_timestamp(self, freq=None, how='start'): """ @@ -758,9 +734,6 @@ def to_timestamp(self, freq=None, how='start'): base, mult = _gfc(freq) new_data = self.asfreq(freq, how) - if mult != 1: - raise ValueError('Only mult == 1 supported') - new_data = plib.periodarr_to_dt64arr(new_data.values, base) return DatetimeIndex(new_data, freq='infer', name=self.name) @@ -823,14 +796,14 @@ def get_value(self, series, key): key = slice(pos[0], pos[1]+1) return series[key] else: - key = to_period(asdt, freq=self.freq) + key = Period(asdt, freq=self.freq) return self._engine.get_value(series, key.ordinal) except TypeError: pass except KeyError: pass - key = to_period(key, self.freq) + key = Period(key, self.freq) return self._engine.get_value(series, key.ordinal) def get_loc(self, key): @@ -850,7 +823,7 @@ def get_loc(self, key): except TypeError: pass - key = to_period(key, self.freq).ordinal + key = Period(key, self.freq).ordinal return self._engine.get_loc(key) def join(self, other, how='left', level=None, return_indexers=False): @@ -946,8 +919,10 @@ def _get_ordinal_range(start, end, periods, freq): if com._count_not_none(start, end, periods) < 2: raise ValueError('Must specify 2 of start, end, periods') - start = to_period(start, freq) - end = to_period(end, freq) + if start is not None: + start = Period(start, freq) + if end is not None: + end = Period(end, freq) is_start_per = isinstance(start, Period) is_end_per = isinstance(end, Period) diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py index 41fac33d9b6ad..a2b1c2033e896 100644 --- a/pandas/tseries/resample.py +++ b/pandas/tseries/resample.py @@ -22,8 +22,6 @@ class TimeGrouper(CustomGrouper): rule : pandas offset string or object for identifying bin edges closed : closed end of interval; left (default) or right label : interval boundary to use for labeling; left (default) or right - begin : optional, timestamp-like - end : optional, timestamp-like nperiods : optional, integer convention : {'start', 'end', 'e', 's'} If axis is PeriodIndex @@ -34,14 +32,12 @@ class TimeGrouper(CustomGrouper): directly from the associated object """ def __init__(self, freq='Min', closed='right', label='right', how='mean', - begin=None, end=None, nperiods=None, axis=0, + nperiods=None, axis=0, fill_method=None, limit=None, loffset=None, kind=None, convention=None, base=0): self.freq = freq self.closed = closed self.label = label - self.begin = begin - self.end = end self.nperiods = nperiods self.kind = kind self.convention = convention or 'E' @@ -94,8 +90,8 @@ def _get_time_bins(self, axis): binner = labels = DatetimeIndex(data=[], freq=self.freq) return binner, [], labels - first, last = _get_range_edges(axis, self.begin, self.end, self.freq, - closed=self.closed, base=self.base) + first, last = _get_range_edges(axis, self.freq, closed=self.closed, + base=self.base) binner = labels = DatetimeIndex(freq=self.freq, start=first, end=last) # a little hack @@ -156,9 +152,6 @@ def _get_time_period_bins(self, axis): end_stamps = (labels + 1).asfreq('D', 's').to_timestamp() bins = axis.searchsorted(end_stamps, side='left') - if bins[-1] < len(axis): - bins = np.concatenate([bins, [len(axis)]]) - return binner, bins, labels def _resample_timestamps(self, obj): @@ -212,12 +205,8 @@ def _resample_periods(self, obj): if is_subperiod(axlabels.freq, self.freq): # Downsampling - if len(memb) > 1: - rng = np.arange(memb.values[0], memb.values[-1]) - bins = memb.searchsorted(rng, side='right') - else: - bins = np.array([], dtype=np.int32) - + rng = np.arange(memb.values[0], memb.values[-1]) + bins = memb.searchsorted(rng, side='right') grouper = BinGrouper(bins, new_index) grouped = obj.groupby(grouper, axis=self.axis) @@ -255,34 +244,23 @@ def _take_new_index(obj, indexer, new_index, axis=0): -def _get_range_edges(axis, begin, end, offset, closed='left', - base=0): +def _get_range_edges(axis, offset, closed='left', base=0): if isinstance(offset, basestring): offset = to_offset(offset) - if not isinstance(offset, DateOffset): - raise ValueError("Rule not a recognized offset") - if isinstance(offset, Tick): day_nanos = _delta_to_nanoseconds(timedelta(1)) # #1165 - if ((day_nanos % offset.nanos) == 0 and begin is None - and end is None): + if (day_nanos % offset.nanos) == 0: return _adjust_dates_anchored(axis[0], axis[-1], offset, closed=closed, base=base) - if begin is None: - if closed == 'left': - first = Timestamp(offset.rollback(axis[0])) - else: - first = Timestamp(axis[0] - offset) + if closed == 'left': + first = Timestamp(offset.rollback(axis[0])) else: - first = Timestamp(offset.rollback(begin)) + first = Timestamp(axis[0] - offset) - if end is None: - last = Timestamp(axis[-1] + offset) - else: - last = Timestamp(offset.rollforward(end)) + last = Timestamp(axis[-1] + offset) return first, last diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py index f94eafef7486b..57076e7348cff 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ -184,9 +184,13 @@ def test_strftime(self): '2000-01-01 12:34:12') def test_sub_delta(self): - result = Period('2011', freq='A') - Period('2007', freq='A') + left, right = Period('2011', freq='A'), Period('2007', freq='A') + result = left - right self.assertEqual(result, 4) + self.assertRaises(ValueError, left.__sub__, + Period('2007-01', freq='M')) + def test_to_timestamp(self): p = Period('1982', freq='A') start_ts = p.to_timestamp(how='S') @@ -234,6 +238,8 @@ def test_to_timestamp(self): result = p.to_timestamp('S', how='start') self.assertEquals(result, expected) + self.assertRaises(ValueError, p.to_timestamp, '5t') + def test_properties_annually(self): # Test properties on Periods with annually frequency. a_date = Period(freq='A', year=2007) @@ -354,6 +360,39 @@ def test_constructor_corner(self): self.assertRaises(ValueError, Period, year=2007, month=1, freq='2M') + self.assertRaises(ValueError, Period, datetime.now()) + self.assertRaises(ValueError, Period, 1.6, freq='D') + self.assertRaises(ValueError, Period, ordinal=1.6, freq='D') + self.assertRaises(ValueError, Period, ordinal=2, value=1, freq='D') + self.assertRaises(ValueError, Period) + self.assertRaises(ValueError, Period, month=1) + + p = Period('2007-01-01', freq='D') + + result = Period(p, freq='A') + exp = Period('2007', freq='A') + self.assertEquals(result, exp) + + def test_constructor_infer_freq(self): + p = Period('2007-01-01') + self.assert_(p.freq == 'D') + + p = Period('2007-01-01 07') + self.assert_(p.freq == 'H') + + p = Period('2007-01-01 07:10') + self.assert_(p.freq == 'T') + + p = Period('2007-01-01 07:10:15') + self.assert_(p.freq == 'S') + + self.assertRaises(ValueError, Period, '2007-01-01 07:10:15.123456') + + def test_comparisons(self): + p = Period('2007-01-01') + self.assertEquals(p, p) + self.assert_(not p == 1) + def noWrap(item): return item @@ -363,6 +402,10 @@ class TestFreqConversion(TestCase): def __init__(self, *args, **kwds): TestCase.__init__(self, *args, **kwds) + def test_asfreq_corner(self): + val = Period(freq='A', year=2007) + self.assertRaises(ValueError, val.asfreq, '5t') + def test_conv_annual(self): # frequency conversion tests: from Annual Frequency @@ -1039,6 +1082,12 @@ def test_constructor_fromarraylike(self): exp = idx.asfreq('D', 'e') self.assert_(result.equals(exp)) + def test_constructor_datetime64arr(self): + vals = np.arange(100000, 100000 + 10000, 100, dtype=np.int64) + vals = vals.view(np.dtype('M8[us]')) + + self.assertRaises(ValueError, PeriodIndex, vals, freq='D') + def test_comp_period(self): idx = period_range('2007-01', periods=20, freq='M') diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py index a7ccee941582a..15c82b8c96e63 100644 --- a/pandas/tseries/tests/test_resample.py +++ b/pandas/tseries/tests/test_resample.py @@ -6,7 +6,7 @@ from pandas.tseries.index import date_range from pandas.tseries.offsets import Minute, BDay -from pandas.tseries.period import period_range, PeriodIndex +from pandas.tseries.period import period_range, PeriodIndex, Period from pandas.tseries.resample import DatetimeIndex, TimeGrouper import pandas.tseries.offsets as offsets import pandas as pd @@ -462,6 +462,10 @@ def test_resample_anchored_intraday(self): expected = df.resample('Q', kind='period', closed='left').to_timestamp() tm.assert_frame_equal(result, expected) + ts = _simple_ts('2012-04-29 23:00', '2012-04-30 5:00', freq='h') + resampled = ts.resample('M') + self.assert_(len(resampled) == 1) + def test_resample_anchored_monthstart(self): ts = _simple_ts('1/1/2000', '12/31/2002') @@ -470,6 +474,27 @@ def test_resample_anchored_monthstart(self): for freq in freqs: result = ts.resample(freq, how='mean') + def test_corner_cases(self): + # miscellaneous test coverage + + rng = date_range('1/1/2000', periods=12, freq='t') + ts = Series(np.random.randn(len(rng)), index=rng) + + result = ts.resample('5t', closed='right', label='left') + ex_index = date_range('1999-12-31 23:55', periods=4, freq='5t') + self.assert_(result.index.equals(ex_index)) + + len0pts = _simple_pts('2007-01', '2010-05', freq='M')[:0] + # it works + result = len0pts.resample('A-DEC') + self.assert_(len(result) == 0) + + # resample to periods + ts = _simple_ts('2000-04-28', '2000-04-30 11:00', freq='h') + result = ts.resample('M', kind='period') + self.assert_(len(result) == 1) + self.assert_(result.index[0] == Period('2000-04', freq='M')) + def _simple_ts(start, end, freq='D'): rng = date_range(start, end, freq=freq) @@ -720,6 +745,7 @@ def test_closed_left_corner(self): self.assert_(result.index.equals(ex_index)) assert_series_equal(result, exp) + class TestTimeGrouper(unittest.TestCase): def setUp(self): diff --git a/pandas/tseries/tools.py b/pandas/tseries/tools.py index c12ba901da8a6..ed430f8bd9134 100644 --- a/pandas/tseries/tools.py +++ b/pandas/tseries/tools.py @@ -210,7 +210,7 @@ def parse_time_string(arg, freq=None): "minute", "second", "microsecond"]: can_be_zero = ['hour', 'minute', 'second', 'microsecond'] value = getattr(parsed, attr) - if value is not None and (value != 0 or attr in can_be_zero): + if value is not None and value != 0: # or attr in can_be_zero): repl[attr] = value if not stopped: reso = attr @@ -218,6 +218,7 @@ def parse_time_string(arg, freq=None): raise DateParseError("Missing attribute before %s" % attr) else: stopped = True + break ret = default.replace(**repl) return ret, parsed, reso # datetime, resolution except Exception, e: diff --git a/test.sh b/test.sh index 4ecb27dad6f3b..6084b2d3b706d 100755 --- a/test.sh +++ b/test.sh @@ -3,7 +3,7 @@ coverage erase # nosetests pandas/tests/test_index.py --with-coverage --cover-package=pandas.core --pdb-failure --pdb #nosetests -w pandas --with-coverage --cover-package=pandas --pdb-failure --pdb #--cover-inclusive nosetests -A "not slow" -w pandas/tseries --with-coverage --cover-package=pandas.tseries $* #--cover-inclusive -# nosetests -w pandas --with-coverage --cover-package=pandas $* +#nosetests -w pandas --with-coverage --cover-package=pandas $* # nosetests -w pandas/io --with-coverage --cover-package=pandas.io --pdb-failure --pdb # nosetests -w pandas/core --with-coverage --cover-package=pandas.core --pdb-failure --pdb # nosetests -w pandas/stats --with-coverage --cover-package=pandas.stats