From 478d0f65a67f2774890bd5ab73a3eecdbcc96700 Mon Sep 17 00:00:00 2001
From: Kevin Sheppard
Date: Wed, 12 Jul 2023 19:04:05 +0100
Subject: [PATCH 1/2] ENH: Add ability to append to a model

Add an append method to the base model and to the mean model that
accepts exogenous regressors, and add the append_same_type helper,
which extends stored data while requiring the new data to have the
same container type as the original.

---
 arch/univariate/base.py | 24 +++++++++++++++++++++++-
 arch/univariate/mean.py | 21 +++++++++++++++++++++
 arch/utility/array.py   | 31 ++++++++++++++++++++++++++++++-
 3 files changed, 74 insertions(+), 2 deletions(-)

diff --git a/arch/univariate/base.py b/arch/univariate/base.py
index c2001fde74..f5fa739feb 100644
--- a/arch/univariate/base.py
+++ b/arch/univariate/base.py
@@ -35,7 +35,7 @@
 )
 from arch.univariate.distribution import Distribution, Normal
 from arch.univariate.volatility import ConstantVariance, VolatilityProcess
-from arch.utility.array import ensure1d
+from arch.utility.array import append_same_type, ensure1d
 from arch.utility.exceptions import (
     ConvergenceWarning,
     DataScaleWarning,
@@ -230,6 +230,28 @@ def name(self) -> str:
         """The name of the model."""
         return self._name
 
+    def append(self, y: ArrayLike) -> None:
+        """
+        Append data to the model
+
+        Parameters
+        ----------
+        y : ndarray or Series
+            Data to append
+
+        Notes
+        -----
+        The model is modified in place and nothing is returned. The fit
+        indices are reset to cover every observation after the append.
+        """
+        _y = ensure1d(y, "y", series=True)
+        self._y_original = append_same_type(self._y_original, y)
+        self._y_series = pd.concat([self._y_series, _y])
+        self._y = np.concatenate([self._y, np.asarray(_y)])
+
+        self._fit_indices = [0, int(self._y.shape[0])]
+        self._fit_y = self._y
+
     def constraints(self) -> tuple[Float64Array, Float64Array]:
         """
         Construct linear constraint arrays for use in non-linear optimization
diff --git a/arch/univariate/mean.py b/arch/univariate/mean.py
index 368ac97202..37ae703341 100644
--- a/arch/univariate/mean.py
+++ b/arch/univariate/mean.py
@@ -39,6 +39,7 @@
     SkewStudent,
     StudentsT,
 )
+from arch.utility.array import append_same_type
 
 if TYPE_CHECKING:
     # Fake path to satisfy mypy
@@ -269,6 +270,7 @@ def __init__(
             distribution=distribution,
             rescale=rescale,
         )
+        self._x_original = x
         self._x = x
         self._x_names: list[str] = []
         self._x_index: None | NDArray | pd.Index = None
@@ -307,6 +309,25 @@ def __init__(
 
         self._init_model()
 
+    def append(self, y: ArrayLike, x: ArrayLike2D | None = None) -> None:
+        super().append(y)
+        if x is not None:
+            if self._x is None:
+                raise ValueError("x was not provided in the original model")
+            _x = np.atleast_2d(np.asarray(x))
+            if _x.ndim != 2:
+                raise ValueError("x must be 2-d")
+            elif _x.shape[1] != self._x.shape[1]:
+                raise ValueError(
+                    "x must have the same number of columns as the original x"
+                )
+            self._x_original = append_same_type(self._x_original, x)
+            self._x = np.asarray(self._x_original)
+            if self._x.shape[0] != self._y.shape[0]:
+                raise ValueError("x must have the same number of observations as y")
+
+            self._init_model()
+
     def _scale_changed(self):
         """
         Called when the scale has changed. This allows the model
diff --git a/arch/utility/array.py b/arch/utility/array.py
index cdec75c5e3..437cfc821b 100644
--- a/arch/utility/array.py
+++ b/arch/utility/array.py
@@ -12,7 +12,16 @@
 from typing import Any, Literal, overload
 
 import numpy as np
-from pandas import DataFrame, DatetimeIndex, Index, NaT, Series, Timestamp, to_datetime
+from pandas import (
+    DataFrame,
+    DatetimeIndex,
+    Index,
+    NaT,
+    Series,
+    Timestamp,
+    concat,
+    to_datetime,
+)
 
 from arch.typing import AnyPandas, ArrayLike, DateLike, NDArray
 
@@ -310,3 +319,23 @@ def find_index(s: AnyPandas, index: int | DateLike) -> int:
     if loc.size == 0:
         raise ValueError("index not found")
     return int(loc)
+
+
+def append_same_type(original, new):
+    if not isinstance(new, type(original)):
+        raise TypeError(
+            "Input data must be the same type as the original data. "
+            f"Got {type(new)}, expected {type(original)}."
+        )
+    if isinstance(original, (Series, DataFrame)):
+        extended = concat([original, new], axis=0)
+    elif isinstance(original, np.ndarray):
+        extended = np.concatenate([original, new])
+    elif isinstance(original, list):
+        extended = original + new
+    else:
+        raise TypeError(
+            "Input data must be a pandas Series, DataFrame, numpy ndarray, or "
+            f"list. Got {type(original)}."
+        )
+    return extended

From 4cd013d6d6fa0cd14981e69734c3bee515a6b8ef Mon Sep 17 00:00:00 2001
From: Kevin Sheppard
Date: Sat, 15 Jul 2023 11:02:41 +0100
Subject: [PATCH 2/2] TST: Add initial tests for append

---
 arch/tests/univariate/test_append.py | 167 +++++++++++++++++++++++++++
 arch/univariate/mean.py              |   2 +-
 arch/utility/array.py                |  19 ++-
 3 files changed, 182 insertions(+), 6 deletions(-)
 create mode 100644 arch/tests/univariate/test_append.py

diff --git a/arch/tests/univariate/test_append.py b/arch/tests/univariate/test_append.py
new file mode 100644
index 0000000000..1ebf5826e3
--- /dev/null
+++ b/arch/tests/univariate/test_append.py
@@ -0,0 +1,167 @@
+import datetime as dt
+from functools import partial
+from itertools import product
+
+import numpy as np
+from numpy.random import RandomState
+from numpy.testing import assert_allclose
+import pandas as pd
+import pytest
+
+from arch.data import sp500
+from arch.univariate import (
+    APARCH,
+    ARX,
+    EGARCH,
+    FIGARCH,
+    GARCH,
+    HARCH,
+    HARX,
+    LS,
+    ConstantMean,
+    ConstantVariance,
+    EWMAVariance,
+    MIDASHyperbolic,
+    RiskMetrics2006,
+    ZeroMean,
+    arch_model,
+)
+
+SP500 = 100 * sp500.load()["Adj Close"].pct_change().dropna()
+N = SP500.shape[0]
+SP500_initial = SP500.iloc[: N // 2]
+SP500_append = SP500.iloc[N // 2 :]
+
+
+class HARXWrapper(HARX):
+    def __init__(self, y, x=None, volatility=None):
+        super().__init__(y, lags=[1, 5], x=x, volatility=volatility)
+
+
+class ARXWrapper(ARX):
+    def __init__(self, y, x=None, volatility=None):
+        super().__init__(y, lags=2, x=x, volatility=volatility)
+
+
+MEAN_MODELS = [
+    HARXWrapper,
+    ARXWrapper,
+    ConstantMean,
+    ZeroMean,
+]
+
+VOLATILITIES = [
+    ConstantVariance(),
+    GARCH(),
+    FIGARCH(),
+    EWMAVariance(lam=0.94),
+    MIDASHyperbolic(),
+    HARCH(lags=[1, 5, 22]),
+    RiskMetrics2006(),
+    APARCH(),
+    EGARCH(),
+]
+
+X_MEAN_MODELS = [HARXWrapper, ARXWrapper, LS]
+
+MODEL_SPECS = list(product(MEAN_MODELS, VOLATILITIES))
+
+IDS = [f"{mean.__name__}-{str(vol).split('(')[0]}" for mean, vol in MODEL_SPECS]
+
+
+@pytest.fixture(params=MODEL_SPECS, ids=IDS)
+def mean_volatility(request):
+    mean, vol = request.param
+    return mean, vol
+
+
+def test_append():
+    mod = arch_model(SP500_initial)
+    mod.append(SP500_append)
+    res = mod.fit(disp="off")
+
+    direct = arch_model(SP500)
+    res_direct = direct.fit(disp="off")
+    assert_allclose(res.params, res_direct.params, rtol=1e-5)
+    assert_allclose(res.conditional_volatility, res_direct.conditional_volatility)
+    assert_allclose(res.resid, res_direct.resid)
+    assert_allclose(mod._backcast, direct._backcast)
+
+
+def test_alt_means(mean_volatility):
+    mean, vol = mean_volatility
+    mod = mean(SP500_initial, volatility=vol)
+    mod.append(SP500_append)
+    res = mod.fit(disp="off")
+
+    direct = mean(SP500, volatility=vol)
+    res_direct = direct.fit(disp="off")
+    assert_allclose(res.conditional_volatility, res_direct.conditional_volatility)
+    assert_allclose(res.resid, res_direct.resid)
+    if mod._backcast is not None:
+        assert_allclose(mod._backcast, direct._backcast)
+    else:
+        assert direct._backcast is None
+
+
+def test_append_scalar_no_reestimation(mean_volatility):
+    mean, vol = mean_volatility
+    mod = mean(np.asarray(SP500_initial), volatility=vol)
+    for val in np.asarray(SP500_append):
+        mod.append(val)
+
+
+def test_append_scalar_bad_value():
+    mod = HARX(SP500_initial, lags=[1, 5], volatility=GARCH())
+    with pytest.raises(TypeError):
+        mod.append(SP500_append.iloc[0])
+
+
+def test_append_type_mismatch(mean_volatility):
+    mean, vol = mean_volatility
+    mod = mean(SP500_initial, volatility=vol)
+    with pytest.raises(TypeError, match="Input data must be the same"):
+        mod.append(np.asarray(SP500_append))
+    with pytest.raises(TypeError, match="Input data must be the same"):
+        mod.append(SP500_append.tolist())
+
+    mod_arr = mean(np.asarray(SP500_initial), volatility=vol)
+    with pytest.raises(TypeError, match="Input data must be the same"):
+        mod_arr.append(SP500_append)
+    with pytest.raises(TypeError, match="Input data must be the same"):
+        mod_arr.append(SP500_append.tolist())
+
+    mod_list = mean(SP500_initial.tolist(), volatility=vol)
+    with pytest.raises(TypeError, match="Input data must be the same"):
+        mod_list.append(SP500_append)
+    with pytest.raises(TypeError, match="Input data must be the same"):
+        mod_list.append(np.asarray(SP500_append))
+
+
+def test_append_x_type_mismatch():
+    pass
+
+
+@pytest.mark.parametrize("mean", X_MEAN_MODELS)
+def test_bad_append_model_with_exog(mean):
+    mod = mean(SP500_initial, volatility=GARCH())
+    x = pd.DataFrame(
+        np.random.randn(SP500_append.shape[0], 2),
+        columns=["a", "b"],
+        index=SP500_append.index,
+    )
+    with pytest.raises(ValueError, match=""):
+        mod.append(SP500_append, x=x)
+
+    x_initial = pd.DataFrame(
+        np.random.randn(SP500_initial.shape[0], 2),
+        columns=["a", "b"],
+        index=SP500_initial.index,
+    )
+    mod = mean(SP500_initial, x=x_initial, volatility=GARCH())
+    with pytest.raises(ValueError, match=""):
+        mod.append(SP500_append)
+
+
+def test_bad_append_ls():
+    pass
diff --git a/arch/univariate/mean.py b/arch/univariate/mean.py
index 37ae703341..2600a6a971 100644
--- a/arch/univariate/mean.py
+++ b/arch/univariate/mean.py
@@ -326,7 +326,7 @@ def append(self, y: ArrayLike, x: ArrayLike2D | None = None) -> None:
             if self._x.shape[0] != self._y.shape[0]:
                 raise ValueError("x must have the same number of observations as y")
 
-            self._init_model()
+        self._init_model()
 
     def _scale_changed(self):
         """
diff --git a/arch/utility/array.py b/arch/utility/array.py
index 437cfc821b..b85c473f79 100644
--- a/arch/utility/array.py
+++ b/arch/utility/array.py
@@ -12,6 +12,7 @@
 from typing import Any, Literal, overload
 
 import numpy as np
+import pandas as pd
 from pandas import (
     DataFrame,
     DatetimeIndex,
@@ -322,17 +323,25 @@ def find_index(s: AnyPandas, index: int | DateLike) -> int:
 
 
 def append_same_type(original, new):
-    if not isinstance(new, type(original)):
+    append_ok = isinstance(original, (list, np.ndarray)) and isinstance(
+        new, (float, np.floating)
+    )
+    append_ok = append_ok or isinstance(new, type(original))
+    if not append_ok:
         raise TypeError(
-            "Input data must be the same type as the original data. "
-            f"Got {type(new)}, expected {type(original)}."
+            "Input data must be the same type as the original data, unless the "
+            "original was an NDArray or a list, in which case the input data can "
+            f"be a scalar float. Got {type(new)}, expected {type(original)}."
         )
     if isinstance(original, (Series, DataFrame)):
         extended = concat([original, new], axis=0)
     elif isinstance(original, np.ndarray):
-        extended = np.concatenate([original, new])
+        extended = np.concatenate([original, np.atleast_1d(new)])
     elif isinstance(original, list):
-        extended = original + new
+        if isinstance(new, list):
+            extended = original + new
+        else:
+            extended = original + [new]
     else:
         raise TypeError(
             "Input data must be a pandas Series, DataFrame, numpy ndarray, or "