Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: Add ability to append to a model #674

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
167 changes: 167 additions & 0 deletions arch/tests/univariate/test_append.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
import datetime as dt

Check notice

Code scanning / CodeQL

Unused import

Import of 'dt' is not used.
from functools import partial

Check notice

Code scanning / CodeQL

Unused import

Import of 'partial' is not used.
from itertools import product

import numpy as np
from numpy.random import RandomState
from numpy.testing import assert_allclose
import pandas as pd
import pytest

from arch.data import sp500
from arch.univariate import (
APARCH,
ARX,
EGARCH,
FIGARCH,
GARCH,
HARCH,
HARX,
LS,
ConstantMean,
ConstantVariance,
EWMAVariance,
MIDASHyperbolic,
RiskMetrics2006,
ZeroMean,
arch_model,
)

# Percentage changes of the S&P 500 adjusted close, scaled by 100, with the
# leading NaN produced by pct_change dropped.
SP500 = 100 * sp500.load()["Adj Close"].pct_change().dropna()
# Total number of observations in the full return series.
N = SP500.shape[0]
# The first half is used to construct models; the second half is the data
# that the tests append afterwards.
SP500_initial = SP500.iloc[: N // 2]
SP500_append = SP500.iloc[N // 2 :]


class HARXWrapper(HARX):
    """HARX with lags fixed at [1, 5], exposing a ``(y, x, volatility)`` constructor."""

    def __init__(self, y, x=None, volatility=None):
        # Lags are pinned here so the tests can build every mean model
        # through the same call signature.
        fixed_lags = [1, 5]
        super().__init__(y, lags=fixed_lags, x=x, volatility=volatility)


class ARXWrapper(ARX):
    """ARX with ``lags=2``, exposing a ``(y, x, volatility)`` constructor."""

    def __init__(self, y, x=None, volatility=None):
        # The lag order is pinned here so the tests can build every mean
        # model through the same call signature.
        lag_order = 2
        super().__init__(y, lags=lag_order, x=x, volatility=volatility)


# Mean-model constructors that can be called as ``mean(y, volatility=vol)``.
MEAN_MODELS = [
    HARXWrapper,
    ARXWrapper,
    ConstantMean,
    ZeroMean,
]

# Volatility process instances paired with every mean model above.
VOLATILITIES = [
    ConstantVariance(),
    GARCH(),
    FIGARCH(),
    EWMAVariance(lam=0.94),
    MIDASHyperbolic(),
    HARCH(lags=[1, 5, 22]),
    RiskMetrics2006(),
    APARCH(),
    EGARCH(),
]

# Mean models exercised with exogenous regressors in the ``x`` tests.
X_MEAN_MODELS = [HARXWrapper, ARXWrapper, LS]

# Full cross product of mean models and volatility processes.
MODEL_SPECS = list(product(MEAN_MODELS, VOLATILITIES))

# Test ids of the form "<MeanClass>-<text of str(vol) before the first '('>".
IDS = [f"{mean.__name__}-{str(vol).split('(')[0]}" for mean, vol in MODEL_SPECS]


@pytest.fixture(params=MODEL_SPECS, ids=IDS)
def mean_volatility(request):
    """Yield one (mean model constructor, volatility instance) pair per spec."""
    mean_cls, vol_process = request.param
    return (mean_cls, vol_process)


def test_append():
    """A model built on half the data plus an append must match the full-sample model."""
    appended_model = arch_model(SP500_initial)
    appended_model.append(SP500_append)
    appended_fit = appended_model.fit(disp="off")

    full_model = arch_model(SP500)
    full_fit = full_model.fit(disp="off")

    # Parameters come from a numerical optimizer, so use a looser tolerance.
    assert_allclose(appended_fit.params, full_fit.params, rtol=1e-5)
    for attr in ("conditional_volatility", "resid"):
        assert_allclose(getattr(appended_fit, attr), getattr(full_fit, attr))
    assert_allclose(appended_model._backcast, full_model._backcast)


def test_alt_means(mean_volatility):
    """Append-then-fit must agree with a direct full-sample fit for every spec."""
    mean, vol = mean_volatility

    appended_model = mean(SP500_initial, volatility=vol)
    appended_model.append(SP500_append)
    appended_fit = appended_model.fit(disp="off")

    full_model = mean(SP500, volatility=vol)
    full_fit = full_model.fit(disp="off")

    for attr in ("conditional_volatility", "resid"):
        assert_allclose(getattr(appended_fit, attr), getattr(full_fit, attr))

    # Both models must agree on the backcast, including when it is unset.
    if appended_model._backcast is None:
        assert full_model._backcast is None
    else:
        assert_allclose(appended_model._backcast, full_model._backcast)


def test_append_scalar_no_reestiamtion(mean_volatility):
    """Smoke test: appending scalars one at a time to an ndarray-backed model."""
    mean, vol = mean_volatility
    initial_values = np.asarray(SP500_initial)
    mod = mean(initial_values, volatility=vol)
    # No fit is performed; this only checks that each scalar append succeeds.
    for scalar in np.asarray(SP500_append):
        mod.append(scalar)


def test_append_scalar_bad_value():
    """Appending a scalar to a Series-backed model is rejected with TypeError."""
    mod = HARX(SP500_initial, lags=[1, 5], volatility=GARCH())
    scalar = SP500_append.iloc[0]
    with pytest.raises(TypeError):
        mod.append(scalar)


def test_append_type_mismatch(mean_volatility):
    """Appending data whose container type differs from the original raises TypeError."""
    mean, vol = mean_volatility

    series_new = SP500_append
    array_new = np.asarray(SP500_append)
    list_new = SP500_append.tolist()

    # (original data, inputs of a different container type that must be rejected)
    scenarios = [
        (SP500_initial, (array_new, list_new)),
        (np.asarray(SP500_initial), (series_new, list_new)),
        (SP500_initial.tolist(), (series_new, array_new)),
    ]
    for original, mismatched_inputs in scenarios:
        mod = mean(original, volatility=vol)
        for bad_input in mismatched_inputs:
            with pytest.raises(TypeError, match="Input data must be the same"):
                mod.append(bad_input)


def test_append_x_type_mismatch():
    """Placeholder: not yet implemented; currently passes without checking anything."""
    # TODO: presumably intended to cover appending ``x`` of a different
    # container type than the original ``x`` -- confirm and implement.
    pass


@pytest.mark.parametrize("mean", X_MEAN_MODELS)
def test_bad_append_model_with_exog(mean):
    """
    Appending must fail when ``x`` usage differs from the original model.

    Two inconsistent cases are checked:

    * the model was built without ``x`` but ``x`` is supplied on append
    * the model was built with ``x`` but no ``x`` is supplied on append
    """
    mod = mean(SP500_initial, volatility=GARCH())
    x = pd.DataFrame(
        np.random.randn(SP500_append.shape[0], 2),
        columns=["a", "b"],
        index=SP500_append.index,
    )
    # An empty ``match`` pattern matches every message, so pin the actual
    # error text raised when the original model has no regressors.
    with pytest.raises(
        ValueError, match="x was not provided in the original model"
    ):
        mod.append(SP500_append, x=x)

    x_initial = pd.DataFrame(
        np.random.randn(SP500_initial.shape[0], 2),
        columns=["a", "b"],
        index=SP500_initial.index,
    )
    mod = mean(SP500_initial, x=x_initial, volatility=GARCH())
    # Only the exception type is asserted here; the vacuous empty pattern
    # is dropped rather than replaced with a guessed message.
    with pytest.raises(ValueError):
        mod.append(SP500_append)


def test_bad_append_ls():
    """Placeholder: not yet implemented; currently passes without checking anything."""
    # TODO: presumably intended to cover invalid appends to an LS model --
    # confirm and implement.
    pass
24 changes: 23 additions & 1 deletion arch/univariate/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
)
from arch.univariate.distribution import Distribution, Normal
from arch.univariate.volatility import ConstantVariance, VolatilityProcess
from arch.utility.array import ensure1d
from arch.utility.array import append_same_type, ensure1d
from arch.utility.exceptions import (
ConvergenceWarning,
DataScaleWarning,
Expand Down Expand Up @@ -230,6 +230,28 @@ def name(self) -> str:
"""The name of the model."""
return self._name

def append(self, y: ArrayLike) -> None:
    """
    Append data to the model

    The model is modified in place; nothing is returned.

    Parameters
    ----------
    y : ndarray or Series
        Data to append. It is passed to ``append_same_type`` together with
        the data originally supplied to the model, which raises if the
        container types are incompatible.

    Raises
    ------
    TypeError
        If ``append_same_type`` rejects ``y`` as incompatible with the
        original data.
    """
    # Compute every extended container before touching the instance so a
    # failure part-way through cannot leave the model partially updated.
    _y = ensure1d(y, "y", series=True)
    y_original = append_same_type(self._y_original, y)
    y_series = pd.concat([self._y_series, _y])
    y_values = np.concatenate([self._y, np.asarray(_y)])

    self._y_original = y_original
    self._y_series = y_series
    self._y = y_values

    # Assignment, not an annotation: the fit window must be reset so that
    # it spans all observations, including the appended ones.
    self._fit_indices = [0, int(self._y.shape[0])]
    self._fit_y = self._y

def constraints(self) -> tuple[Float64Array, Float64Array]:
"""
Construct linear constraint arrays for use in non-linear optimization
Expand Down
21 changes: 21 additions & 0 deletions arch/univariate/mean.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
SkewStudent,
StudentsT,
)
from arch.utility.array import append_same_type

if TYPE_CHECKING:
# Fake path to satisfy mypy
Expand Down Expand Up @@ -269,6 +270,7 @@ def __init__(
distribution=distribution,
rescale=rescale,
)
self._x_original = x
self._x = x
self._x_names: list[str] = []
self._x_index: None | NDArray | pd.Index = None
Expand Down Expand Up @@ -307,6 +309,25 @@ def __init__(

self._init_model()

def append(self, y: ArrayLike, x: ArrayLike2D | None = None) -> None:
    """
    Append data, and optionally regressors, to the model

    The model is modified in place and ``_init_model`` is called once the
    new data has been stored.

    Parameters
    ----------
    y : ndarray or Series
        Data to append to the dependent variable.
    x : {ndarray, DataFrame}, optional
        Regressors to append. May only be supplied when the model was
        constructed with ``x``.

    Raises
    ------
    ValueError
        If ``x`` is given but the original model had no ``x``, if ``x`` is
        not 2-d, if its column count differs from the original ``x``, or if
        the stored ``x`` and ``y`` have different numbers of observations
        after appending.
    TypeError
        If ``append_same_type`` rejects ``x`` as incompatible with the
        original ``x``.
    """
    new_x_original = None
    if x is not None:
        # Validate x before any state is changed so that a rejected x does
        # not leave y extended while x is left untouched.
        if self._x is None:
            raise ValueError("x was not provided in the original model")
        # NOTE(review): atleast_2d turns a 1-d input into a single row, so
        # only inputs with more than 2 dimensions reach the ndim error.
        _x = np.atleast_2d(np.asarray(x))
        if _x.ndim != 2:
            raise ValueError("x must be 2-d")
        elif _x.shape[1] != self._x.shape[1]:
            raise ValueError(
                "x must have the same number of columns as the original x"
            )
        new_x_original = append_same_type(self._x_original, x)

    super().append(y)
    if x is not None:
        self._x_original = new_x_original
        self._x = np.asarray(new_x_original)

    # Models without regressors have nothing to compare; dereferencing
    # ``self._x.shape`` when ``self._x`` is None would raise AttributeError.
    # NOTE(review): a length mismatch is only detectable after y has been
    # appended, so y remains extended when this error is raised.
    if self._x is not None and self._x.shape[0] != self._y.shape[0]:
        raise ValueError("x must have the same number of observations as y")

    self._init_model()

def _scale_changed(self):
"""
Called when the scale has changed. This allows the model
Expand Down
40 changes: 39 additions & 1 deletion arch/utility/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,17 @@
from typing import Any, Literal, overload

import numpy as np
from pandas import DataFrame, DatetimeIndex, Index, NaT, Series, Timestamp, to_datetime
import pandas as pd

Check notice

Code scanning / CodeQL

Unused import

Import of 'pd' is not used.

Check notice

Code scanning / CodeQL

Module is imported with 'import' and 'import from'

Module 'pandas' is imported with both 'import' and 'import from'.
from pandas import (
DataFrame,
DatetimeIndex,
Index,
NaT,
Series,
Timestamp,
concat,
to_datetime,
)

from arch.typing import AnyPandas, ArrayLike, DateLike, NDArray

Expand Down Expand Up @@ -310,3 +320,31 @@ def find_index(s: AnyPandas, index: int | DateLike) -> int:
if loc.size == 0:
raise ValueError("index not found")
return int(loc)


def append_same_type(original, new):
    """
    Return ``original`` extended with ``new``, requiring compatible types.

    Parameters
    ----------
    original : {Series, DataFrame, ndarray, list}
        The existing data.
    new : object
        The data to append. Must be an instance of the type of ``original``,
        except that a scalar float (``float`` or ``np.floating``) may be
        appended to an ndarray or a list.

    Returns
    -------
    {Series, DataFrame, ndarray, list}
        A new container holding ``original`` followed by ``new``.

    Raises
    ------
    TypeError
        If ``new`` is not compatible with ``original``, or if ``original``
        is not one of the supported container types.
    """
    scalar_onto_sequence = isinstance(new, (float, np.floating)) and isinstance(
        original, (list, np.ndarray)
    )
    if not (scalar_onto_sequence or isinstance(new, type(original))):
        raise TypeError(
            "Input data must be the same type as the original data, unless the "
            "original was an NDArray or a list, in which case the input data can "
            f"be a scalar float. Got {type(new)}, expected {type(original)}."
        )

    if isinstance(original, (Series, DataFrame)):
        return concat([original, new], axis=0)
    if isinstance(original, np.ndarray):
        # atleast_1d lets a scalar float be concatenated like a length-1 array.
        return np.concatenate([original, np.atleast_1d(new)])
    if isinstance(original, list):
        tail = new if isinstance(new, list) else [new]
        return original + tail

    raise TypeError(
        "Input data must be a pandas Series, DataFrame, numpy ndarray, or "
        f"list. Got {type(original)}."
    )