From 478d0f65a67f2774890bd5ab73a3eecdbcc96700 Mon Sep 17 00:00:00 2001
From: Kevin Sheppard <kevin.k.sheppard@gmail.com>
Date: Wed, 12 Jul 2023 19:04:05 +0100
Subject: [PATCH 1/2] ENH: Add ability to append to a model

---
 arch/univariate/base.py | 24 +++++++++++++++++++++++-
 arch/univariate/mean.py | 21 +++++++++++++++++++++
 arch/utility/array.py   | 31 ++++++++++++++++++++++++++++++-
 3 files changed, 74 insertions(+), 2 deletions(-)

diff --git a/arch/univariate/base.py b/arch/univariate/base.py
index c2001fde74..f5fa739feb 100644
--- a/arch/univariate/base.py
+++ b/arch/univariate/base.py
@@ -35,7 +35,7 @@
 )
 from arch.univariate.distribution import Distribution, Normal
 from arch.univariate.volatility import ConstantVariance, VolatilityProcess
-from arch.utility.array import ensure1d
+from arch.utility.array import append_same_type, ensure1d
 from arch.utility.exceptions import (
     ConvergenceWarning,
     DataScaleWarning,
@@ -230,6 +230,28 @@ def name(self) -> str:
         """The name of the model."""
         return self._name
 
+    def append(self, y: ArrayLike) -> None:
+        """
+        Append data to the model
+
+        Parameters
+        ----------
+        y : ndarray or Series
+            Data to append
+
+        Notes
+        -----
+        The model is modified in place and the fit sample is reset to
+        span all observations, including those appended.
+        """
+        _y = ensure1d(y, "y", series=True)
+        self._y_original = append_same_type(self._y_original, y)
+        self._y_series = pd.concat([self._y_series, _y])
+        self._y = np.concatenate([self._y, np.asarray(_y)])
+
+        self._fit_indices = [0, int(self._y.shape[0])]
+        self._fit_y = self._y
+
     def constraints(self) -> tuple[Float64Array, Float64Array]:
         """
         Construct linear constraint arrays  for use in non-linear optimization
diff --git a/arch/univariate/mean.py b/arch/univariate/mean.py
index 368ac97202..37ae703341 100644
--- a/arch/univariate/mean.py
+++ b/arch/univariate/mean.py
@@ -39,6 +39,7 @@
     SkewStudent,
     StudentsT,
 )
+from arch.utility.array import append_same_type
 
 if TYPE_CHECKING:
     # Fake path to satisfy mypy
@@ -269,6 +270,7 @@ def __init__(
             distribution=distribution,
             rescale=rescale,
         )
+        self._x_original = x
         self._x = x
         self._x_names: list[str] = []
         self._x_index: None | NDArray | pd.Index = None
@@ -307,6 +309,45 @@ def __init__(
 
         self._init_model()
 
+    def append(self, y: ArrayLike, x: ArrayLike2D | None = None) -> None:
+        """
+        Append data to the model
+
+        Parameters
+        ----------
+        y : ndarray or Series
+            Data to append
+        x : ndarray or DataFrame, optional
+            Exogenous data to append. Only accepted when the model was
+            created with x, and must have the same number of columns.
+
+        Raises
+        ------
+        ValueError
+            If x is given but the model has no x, if x has more than two
+            dimensions or a different number of columns than the original
+            x, or if x and y differ in length after appending.
+        """
+        if x is not None:
+            # Check x before y is appended so a rejected x leaves y untouched
+            if self._x is None:
+                raise ValueError("x was not provided in the original model")
+            _x = np.atleast_2d(np.asarray(x))
+            if _x.ndim != 2:
+                raise ValueError("x must be 2-d")
+            elif _x.shape[1] != self._x.shape[1]:
+                raise ValueError(
+                    "x must have the same number of columns as the original x"
+                )
+        super().append(y)
+        if x is not None:
+            self._x_original = append_same_type(self._x_original, x)
+            self._x = np.asarray(self._x_original)
+            if self._x.shape[0] != self._y.shape[0]:
+                raise ValueError("x must have the same number of observations as y")
+
+            self._init_model()
+
     def _scale_changed(self):
         """
         Called when the scale has changed.  This allows the model
diff --git a/arch/utility/array.py b/arch/utility/array.py
index cdec75c5e3..437cfc821b 100644
--- a/arch/utility/array.py
+++ b/arch/utility/array.py
@@ -12,7 +12,16 @@
 from typing import Any, Literal, overload
 
 import numpy as np
-from pandas import DataFrame, DatetimeIndex, Index, NaT, Series, Timestamp, to_datetime
+from pandas import (
+    DataFrame,
+    DatetimeIndex,
+    Index,
+    NaT,
+    Series,
+    Timestamp,
+    concat,
+    to_datetime,
+)
 
 from arch.typing import AnyPandas, ArrayLike, DateLike, NDArray
 
@@ -310,3 +319,23 @@ def find_index(s: AnyPandas, index: int | DateLike) -> int:
     if loc.size == 0:
         raise ValueError("index not found")
     return int(loc)
+
+
+def append_same_type(original, new):
+    if not isinstance(new, type(original)):
+        raise TypeError(
+            "Input data must be the same type as the original data. "
+            f"Got {type(new)}, expected {type(original)}."
+        )
+    if isinstance(original, (Series, DataFrame)):
+        extended = concat([original, new], axis=0)
+    elif isinstance(original, np.ndarray):
+        extended = np.concatenate([original, new])
+    elif isinstance(original, list):
+        extended = original + new
+    else:
+        raise TypeError(
+            "Input data must be a pandas Series, DataFrame, numpy ndarray, or "
+            f"list. Got {type(original)}."
+        )
+    return extended

From 4cd013d6d6fa0cd14981e69734c3bee515a6b8ef Mon Sep 17 00:00:00 2001
From: Kevin Sheppard <kevin.k.sheppard@gmail.com>
Date: Sat, 15 Jul 2023 11:02:41 +0100
Subject: [PATCH 2/2] TST: Add initial tests for append

---
 arch/tests/univariate/test_append.py | 167 +++++++++++++++++++++++++++
 arch/univariate/mean.py              |   2 +-
 arch/utility/array.py                |  19 ++-
 3 files changed, 182 insertions(+), 6 deletions(-)
 create mode 100644 arch/tests/univariate/test_append.py

diff --git a/arch/tests/univariate/test_append.py b/arch/tests/univariate/test_append.py
new file mode 100644
index 0000000000..1ebf5826e3
--- /dev/null
+++ b/arch/tests/univariate/test_append.py
@@ -0,0 +1,167 @@
+import datetime as dt
+from functools import partial
+from itertools import product
+
+import numpy as np
+from numpy.random import RandomState
+from numpy.testing import assert_allclose
+import pandas as pd
+import pytest
+
+from arch.data import sp500
+from arch.univariate import (
+    APARCH,
+    ARX,
+    EGARCH,
+    FIGARCH,
+    GARCH,
+    HARCH,
+    HARX,
+    LS,
+    ConstantMean,
+    ConstantVariance,
+    EWMAVariance,
+    MIDASHyperbolic,
+    RiskMetrics2006,
+    ZeroMean,
+    arch_model,
+)
+
+SP500 = 100 * sp500.load()["Adj Close"].pct_change().dropna()
+N = SP500.shape[0]
+SP500_initial = SP500.iloc[: N // 2]
+SP500_append = SP500.iloc[N // 2 :]
+
+
+class HARXWrapper(HARX):
+    def __init__(self, y, x=None, volatility=None):
+        super().__init__(y, lags=[1, 5], x=x, volatility=volatility)
+
+
+class ARXWrapper(ARX):
+    def __init__(self, y, x=None, volatility=None):
+        super().__init__(y, lags=2, x=x, volatility=volatility)
+
+
+MEAN_MODELS = [
+    HARXWrapper,
+    ARXWrapper,
+    ConstantMean,
+    ZeroMean,
+]
+
+VOLATILITIES = [
+    ConstantVariance(),
+    GARCH(),
+    FIGARCH(),
+    EWMAVariance(lam=0.94),
+    MIDASHyperbolic(),
+    HARCH(lags=[1, 5, 22]),
+    RiskMetrics2006(),
+    APARCH(),
+    EGARCH(),
+]
+
+X_MEAN_MODELS = [HARXWrapper, ARXWrapper, LS]
+
+MODEL_SPECS = list(product(MEAN_MODELS, VOLATILITIES))
+
+IDS = [f"{mean.__name__}-{str(vol).split('(')[0]}" for mean, vol in MODEL_SPECS]
+
+
+@pytest.fixture(params=MODEL_SPECS, ids=IDS)
+def mean_volatility(request):
+    mean, vol = request.param
+    return mean, vol
+
+
+def test_append():
+    mod = arch_model(SP500_initial)
+    mod.append(SP500_append)
+    res = mod.fit(disp="off")
+
+    direct = arch_model(SP500)
+    res_direct = direct.fit(disp="off")
+    assert_allclose(res.params, res_direct.params, rtol=1e-5)
+    assert_allclose(res.conditional_volatility, res_direct.conditional_volatility)
+    assert_allclose(res.resid, res_direct.resid)
+    assert_allclose(mod._backcast, direct._backcast)
+
+
+def test_alt_means(mean_volatility):
+    mean, vol = mean_volatility
+    mod = mean(SP500_initial, volatility=vol)
+    mod.append(SP500_append)
+    res = mod.fit(disp="off")
+
+    direct = mean(SP500, volatility=vol)
+    res_direct = direct.fit(disp="off")
+    assert_allclose(res.conditional_volatility, res_direct.conditional_volatility)
+    assert_allclose(res.resid, res_direct.resid)
+    if mod._backcast is not None:
+        assert_allclose(mod._backcast, direct._backcast)
+    else:
+        assert direct._backcast is None
+
+
+def test_append_scalar_no_reestiamtion(mean_volatility):
+    mean, vol = mean_volatility
+    mod = mean(np.asarray(SP500_initial), volatility=vol)
+    for val in np.asarray(SP500_append):
+        mod.append(val)
+
+
+def test_append_scalar_bad_value():
+    mod = HARX(SP500_initial, lags=[1, 5], volatility=GARCH())
+    with pytest.raises(TypeError):
+        mod.append(SP500_append.iloc[0])
+
+
+def test_append_type_mismatch(mean_volatility):
+    mean, vol = mean_volatility
+    mod = mean(SP500_initial, volatility=vol)
+    with pytest.raises(TypeError, match="Input data must be the same"):
+        mod.append(np.asarray(SP500_append))
+    with pytest.raises(TypeError, match="Input data must be the same"):
+        mod.append(SP500_append.tolist())
+
+    mod_arr = mean(np.asarray(SP500_initial), volatility=vol)
+    with pytest.raises(TypeError, match="Input data must be the same"):
+        mod_arr.append(SP500_append)
+    with pytest.raises(TypeError, match="Input data must be the same"):
+        mod_arr.append(SP500_append.tolist())
+
+    mod_list = mean(SP500_initial.tolist(), volatility=vol)
+    with pytest.raises(TypeError, match="Input data must be the same"):
+        mod_list.append(SP500_append)
+    with pytest.raises(TypeError, match="Input data must be the same"):
+        mod_list.append(np.asarray(SP500_append))
+
+
+def test_append_x_type_mismatch():
+    pass
+
+
+@pytest.mark.parametrize("mean", X_MEAN_MODELS)
+def test_bad_append_model_with_exog(mean):
+    mod = mean(SP500_initial, volatility=GARCH())  # model has no exog
+    x = pd.DataFrame(
+        np.random.randn(SP500_append.shape[0], 2),
+        columns=["a", "b"],
+        index=SP500_append.index,
+    )
+    with pytest.raises(ValueError, match="x was not provided"):
+        mod.append(SP500_append, x=x)
+
+    x_initial = pd.DataFrame(
+        np.random.randn(SP500_initial.shape[0], 2),
+        columns=["a", "b"],
+        index=SP500_initial.index,
+    )
+    mod = mean(SP500_initial, x=x_initial, volatility=GARCH())  # model has exog
+    with pytest.raises(ValueError):
+        mod.append(SP500_append)
+
+
+def test_bad_append_ls():
+    pass
diff --git a/arch/univariate/mean.py b/arch/univariate/mean.py
index 37ae703341..2600a6a971 100644
--- a/arch/univariate/mean.py
+++ b/arch/univariate/mean.py
@@ -326,7 +326,7 @@ def append(self, y: ArrayLike, x: ArrayLike2D | None = None) -> None:
             if self._x.shape[0] != self._y.shape[0]:
                 raise ValueError("x must have the same number of observations as y")
 
-            self._init_model()
+        self._init_model()
 
     def _scale_changed(self):
         """
diff --git a/arch/utility/array.py b/arch/utility/array.py
index 437cfc821b..b85c473f79 100644
--- a/arch/utility/array.py
+++ b/arch/utility/array.py
@@ -12,6 +12,7 @@
 from typing import Any, Literal, overload
 
 import numpy as np
+import pandas as pd
 from pandas import (
     DataFrame,
     DatetimeIndex,
@@ -322,17 +323,48 @@ def find_index(s: AnyPandas, index: int | DateLike) -> int:
 
 
 def append_same_type(original, new):
-    if not isinstance(new, type(original)):
+    """
+    Append new data to existing data of the same container type
+
+    Parameters
+    ----------
+    original : {Series, DataFrame, ndarray, list}
+        Existing data
+    new : {Series, DataFrame, ndarray, list, float}
+        Data to append. Must be an instance of the type of original, except
+        that a scalar float is accepted when original is an ndarray or list.
+
+    Returns
+    -------
+    {Series, DataFrame, ndarray, list}
+        The combined data, with new placed after original
+
+    Raises
+    ------
+    TypeError
+        If new has an unsupported type for original, or original is not a
+        Series, DataFrame, ndarray or list
+    """
+    # Scalars can extend list or ndarray data by a single observation
+    append_ok = isinstance(original, (list, np.ndarray)) and isinstance(
+        new, (float, np.floating)
+    )
+    append_ok = append_ok or isinstance(new, type(original))
+    if not append_ok:
         raise TypeError(
-            "Input data must be the same type as the original data. "
-            f"Got {type(new)}, expected {type(original)}."
+            "Input data must be the same type as the original data, unless the "
+            "original was an NDArray or a list, in which case the input data can "
+            f"be a scalar float. Got {type(new)}, expected {type(original)}."
         )
     if isinstance(original, (Series, DataFrame)):
         extended = concat([original, new], axis=0)
     elif isinstance(original, np.ndarray):
-        extended = np.concatenate([original, new])
+        extended = np.concatenate([original, np.atleast_1d(new)])
     elif isinstance(original, list):
-        extended = original + new
+        if isinstance(new, list):
+            extended = original + new
+        else:
+            extended = original + [new]
     else:
         raise TypeError(
             "Input data must be a pandas Series, DataFrame, numpy ndarray, or "