Add mdape to all diagnostics, and fix rolling median behavior.

mj-01 · Jun 21, 2019 · 4225bb5 · 4225bb5
1 parent 1b01ab9
commit 4225bb5
Show file tree

Hide file tree

Showing 2 changed files with 31 additions and 8 deletions.
diff --git a/python/fbprophet/diagnostics.py b/python/fbprophet/diagnostics.py
@@ -231,7 +231,7 @@ def performance_metrics(df, metrics=None, rolling_window=0.1):
     -------
     Dataframe with a column for each metric, and column 'horizon'
     """
-    valid_metrics = ['mse', 'rmse', 'mae', 'mape', 'coverage']
+    valid_metrics = ['mse', 'rmse', 'mae', 'mape', 'mdape', 'coverage']
     if metrics is None:
         metrics = valid_metrics
     if len(set(metrics)) != len(metrics):
@@ -341,7 +341,7 @@ def rolling_median_by_h(x, h, w, name):
     df = pd.DataFrame({'x': x, 'h': h})
     grouped = df.groupby('h')
     df2 = grouped.size().reset_index().sort_values('h')
-    hs = df2['h'].values
+    hs = df2['h']
 
     res_h = []
     res_x = []
@@ -350,14 +350,16 @@ def rolling_median_by_h(x, h, w, name):
     while i >= 0:
         h_i = hs[i]
         xs = grouped.get_group(h_i).x.tolist()
-        j = i - 1
-        while (len(xs) < w) and (j >= 0):
+
+        # Wrap in np.array so this works whether h is a pandas Series with a custom index or a numpy array
+        next_idx_to_add = np.array(h == h_i).argmax() - 1
+        while (len(xs) < w) and (next_idx_to_add >= 0):
             # Include points from the previous horizon. All of them if still
             # less than w, otherwise just enough to get to w.
-            xs.append(x[j])
-            j -= 1
+            xs.append(x[next_idx_to_add])
+            next_idx_to_add -= 1
         if len(xs) < w:
-            # Ran out of horizons before enough points.
+            # Ran out of points before getting enough.
             break
         res_h.append(hs[i])
         res_x.append(np.median(xs))
@@ -450,6 +452,26 @@ def mape(df, w):
     )
 
 
+def mdape(df, w):
+    """Median absolute percent error
+
+    Parameters
+    ----------
+    df: Cross-validation results dataframe.
+    w: Aggregation window size.
+
+    Returns
+    -------
+    Dataframe with columns horizon and mdape.
+    """
+    ape = np.abs((df['y'] - df['yhat']) / df['y'])
+    if w < 0:
+        return pd.DataFrame({'horizon': df['horizon'], 'mdape': ape})
+    return rolling_median_by_h(
+        x=ape.values, h=df['horizon'], w=w, name='mdape'
+    )
+
+
 def smape(df, w):
     """Symmetric mean absolute percentage error
 

diff --git a/python/fbprophet/tests/test_diagnostics.py b/python/fbprophet/tests/test_diagnostics.py
@@ -114,7 +114,7 @@ def test_performance_metrics(self):
         df_none = diagnostics.performance_metrics(df_cv, rolling_window=-1)
         self.assertEqual(
             set(df_none.columns),
-            {'horizon', 'coverage', 'mae', 'mape', 'mse', 'rmse'},
+            {'horizon', 'coverage', 'mae', 'mape', 'mdape', 'mse', 'rmse'},
         )
         self.assertEqual(df_none.shape[0], 16)
         # Aggregation level 0
@@ -130,6 +130,7 @@ def test_performance_metrics(self):
         self.assertEqual(df_all.shape[0], 1)
         for metric in ['mse', 'mape', 'mae', 'coverage']:
             self.assertAlmostEqual(df_all[metric].values[0], df_none[metric].mean())
+        self.assertAlmostEqual(df_all['mdape'].values[0], df_none['mdape'].median())
         # Custom list of metrics
         df_horizon = diagnostics.performance_metrics(
             df_cv, metrics=['coverage', 'mse'],