Skip to content

Commit

Permalink
Add mdape to all diagnostics, and fix rolling median behavior.
Browse files Browse the repository at this point in the history
  • Loading branch information
louispotok authored and bletham committed Jun 21, 2019
1 parent 1b01ab9 commit 4225bb5
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 8 deletions.
36 changes: 29 additions & 7 deletions python/fbprophet/diagnostics.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ def performance_metrics(df, metrics=None, rolling_window=0.1):
-------
Dataframe with a column for each metric, and column 'horizon'
"""
valid_metrics = ['mse', 'rmse', 'mae', 'mape', 'coverage']
valid_metrics = ['mse', 'rmse', 'mae', 'mape', 'mdape', 'coverage']
if metrics is None:
metrics = valid_metrics
if len(set(metrics)) != len(metrics):
Expand Down Expand Up @@ -341,7 +341,7 @@ def rolling_median_by_h(x, h, w, name):
df = pd.DataFrame({'x': x, 'h': h})
grouped = df.groupby('h')
df2 = grouped.size().reset_index().sort_values('h')
hs = df2['h'].values
hs = df2['h']

res_h = []
res_x = []
Expand All @@ -350,14 +350,16 @@ def rolling_median_by_h(x, h, w, name):
while i >= 0:
h_i = hs[i]
xs = grouped.get_group(h_i).x.tolist()
j = i - 1
while (len(xs) < w) and (j >= 0):

# wrap in array so this works if h is pandas Series with custom index or numpy array
next_idx_to_add = np.array(h == h_i).argmax() - 1
while (len(xs) < w) and (next_idx_to_add >= 0):
# Include points from the previous horizon. All of them if still
# less than w, otherwise just enough to get to w.
xs.append(x[j])
j -= 1
xs.append(x[next_idx_to_add])
next_idx_to_add -= 1
if len(xs) < w:
# Ran out of horizons before enough points.
# Ran out of points before getting enough.
break
res_h.append(hs[i])
res_x.append(np.median(xs))
Expand Down Expand Up @@ -450,6 +452,26 @@ def mape(df, w):
)


def mdape(df, w):
    """Median absolute percent error.

    Parameters
    ----------
    df: Cross-validation results dataframe. The columns 'y', 'yhat' and
        'horizon' are read.
    w: Aggregation window size. A negative value skips aggregation and
        returns the absolute percent error of every row.

    Returns
    -------
    Dataframe with columns horizon and mdape.
    """
    # Absolute percent error of each row. The error is divided by y, so for
    # numeric columns a row with y == 0 comes out as inf or NaN.
    ape = np.abs((df['y'] - df['yhat']) / df['y'])
    if w < 0:
        # No aggregation requested: hand back the per-row errors unchanged.
        return pd.DataFrame({'horizon': df['horizon'], 'mdape': ape})
    # The errors are passed as a plain array; the horizons stay a Series.
    return rolling_median_by_h(
        x=ape.values, h=df['horizon'], w=w, name='mdape'
    )


def smape(df, w):
"""Symmetric mean absolute percentage error
Expand Down
3 changes: 2 additions & 1 deletion python/fbprophet/tests/test_diagnostics.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ def test_performance_metrics(self):
df_none = diagnostics.performance_metrics(df_cv, rolling_window=-1)
self.assertEqual(
set(df_none.columns),
{'horizon', 'coverage', 'mae', 'mape', 'mse', 'rmse'},
{'horizon', 'coverage', 'mae', 'mape', 'mdape', 'mse', 'rmse'},
)
self.assertEqual(df_none.shape[0], 16)
# Aggregation level 0
Expand All @@ -130,6 +130,7 @@ def test_performance_metrics(self):
self.assertEqual(df_all.shape[0], 1)
for metric in ['mse', 'mape', 'mae', 'coverage']:
self.assertAlmostEqual(df_all[metric].values[0], df_none[metric].mean())
self.assertAlmostEqual(df_all['mdape'].values[0], df_none['mdape'].median())
# Custom list of metrics
df_horizon = diagnostics.performance_metrics(
df_cv, metrics=['coverage', 'mse'],
Expand Down

0 comments on commit 4225bb5

Please sign in to comment.