Skip to content

Commit

Permalink
- Set p-values to None for sequential tests, since they are not valid anyway.
Browse files Browse the repository at this point in the history

- Added is_significant column to difference methods, using multipletests from statsmodels to support multiple-comparison correction methods other than Bonferroni
  • Loading branch information
iampelle committed Oct 14, 2021
1 parent d326c53 commit 9cb3a64
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 39 deletions.
1 change: 1 addition & 0 deletions spotify_confidence/analysis/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
Z_CRIT = "z_crit"
ALPHA = 'alpha'
ADJUSTED_ALPHA = 'adjusted_alpha'
IS_SIGNIFICANT = "is_significant"

BONFERRONI = "bonferroni"
HOLM = "holm"
Expand Down
25 changes: 18 additions & 7 deletions spotify_confidence/analysis/frequentist/statsmodels_computer.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
proportions_chisquare, proportion_confint, confint_proportions_2indep)
from statsmodels.stats.weightstats import (
_zstat_generic, _zconfint_generic, _tstat_generic, _tconfint_generic)
from statsmodels.stats.multitest import multipletests
from typing import (Union, Iterable, List, Tuple)
from abc import abstractmethod

Expand All @@ -27,7 +28,7 @@
from .sequential_bound_solver import bounds
from ..constants import (POINT_ESTIMATE, VARIANCE, CI_LOWER, CI_UPPER,
DIFFERENCE, P_VALUE, SFX1, SFX2, STD_ERR, ALPHA,
ADJUSTED_ALPHA, ADJUSTED_P, ADJUSTED_LOWER, ADJUSTED_UPPER,
ADJUSTED_ALPHA, ADJUSTED_P, ADJUSTED_LOWER, ADJUSTED_UPPER, IS_SIGNIFICANT,
NULL_HYPOTHESIS, NIM, PREFERENCE, TWO_SIDED,
PREFERENCE_DICT, NIM_TYPE, BONFERRONI, HOLM, HOMMEL, SIMES_HOCHBERG,
SIDAK, HOLM_SIDAK, FDR_BH, FDR_BY, FDR_TSBH, FDR_TSBKY,
Expand Down Expand Up @@ -126,7 +127,7 @@ def compute_difference(self,
return difference_df[listify(groupby) +
['level_1', 'level_2', 'absolute_difference',
DIFFERENCE, CI_LOWER, CI_UPPER, P_VALUE] +
[ADJUSTED_LOWER, ADJUSTED_UPPER, ADJUSTED_P] +
[ADJUSTED_LOWER, ADJUSTED_UPPER, ADJUSTED_P, IS_SIGNIFICANT] +
([NIM, NULL_HYPOTHESIS, PREFERENCE]
if nims is not None else [])]

Expand All @@ -152,7 +153,7 @@ def compute_multiple_difference(self,
return difference_df[listify(groupby) +
['level_1', 'level_2', 'absolute_difference',
DIFFERENCE, CI_LOWER, CI_UPPER, P_VALUE] +
[ADJUSTED_LOWER, ADJUSTED_UPPER, ADJUSTED_P] +
[ADJUSTED_LOWER, ADJUSTED_UPPER, ADJUSTED_P, IS_SIGNIFICANT] +
([NIM, NULL_HYPOTHESIS, PREFERENCE]
if nims is not None else [])]

Expand Down Expand Up @@ -274,9 +275,9 @@ def _add_p_value_and_ci(self,
df[ALPHA] = 1 - self._interval_size

if(final_expected_sample_size_column is None):
groupby = ['level_1', 'level_2'] + [
column for column in df.index.names if column is not None]
df[ADJUSTED_ALPHA] = (1-self._interval_size)/self._get_num_comparisons(df, self._correction_method, groupby)
groupby = ['level_1', 'level_2'] + [column for column in df.index.names if column is not None]
n_comparisons = self._get_num_comparisons(df, self._correction_method, groupby)
df[ADJUSTED_ALPHA] = (1-self._interval_size) / n_comparisons
else:
df[ADJUSTED_ALPHA] = self._compute_sequential_adjusted_alpha(df,
final_expected_sample_size_column,
Expand All @@ -294,11 +295,21 @@ def _add_p_value_and_ci(self,
return (
df.assign(**{P_VALUE: df.apply(self._p_value, axis=1)})
.assign(**{ADJUSTED_P: lambda df:
df[P_VALUE].map(lambda p: min(p * len(df), 1))})
df[P_VALUE].map(lambda p: min(p * n_comparisons, 1)
if final_expected_sample_size_column is None
else None)})
.assign(**{CI_LOWER: ci_df[CI_LOWER]})
.assign(**{CI_UPPER: ci_df[CI_UPPER]})
.assign(**{ADJUSTED_LOWER: adjusted_ci_df[ADJUSTED_LOWER]})
.assign(**{ADJUSTED_UPPER: adjusted_ci_df[ADJUSTED_UPPER]})
.assign(**{IS_SIGNIFICANT: lambda df: df[P_VALUE] < df[ADJUSTED_ALPHA]
if BONFERRONI in self._correction_method
else multipletests(pvals=df[P_VALUE],
alpha=df[ADJUSTED_ALPHA].values[0],
method=self._correction_method)[0]})
.assign(**{P_VALUE: lambda df: df[P_VALUE]
if final_expected_sample_size_column is None
else None})
)

def _get_num_comparisons(self, df: DataFrame, correction_method: str, groupby: Iterable) -> int:
Expand Down
53 changes: 21 additions & 32 deletions tests/frequentist/test_ztest.py
Original file line number Diff line number Diff line change
Expand Up @@ -1224,10 +1224,8 @@ def test_multiple_difference_groupby(self):
* self.data.country.unique().size
* self.data.metric.unique().size
)
n_comp = len(difference_df)
assert np.allclose(
difference_df['p-value'].map(lambda p: min(1, n_comp * p)),
difference_df['adjusted p-value'], rtol=0.01)
assert difference_df['p-value'].isnull().all()
assert difference_df['adjusted p-value'].isnull().all()

def test_multiple_difference_plot_groupby(self):
charts = self.test.multiple_difference_plot(
Expand All @@ -1250,10 +1248,8 @@ def test_multiple_difference_groupby_onesided_decrease(self):
* self.data.country.unique().size
* self.data.metric.unique().size
)
n_comp = len(difference_df)
assert np.allclose(
difference_df['p-value'].map(lambda p: min(1, n_comp * p)),
difference_df['adjusted p-value'], rtol=0.01)
assert difference_df['p-value'].isnull().all()
assert difference_df['adjusted p-value'].isnull().all()

def test_multiple_difference_groupby_onesided_increase(self):
difference_df = self.test.multiple_difference(
Expand All @@ -1268,10 +1264,8 @@ def test_multiple_difference_groupby_onesided_increase(self):
* self.data.country.unique().size
* self.data.metric.unique().size
)
n_comp = len(difference_df)
assert np.allclose(
difference_df['p-value'].map(lambda p: min(1, n_comp * p)),
difference_df['adjusted p-value'], rtol=0.01)
assert difference_df['p-value'].isnull().all()
assert difference_df['adjusted p-value'].isnull().all()

def test_multiple_difference_groupby_mixed_nims(self):
nims = {(pd.to_datetime('2021-04-01'), 'us', 'm1'): (0.2, 'increase'),
Expand Down Expand Up @@ -1307,10 +1301,8 @@ def test_multiple_difference_groupby_mixed_nims(self):
* self.data.country.unique().size
* self.data.metric.unique().size
)
n_comp = len(difference_df)
assert np.allclose(
difference_df['p-value'].map(lambda p: min(1, n_comp * p)),
difference_df['adjusted p-value'], rtol=0.01)
assert difference_df['p-value'].isnull().all()
assert difference_df['adjusted p-value'].isnull().all()

difference_df_2 = self.test.multiple_difference(
level='control',
Expand All @@ -1319,7 +1311,9 @@ def test_multiple_difference_groupby_mixed_nims(self):
non_inferiority_margins=True,
final_expected_sample_size_column='final_sample_size')

assert (difference_df == difference_df_2).all().all()
for column in difference_df.columns:
assert (difference_df[column] == difference_df_2[column]).all() or \
(difference_df['p-value'].isnull() == difference_df_2['p-value'].isnull()).all()


DATE = 'date'
Expand Down Expand Up @@ -1935,10 +1929,8 @@ def test_multiple_difference_groupby(self):
'date == "2020-04-02" and country == "swe" and platform == "ios" and metric=="bananas_per_user_7d"')[
ADJUSTED_UPPER].values[0])

n_comp = len(difference_df)
assert np.allclose(
difference_df['p-value'].map(lambda p: min(1, n_comp * p)),
difference_df['adjusted p-value'], rtol=0.01)
assert difference_df['p-value'].isnull().all()
assert difference_df['adjusted p-value'].isnull().all()

difference_df_2 = self.test.multiple_difference(
level='1',
Expand All @@ -1947,7 +1939,9 @@ def test_multiple_difference_groupby(self):
final_expected_sample_size_column='final_expected_sample_size',
non_inferiority_margins=True)

assert (difference_df == difference_df_2).all().all()
for column in difference_df.columns:
assert (difference_df[column] == difference_df_2[column]).all() or \
(difference_df['p-value'].isnull() == difference_df_2['p-value'].isnull()).all()


class TestSequentialOneSided(object):
Expand Down Expand Up @@ -1998,11 +1992,8 @@ def test_multiple_difference_groupby(self):
(self.data.group.unique().size - 1)
* self.data.date.unique().size
)
n_comp = len(difference_df)
assert np.allclose(
difference_df['p-value'].map(lambda p: min(1, n_comp * p)),
difference_df['adjusted p-value'], rtol=0.01
)
assert difference_df['p-value'].isnull().all()
assert difference_df['adjusted p-value'].isnull().all()
assert np.isinf(difference_df[CI_UPPER].values[0])
np.testing.assert_almost_equal(difference_df[ADJUSTED_LOWER].values[0], -4.129515314002298, 3)
np.testing.assert_almost_equal(difference_df[DIFFERENCE].values[0], -4.001416, 3)
Expand Down Expand Up @@ -2056,11 +2047,9 @@ def test_multiple_difference_groupby(self):
(self.data.group.unique().size - 1)
* self.data.date.unique().size
)
n_comp = len(difference_df)
assert np.allclose(
difference_df['p-value'].map(lambda p: min(1, n_comp * p)),
difference_df['adjusted p-value'], rtol=0.01
)
assert difference_df['p-value'].isnull().all()
assert difference_df['adjusted p-value'].isnull().all()

np.testing.assert_almost_equal(difference_df[ADJUSTED_UPPER].values[0], 0.121, 3)
np.testing.assert_almost_equal(difference_df[ADJUSTED_LOWER].values[0], -0.151, 3)
np.testing.assert_almost_equal(difference_df[DIFFERENCE].values[0], -0.0149, 3)
Expand Down

0 comments on commit 9cb3a64

Please sign in to comment.