Skip to content

Commit

Permalink
added K-S test
Browse files Browse the repository at this point in the history
  • Loading branch information
leifwalsh committed Jul 31, 2017
1 parent ebefa70 commit ca49e61
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 4 deletions.
6 changes: 6 additions & 0 deletions docs/analyze.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
========================
:mod:`perfume.analyze`
========================

.. automodule:: perfume.analyze
:members:
4 changes: 2 additions & 2 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
extensions = ['sphinx.ext.autodoc', 'sphinx.ext.viewcode']
extensions = ['sphinx.ext.autodoc', 'sphinx.ext.mathjax', 'sphinx.ext.viewcode']

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
Expand Down Expand Up @@ -111,7 +111,7 @@

# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
html_theme = 'default'
html_theme = 'sphinx_rtd_theme'

# Theme options are theme-specific and customize the look and feel of a
# theme further. For a list of options available for each theme, see the
Expand Down
2 changes: 1 addition & 1 deletion docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ Contents:
readme
installation
usage
modules
analyze
contributing
authors
history
Expand Down
44 changes: 44 additions & 0 deletions perfume/analyze.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
import bokeh.io as bi
import bokeh.models as bm
import bokeh.plotting as bp
import numpy as np
import pandas as pd
from scipy import stats

from perfume import colors

Expand Down Expand Up @@ -60,6 +62,48 @@ def timings_in_context(samples):
return t


def _ks_Z(a, b):
    '''Return the two-sample K-S statistic of ``a`` and ``b`` as a Z value.

    The statistic reported by :func:`scipy.stats.ks_2samp` is divided
    by ``sqrt((n + m) / (n * m))``, where ``n`` and ``m`` are the
    lengths of ``a`` and ``b``.
    '''
    # Run the test first so that any error it raises surfaces before
    # the sample sizes are examined.
    d_statistic = stats.ks_2samp(a, b).statistic
    size_a, size_b = len(a), len(b)
    scale = np.sqrt((size_a + size_b) / (size_a * size_b))
    return d_statistic / scale


def ks_test(samples):
    r'''Runs the Kolmogorov-Smirnov test across functions.

    Returns a DataFrame containing all pairwise K-S test results.

    The standard K-S test computes :math:`D`, which is the maximum
    difference between the empirical CDFs.

    The result value we return is the :math:`Z` value, defined as

    .. math::

        Z = D / \sqrt{(n + m) / nm}

    where :math:`n` and :math:`m` are the respective sample sizes.

    :math:`Z` is typically interpreted using a lookup table, i.e. for
    significance level :math:`\alpha`, we want to see a :math:`Z`
    greater than :math:`c(\alpha)`:

    +-------------------+------+------+-------+------+-------+-------+
    | :math:`\alpha`    | 0.10 | 0.05 | 0.025 | 0.01 | 0.005 | 0.001 |
    +-------------------+------+------+-------+------+-------+-------+
    | :math:`c(\alpha)` | 1.22 | 1.36 | 1.48  | 1.63 | 1.73  | 1.95  |
    +-------------------+------+------+-------+------+-------+-------+

    The frame's columns are every column of ``timings(samples)`` except
    the first, and its rows are every column except the last.  Each
    column is compared against the columns that precede it; the
    remaining cells are filled with NaN.
    '''
    frame = timings(samples)
    names = frame.columns
    last_offset = len(names) - 2

    z_by_column = {}
    for offset, column in enumerate(names[1:]):
        # Compare this column with each column that comes before it ...
        computed = [_ks_Z(frame[column].values, frame[names[j]].values)
                    for j in range(offset + 1)]
        # ... and pad with NaN so every column has one entry per row.
        padding = [np.nan] * (last_offset - offset)
        z_by_column[column] = computed + padding

    row_index = pd.Index(names[:-1], name='K-S test Z')
    return pd.DataFrame(z_by_column, index=row_index)


def _cumulative_quantiles(group, rng):
group = isolate(group)
t = timings(group)
Expand Down
17 changes: 16 additions & 1 deletion perfume/perfume.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

from bokeh import io as bi
from bokeh import models as bm
import bokeh.palettes
from bokeh import plotting as bp
from IPython import display as ipdisplay
import numpy as np
Expand Down Expand Up @@ -111,6 +112,16 @@ def initialize_plot(self, title):
self._elapsed_rendering_seconds -= timer.elapsed_seconds()
return plot

@staticmethod
def _ks_style(s):
    '''Return the CSS style string for one cell of the K-S test table.

    NaN cells get an empty style.  Any other value is placed among the
    K-S critical values with ``np.searchsorted`` and the resulting
    position selects a background colour from the reversed ``RdYlGn``
    palette, which has one more entry than there are critical values.
    '''
    if np.isnan(s):
        return ''
    cutoffs = [1.22, 1.36, 1.48, 1.63, 1.73, 1.95]
    # One colour per interval delimited by the cutoffs.
    palette = bokeh.palettes.RdYlGn[len(cutoffs) + 1][::-1]
    bucket = np.searchsorted(cutoffs, s)
    return 'background-color: {}'.format(palette[bucket])

def update(self, samples):
with Timer() as timer:
timings = analyze.timings(samples)
Expand Down Expand Up @@ -142,7 +153,11 @@ def update(self, samples):
'y': [whisker_height]
}

self._describe_widget.data = timings.describe().to_html()
ks_frame = analyze.ks_test(samples)
html = (timings.describe().to_html()
+ ks_frame.style.applymap(self._ks_style).render())
self._describe_widget.data = html.replace(
'table','table style="display:inline"')
total_bench_time = timings[self._initial_size:].sum().sum() / 1000.
elapsed = time.perf_counter() - self._start
num_samples = len(timings.index)
Expand Down

0 comments on commit ca49e61

Please sign in to comment.