Skip to content

Commit

Permalink
Add geomean summary row to output from utils/compare.py.
Browse files Browse the repository at this point in the history
This change adds a summary row with the geometric mean of the selected
metric to the output of utils/compare.py. The 'rhs' values are
normalized to the 'lhs' values and the geometric mean of the results
is computed.

This should be similar to the geomean row in the LNT HTML UI and allows
comparing two sets of runs.

Reviewers: anemet, MatzeB, cmatthews, serge-sans-paille

Reviewed By: anemet

Differential Revision: https://reviews.llvm.org/D57828

llvm-svn: 356545
  • Loading branch information
fhahn committed Mar 20, 2019
1 parent 1b240a7 commit 0ee69b8
Showing 1 changed file with 38 additions and 12 deletions.
50 changes: 38 additions & 12 deletions utils/compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from __future__ import print_function

import pandas as pd
from scipy import stats
import sys
import os.path
import re
Expand Down Expand Up @@ -108,28 +109,40 @@ def readmulti(filenames):
d = pd.concat(datasets, axis=0, names=['run'], keys=datasetnames)
return d

def add_diff_column(d, absolute_diff=False):
values = d.unstack(level=0)

has_two_runs = d.index.get_level_values(0).nunique() == 2
def get_values(values):
    """Split a per-run table into the two series that get compared.

    Any existing 'diff' column is ignored (the caller's frame is not
    modified). If exactly two columns remain, the first and second column
    are returned as-is. For any other number of columns the row-wise
    minimum and maximum across all columns are returned instead.

    Returns a ``(values0, values1)`` tuple of pandas Series.
    """
    # Create data view without diff column.
    if 'diff' in values.columns:
        values = values[[c for c in values.columns if c != 'diff']]
    if len(values.columns) == 2:
        return (values.iloc[:, 0], values.iloc[:, 1])
    # Not exactly two runs: compare the extremes of each row.
    return (values.min(axis=1), values.max(axis=1))

def add_diff_column(values, absolute_diff=False):
    """Add a 'diff' column comparing the two series picked by get_values().

    values: DataFrame with one column per run; it is modified in place
        and also returned.
    absolute_diff: if true, 'diff' is ``values1 - values0``; otherwise it
        is the relative change ``values1 / values0 - 1.0``.
    """
    values0, values1 = get_values(values)
    # Quotient or absolute difference?
    if absolute_diff:
        values['diff'] = values1 - values0
    else:
        values['diff'] = values1 / values0
        values['diff'] -= 1.0
    # Column names are deliberately left untouched here: flattening the
    # multiindex produced by unstack() is done by the caller before this
    # function runs. Re-applying the `c[1] if c[1] else c[0]` renaming to
    # plain string names would index into the strings and truncate every
    # column name to a single character.
    return values

def add_geomean_row(data, dataout):
    """
    Normalize values1 over values0, compute geomean difference and add a
    summary row to dataout.

    data: frame handed to get_values() to obtain the two series to compare.
    dataout: frame being prepared for printing.

    Returns a new frame consisting of dataout plus one trailing row whose
    'Program' cell is 'Geomean difference', whose 'diff' cell is the
    geometric mean of values1 / values0 minus 1.0, and whose remaining
    cells are empty strings. The result is re-indexed from 0.
    """
    values0, values1 = get_values(data)
    # A zero baseline gives an infinite (or undefined) ratio, and a single
    # such entry would make the whole geometric mean inf/NaN. Turn those
    # baselines into NaN and leave the affected rows out of the summary.
    relative = (values1 / values0.replace(0, float('nan'))).dropna()
    gm_diff = stats.gmean(relative) - 1.0

    gm_row = {c: '' for c in dataout.columns}
    gm_row['diff'] = gm_diff
    gm_row['Program'] = 'Geomean difference'
    # DataFrame.append() was removed in pandas 2.0; concatenating a one-row
    # frame is the supported way to add the summary row.
    return pd.concat([dataout, pd.DataFrame([gm_row])], ignore_index=True)

def filter_failed(data, key='Exec'):
    """Return the rows of `data` whose `key` column equals "pass"."""
    passed = data[key] == "pass"
    return data.loc[passed]

Expand Down Expand Up @@ -209,6 +222,9 @@ def print_result(d, limit_output=True, shorten_names=True,
# Turn index into a column so we can format it...
dataout.insert(0, 'Program', dataout.index)

if show_diff_column:
dataout = add_geomean_row(d, dataout)

formatters = dict()
formatters['diff'] = format_diff
if shorten_names:
Expand All @@ -220,7 +236,11 @@ def format_name(name, common_prefix, common_suffix):
return "%-45s" % truncate(name, 10, 30)

formatters['Program'] = lambda name: format_name(name, drop_prefix, drop_suffix)
def float_format(x):
    """Format a cell as a fixed-width float with two decimals.

    Empty-string placeholder cells are passed through as '' so they
    render blank instead of failing the "%6.2f" conversion.
    """
    if x == '':
        return ''
    return "%6.2f" % (x,)

pd.set_option("display.max_colwidth", 0)
out = dataout.to_string(index=False, justify='left',
float_format=float_format, formatters=formatters)
Expand Down Expand Up @@ -334,6 +354,12 @@ def format_name(name, common_prefix, common_suffix):
print("Metric: %s" % (",".join(metrics),))
if len(metrics) > 0:
data = data[metrics]

data = data.unstack(level=0)
# unstack() gave us a complicated multiindex for the columns, simplify
# things by renaming to a simple index.
data.columns = [(c[1] if c[1] else c[0]) for c in data.columns.values]

data = add_diff_column(data)

sortkey = 'diff'
Expand Down

0 comments on commit 0ee69b8

Please sign in to comment.