Skip to content

Commit

Permalink
benchstat, cmd/benchstat: group benchmark results
Browse files Browse the repository at this point in the history
Go 1.9 and up write "pkg", "goos", and "goarch" keys in benchmark
output. benchstat now understands benchmark labels, and uses them to
separate incomparable benchmark results. cmd/benchstat gains a
command-line flag called "-split" to control this, defaulting to
"pkg,goos,goarch".

Change-Id: I00413ab348bbff31743b59e81d88c4faab1a8dca
Reviewed-on: https://go-review.googlesource.com/38584
Run-TryBot: Quentin Smith <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
Reviewed-by: Russ Cox <[email protected]>
  • Loading branch information
quentinmit committed Apr 7, 2017
1 parent cab923d commit add18dd
Show file tree
Hide file tree
Showing 13 changed files with 460 additions and 162 deletions.
1 change: 1 addition & 0 deletions analysis/app/compare.go
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,7 @@ func (a *App) compareQuery(q string) *compareData {
// Compute benchstat
c := &benchstat.Collection{
AddGeoMean: true,
SplitBy: []string{"pkg", "goos", "goarch"},
}
for _, g := range groups {
c.AddResults(g.Q, g.results)
Expand Down
98 changes: 68 additions & 30 deletions benchstat/data.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
package benchstat

import (
"bytes"
"fmt"
"strconv"
"strings"
Expand All @@ -15,10 +16,15 @@ import (

// A Collection is a collection of benchmark results.
type Collection struct {
// Configs, Benchmarks, and Units give the set of configs,
// benchmarks, and units from the keys in Stats in an order
// Configs, Groups, and Units give the set of configs,
// groups, and units from the keys in Stats in an order
// meant to match the order the benchmarks were read in.
Configs, Benchmarks, Units []string
Configs, Groups, Units []string

// Benchmarks gives the set of benchmarks from the keys in
// Stats by group in an order meant to match the order
// benchmarks were read in.
Benchmarks map[string][]string

// Metrics holds the accumulated metrics for each key.
Metrics map[Key]*Metrics
Expand All @@ -34,13 +40,17 @@ type Collection struct {
// AddGeoMean specifies whether to add a line to the table
// showing the geometric mean of all the benchmark results.
AddGeoMean bool

// SplitBy specifies the labels to split results by.
// By default, results will only be split by full name.
SplitBy []string
}

// A Key identifies one metric (e.g., "ns/op", "B/op") from one
// benchmark (function name sans "Benchmark" prefix) in one
// configuration (input file name).
// benchmark (function name sans "Benchmark" prefix) and optional
// group in one configuration (input file name).
type Key struct {
Config, Benchmark, Unit string
Config, Group, Benchmark, Unit string
}

// A Metrics holds the measurements of a single metric
Expand Down Expand Up @@ -129,7 +139,13 @@ func (c *Collection) addMetrics(key Key) *Metrics {
*strings = append(*strings, add)
}
addString(&c.Configs, key.Config)
addString(&c.Benchmarks, key.Benchmark)
addString(&c.Groups, key.Group)
if c.Benchmarks == nil {
c.Benchmarks = make(map[string][]string)
}
benchmarks := c.Benchmarks[key.Group]
addString(&benchmarks, key.Benchmark)
c.Benchmarks[key.Group] = benchmarks
addString(&c.Units, key.Unit)
m := &Metrics{Unit: key.Unit}
c.Metrics[key] = m
Expand All @@ -141,43 +157,65 @@ func (c *Collection) addMetrics(key Key) *Metrics {
func (c *Collection) AddConfig(config string, data []byte) {
c.Configs = append(c.Configs, config)
key := Key{Config: config}
c.addText(key, string(data))
br := benchfmt.NewReader(bytes.NewReader(data))
for br.Next() {
c.addResult(key, br.Result())
}
if err := br.Err(); err != nil {
// bytes.Reader never returns errors
panic(err)
}
}

// AddResults adds the benchmark results to the named configuration.
func (c *Collection) AddResults(config string, results []*benchfmt.Result) {
c.Configs = append(c.Configs, config)
key := Key{Config: config}
for _, r := range results {
c.addText(key, r.Content)
c.addResult(key, r)
}
}

func (c *Collection) addText(key Key, data string) {
for _, line := range strings.Split(string(data), "\n") {
f := strings.Fields(line)
if len(f) < 4 {
continue
}
name := f[0]
if !strings.HasPrefix(name, "Benchmark") {
continue
}
name = strings.TrimPrefix(name, "Benchmark")
n, _ := strconv.Atoi(f[1])
if n == 0 {
// addResult parses a single benchmark result line and accumulates its
// metrics into the collection under key's config. Lines that do not
// look like benchmark output ("Benchmark<Name> <count> <value> <unit> ...")
// are silently ignored.
func (c *Collection) addResult(key Key, r *benchfmt.Result) {
	fields := strings.Fields(r.Content)
	if len(fields) < 4 {
		return
	}
	// A result line must begin with a "Benchmark"-prefixed name.
	full := fields[0]
	if !strings.HasPrefix(full, "Benchmark") {
		return
	}
	// A zero (or unparseable) iteration count means the line carries
	// no usable measurements.
	if iters, _ := strconv.Atoi(fields[1]); iters == 0 {
		return
	}
	key.Group = c.makeGroup(r)
	key.Benchmark = strings.TrimPrefix(full, "Benchmark")
	// The remaining fields arrive as value/unit pairs; record each
	// pair whose value parses as a float.
	for i := 2; i+2 <= len(fields); i += 2 {
		value, err := strconv.ParseFloat(fields[i], 64)
		if err != nil {
			continue
		}
		key.Unit = fields[i+1]
		stats := c.addMetrics(key)
		stats.Values = append(stats.Values, value)
	}
}

key.Benchmark = name
for i := 2; i+2 <= len(f); i += 2 {
val, err := strconv.ParseFloat(f[i], 64)
if err != nil {
continue
func (c *Collection) makeGroup(r *benchfmt.Result) string {
var out string
for _, s := range c.SplitBy {
v := r.NameLabels[s]
if v == "" {
v = r.Labels[s]
}
if v != "" {
if out != "" {
out = out + " "
}
key.Unit = f[i+1]
m := c.addMetrics(key)
m.Values = append(m.Values, val)
out += fmt.Sprintf("%s:%s", s, v)
}
}
return out
}
33 changes: 32 additions & 1 deletion benchstat/html.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,17 @@ var htmlTemplate = template.Must(template.New("").Funcs(htmlFuncs).Parse(`
<tr><th><th>{{.Metric}}
{{else -}}
<tr><th><th colspan='{{len .Configs}}' class='metric'>{{.Metric}}{{if .OldNewDelta}}<th>delta{{end}}
{{end}}{{range $row := $table.Rows -}}
{{end}}{{range $group := group $table.Rows -}}
{{if and (gt (len $table.Groups) 1) (len (index . 0).Group)}}<tr class='group'><th colspan='{{colspan (len $table.Configs) $table.OldNewDelta}}'>{{(index . 0).Group}}{{end}}
{{- range $row := . -}}
{{if $table.OldNewDelta -}}
<tr class='{{if eq .Change 1}}better{{else if eq .Change -1}}worse{{else}}unchanged{{end}}'>
{{- else -}}
<tr>
{{- end -}}
<td>{{.Benchmark}}{{range .Metrics}}<td>{{.Format $row.Scaler}}{{end}}{{if $table.OldNewDelta}}<td class='{{if eq .Delta "~"}}nodelta{{else}}delta{{end}}'>{{replace .Delta "-" "−" -1}}<td class='note'>{{.Note}}{{end}}
{{end -}}
{{- end -}}
<tr><td>&nbsp;
</tbody>
{{end}}
Expand All @@ -42,6 +45,34 @@ var htmlTemplate = template.Must(template.New("").Funcs(htmlFuncs).Parse(`

var htmlFuncs = template.FuncMap{
"replace": strings.Replace,
"group": htmlGroup,
"colspan": htmlColspan,
}

// htmlColspan reports how many table columns a group-header cell must
// span: one for the benchmark-name column, one per config, and one
// extra when the delta column is shown.
func htmlColspan(configs int, delta bool) int {
	width := configs + 1 // benchmark-name column plus one per config
	if delta {
		width++
	}
	return width
}

// htmlGroup partitions rows into maximal runs of consecutive rows that
// share the same Group value, preserving the input order. The HTML
// template uses each run to emit one group header followed by its rows.
// An empty input yields a nil result.
func htmlGroup(rows []*Row) (out [][]*Row) {
	start := 0
	for i := 1; i <= len(rows); i++ {
		// Close the current run at the end of input or where the
		// group name changes.
		if i == len(rows) || rows[i].Group != rows[i-1].Group {
			out = append(out, rows[start:i])
			start = i
		}
	}
	return
}

// FormatHTML appends an HTML formatting of the tables to buf.
Expand Down
125 changes: 68 additions & 57 deletions benchstat/table.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,14 @@ type Table struct {
Metric string
OldNewDelta bool // is this an old-new-delta table?
Configs []string
Groups []string
Rows []*Row
}

// A Row is a table row for display in the benchstat output.
type Row struct {
Benchmark string // benchmark name
Group string // group name
Scaler Scaler // formatter for stats means
Metrics []*Metrics // columns of statistics
Delta string // formatted percent change
Expand Down Expand Up @@ -49,61 +51,68 @@ func (c *Collection) Tables() []*Table {
for _, key.Unit = range c.Units {
table := new(Table)
table.Configs = c.Configs
table.Groups = c.Groups
table.Metric = metricOf(key.Unit)
table.OldNewDelta = len(c.Configs) == 2
for _, key.Benchmark = range c.Benchmarks {
row := &Row{Benchmark: key.Benchmark}

for _, key.Config = range c.Configs {
m := c.Metrics[key]
if m == nil {
row.Metrics = append(row.Metrics, new(Metrics))
continue
for _, key.Group = range c.Groups {
for _, key.Benchmark = range c.Benchmarks[key.Group] {
row := &Row{Benchmark: key.Benchmark}
if len(c.Groups) > 1 {
// Show group headers if there is more than one group.
row.Group = key.Group
}
row.Metrics = append(row.Metrics, m)
if row.Scaler == nil {
row.Scaler = NewScaler(m.Mean, m.Unit)
}
}

// If there are only two configs being compared, add stats.
if table.OldNewDelta {
k0 := key
k0.Config = c.Configs[0]
k1 := key
k1.Config = c.Configs[1]
old := c.Metrics[k0]
new := c.Metrics[k1]
// If one is missing, omit row entirely.
// TODO: Control this better.
if old == nil || new == nil {
continue
}
pval, testerr := deltaTest(old, new)
row.Delta = "~"
if testerr == stats.ErrZeroVariance {
row.Note = "(zero variance)"
} else if testerr == stats.ErrSampleSize {
row.Note = "(too few samples)"
} else if testerr == stats.ErrSamplesEqual {
row.Note = "(all equal)"
} else if testerr != nil {
row.Note = fmt.Sprintf("(%s)", testerr)
} else if pval < alpha {
pct := ((new.Mean / old.Mean) - 1.0) * 100.0
row.Delta = fmt.Sprintf("%+.2f%%", pct)
if pct < 0 == (table.Metric != "speed") { // smaller is better, except speeds
row.Change = +1
} else {
row.Change = -1
for _, key.Config = range c.Configs {
m := c.Metrics[key]
if m == nil {
row.Metrics = append(row.Metrics, new(Metrics))
continue
}
row.Metrics = append(row.Metrics, m)
if row.Scaler == nil {
row.Scaler = NewScaler(m.Mean, m.Unit)
}
}
if row.Note == "" && pval != -1 {
row.Note = fmt.Sprintf("(p=%0.3f n=%d+%d)", pval, len(old.RValues), len(new.RValues))

// If there are only two configs being compared, add stats.
if table.OldNewDelta {
k0 := key
k0.Config = c.Configs[0]
k1 := key
k1.Config = c.Configs[1]
old := c.Metrics[k0]
new := c.Metrics[k1]
// If one is missing, omit row entirely.
// TODO: Control this better.
if old == nil || new == nil {
continue
}
pval, testerr := deltaTest(old, new)
row.Delta = "~"
if testerr == stats.ErrZeroVariance {
row.Note = "(zero variance)"
} else if testerr == stats.ErrSampleSize {
row.Note = "(too few samples)"
} else if testerr == stats.ErrSamplesEqual {
row.Note = "(all equal)"
} else if testerr != nil {
row.Note = fmt.Sprintf("(%s)", testerr)
} else if pval < alpha {
pct := ((new.Mean / old.Mean) - 1.0) * 100.0
row.Delta = fmt.Sprintf("%+.2f%%", pct)
if pct < 0 == (table.Metric != "speed") { // smaller is better, except speeds
row.Change = +1
} else {
row.Change = -1
}
}
if row.Note == "" && pval != -1 {
row.Note = fmt.Sprintf("(p=%0.3f n=%d+%d)", pval, len(old.RValues), len(new.RValues))
}
}
}

table.Rows = append(table.Rows, row)
table.Rows = append(table.Rows, row)
}
}

if len(table.Rows) > 0 {
Expand Down Expand Up @@ -140,16 +149,18 @@ func addGeomean(c *Collection, t *Table, unit string, delta bool) {
maxCount := 0
for _, key.Config = range c.Configs {
var means []float64
for _, key.Benchmark = range c.Benchmarks {
m := c.Metrics[key]
// Omit 0 values from the geomean calculation,
// as these either make the geomean undefined
// or zero (depending on who you ask). This
// typically comes up with things like
// allocation counts, where it's fine to just
// ignore the benchmark.
if m != nil && m.Mean != 0 {
means = append(means, m.Mean)
for _, key.Group = range c.Groups {
for _, key.Benchmark = range c.Benchmarks[key.Group] {
m := c.Metrics[key]
// Omit 0 values from the geomean calculation,
// as these either make the geomean undefined
// or zero (depending on who you ask). This
// typically comes up with things like
// allocation counts, where it's fine to just
// ignore the benchmark.
if m != nil && m.Mean != 0 {
means = append(means, m.Mean)
}
}
}
if len(means) > maxCount {
Expand Down
Loading

0 comments on commit add18dd

Please sign in to comment.