Skip to content

Commit

Permalink
benchstat, cmd/benchstat: group benchmark results
Browse files Browse the repository at this point in the history
Go 1.9 and up write "pkg", "goos", and "goarch" keys in benchmark
output. benchstat now understands benchmark labels, and uses them to
separate incomparable benchmark results. cmd/benchstat gains a
command-line flag called "-split" to control this, defaulting to
"pkg,goos,goarch".

Change-Id: I00413ab348bbff31743b59e81d88c4faab1a8dca
Reviewed-on: https://go-review.googlesource.com/38584
Run-TryBot: Quentin Smith <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
Reviewed-by: Russ Cox <[email protected]>
  • Loading branch information
quentinmit committed Apr 7, 2017
1 parent cab923d commit add18dd
Show file tree
Hide file tree
Showing 13 changed files with 460 additions and 162 deletions.
1 change: 1 addition & 0 deletions analysis/app/compare.go
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,7 @@ func (a *App) compareQuery(q string) *compareData {
// Compute benchstat
c := &benchstat.Collection{
AddGeoMean: true,
SplitBy: []string{"pkg", "goos", "goarch"},
}
for _, g := range groups {
c.AddResults(g.Q, g.results)
Expand Down
98 changes: 68 additions & 30 deletions benchstat/data.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
package benchstat

import (
"bytes"
"fmt"
"strconv"
"strings"
Expand All @@ -15,10 +16,15 @@ import (

// A Collection is a collection of benchmark results.
type Collection struct {
// Configs, Benchmarks, and Units give the set of configs,
// benchmarks, and units from the keys in Stats in an order
// Configs, Groups, and Units give the set of configs,
// groups, and units from the keys in Stats in an order
// meant to match the order the benchmarks were read in.
Configs, Benchmarks, Units []string
Configs, Groups, Units []string

// Benchmarks gives the set of benchmarks from the keys in
// Stats by group in an order meant to match the order
// benchmarks were read in.
Benchmarks map[string][]string

// Metrics holds the accumulated metrics for each key.
Metrics map[Key]*Metrics
Expand All @@ -34,13 +40,17 @@ type Collection struct {
// AddGeoMean specifies whether to add a line to the table
// showing the geometric mean of all the benchmark results.
AddGeoMean bool

// SplitBy specifies the labels to split results by.
// By default, results will only be split by full name.
SplitBy []string
}

// A Key identifies one metric (e.g., "ns/op", "B/op") from one
// benchmark (function name sans "Benchmark" prefix) in one
// configuration (input file name).
// benchmark (function name sans "Benchmark" prefix) and optional
// group in one configuration (input file name).
type Key struct {
Config, Benchmark, Unit string
Config, Group, Benchmark, Unit string
}

// A Metrics holds the measurements of a single metric
Expand Down Expand Up @@ -129,7 +139,13 @@ func (c *Collection) addMetrics(key Key) *Metrics {
*strings = append(*strings, add)
}
addString(&c.Configs, key.Config)
addString(&c.Benchmarks, key.Benchmark)
addString(&c.Groups, key.Group)
if c.Benchmarks == nil {
c.Benchmarks = make(map[string][]string)
}
benchmarks := c.Benchmarks[key.Group]
addString(&benchmarks, key.Benchmark)
c.Benchmarks[key.Group] = benchmarks
addString(&c.Units, key.Unit)
m := &Metrics{Unit: key.Unit}
c.Metrics[key] = m
Expand All @@ -141,43 +157,65 @@ func (c *Collection) addMetrics(key Key) *Metrics {
func (c *Collection) AddConfig(config string, data []byte) {
c.Configs = append(c.Configs, config)
key := Key{Config: config}
c.addText(key, string(data))
br := benchfmt.NewReader(bytes.NewReader(data))
for br.Next() {
c.addResult(key, br.Result())
}
if err := br.Err(); err != nil {
// bytes.Reader never returns errors
panic(err)
}
}

// AddResults adds the benchmark results to the named configuration.
func (c *Collection) AddResults(config string, results []*benchfmt.Result) {
c.Configs = append(c.Configs, config)
key := Key{Config: config}
for _, r := range results {
c.addText(key, r.Content)
c.addResult(key, r)
}
}

func (c *Collection) addText(key Key, data string) {
for _, line := range strings.Split(string(data), "\n") {
f := strings.Fields(line)
if len(f) < 4 {
continue
}
name := f[0]
if !strings.HasPrefix(name, "Benchmark") {
continue
}
name = strings.TrimPrefix(name, "Benchmark")
n, _ := strconv.Atoi(f[1])
if n == 0 {
// addResult parses a single benchmark result line and accumulates its
// metrics into the collection under key's config. Lines that do not
// look like benchmark output ("Benchmark<Name> <count> <value> <unit> ...")
// are silently ignored.
func (c *Collection) addResult(key Key, r *benchfmt.Result) {
	fields := strings.Fields(r.Content)
	if len(fields) < 4 {
		return
	}
	// A result line must begin with a "Benchmark"-prefixed name.
	full := fields[0]
	if !strings.HasPrefix(full, "Benchmark") {
		return
	}
	// A zero (or unparseable) iteration count means the line carries
	// no usable measurements.
	if iters, _ := strconv.Atoi(fields[1]); iters == 0 {
		return
	}
	key.Group = c.makeGroup(r)
	key.Benchmark = strings.TrimPrefix(full, "Benchmark")
	// The remaining fields arrive as value/unit pairs; record each
	// pair whose value parses as a float.
	for i := 2; i+2 <= len(fields); i += 2 {
		value, err := strconv.ParseFloat(fields[i], 64)
		if err != nil {
			continue
		}
		key.Unit = fields[i+1]
		stats := c.addMetrics(key)
		stats.Values = append(stats.Values, value)
	}
}

key.Benchmark = name
for i := 2; i+2 <= len(f); i += 2 {
val, err := strconv.ParseFloat(f[i], 64)
if err != nil {
continue
func (c *Collection) makeGroup(r *benchfmt.Result) string {
var out string
for _, s := range c.SplitBy {
v := r.NameLabels[s]
if v == "" {
v = r.Labels[s]
}
if v != "" {
if out != "" {
out = out + " "
}
key.Unit = f[i+1]
m := c.addMetrics(key)
m.Values = append(m.Values, val)
out += fmt.Sprintf("%s:%s", s, v)
}
}
return out
}
33 changes: 32 additions & 1 deletion benchstat/html.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,17 @@ var htmlTemplate = template.Must(template.New("").Funcs(htmlFuncs).Parse(`
<tr><th><th>{{.Metric}}
{{else -}}
<tr><th><th colspan='{{len .Configs}}' class='metric'>{{.Metric}}{{if .OldNewDelta}}<th>delta{{end}}
{{end}}{{range $row := $table.Rows -}}
{{end}}{{range $group := group $table.Rows -}}
{{if and (gt (len $table.Groups) 1) (len (index . 0).Group)}}<tr class='group'><th colspan='{{colspan (len $table.Configs) $table.OldNewDelta}}'>{{(index . 0).Group}}{{end}}
{{- range $row := . -}}
{{if $table.OldNewDelta -}}
<tr class='{{if eq .Change 1}}better{{else if eq .Change -1}}worse{{else}}unchanged{{end}}'>
{{- else -}}
<tr>
{{- end -}}
<td>{{.Benchmark}}{{range .Metrics}}<td>{{.Format $row.Scaler}}{{end}}{{if $table.OldNewDelta}}<td class='{{if eq .Delta "~"}}nodelta{{else}}delta{{end}}'>{{replace .Delta "-" "−" -1}}<td class='note'>{{.Note}}{{end}}
{{end -}}
{{- end -}}
<tr><td>&nbsp;
</tbody>
{{end}}
Expand All @@ -42,6 +45,34 @@ var htmlTemplate = template.Must(template.New("").Funcs(htmlFuncs).Parse(`

var htmlFuncs = template.FuncMap{
"replace": strings.Replace,
"group": htmlGroup,
"colspan": htmlColspan,
}

// htmlColspan reports how many table columns a group-header cell must
// span: one for the benchmark-name column, one per config, and one
// extra when the delta column is shown.
func htmlColspan(configs int, delta bool) int {
	width := configs + 1 // benchmark-name column plus one per config
	if delta {
		width++
	}
	return width
}

// htmlGroup partitions rows into maximal runs of consecutive rows that
// share the same Group value, preserving the input order. The HTML
// template uses each run to emit one group header followed by its rows.
// An empty input yields a nil result.
func htmlGroup(rows []*Row) (out [][]*Row) {
	start := 0
	for i := 1; i <= len(rows); i++ {
		// Close the current run at the end of input or where the
		// group name changes.
		if i == len(rows) || rows[i].Group != rows[i-1].Group {
			out = append(out, rows[start:i])
			start = i
		}
	}
	return
}

// FormatHTML appends an HTML formatting of the tables to buf.
Expand Down
125 changes: 68 additions & 57 deletions benchstat/table.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,14 @@ type Table struct {
Metric string
OldNewDelta bool // is this an old-new-delta table?
Configs []string
Groups []string
Rows []*Row
}

// A Row is a table row for display in the benchstat output.
type Row struct {
Benchmark string // benchmark name
Group string // group name
Scaler Scaler // formatter for stats means
Metrics []*Metrics // columns of statistics
Delta string // formatted percent change
Expand Down Expand Up @@ -49,61 +51,68 @@ func (c *Collection) Tables() []*Table {
for _, key.Unit = range c.Units {
table := new(Table)
table.Configs = c.Configs
table.Groups = c.Groups
table.Metric = metricOf(key.Unit)
table.OldNewDelta = len(c.Configs) == 2
for _, key.Benchmark = range c.Benchmarks {
row := &Row{Benchmark: key.Benchmark}

for _, key.Config = range c.Configs {
m := c.Metrics[key]
if m == nil {
row.Metrics = append(row.Metrics, new(Metrics))
continue
for _, key.Group = range c.Groups {
for _, key.Benchmark = range c.Benchmarks[key.Group] {
row := &Row{Benchmark: key.Benchmark}
if len(c.Groups) > 1 {
// Show group headers if there is more than one group.
row.Group = key.Group
}
row.Metrics = append(row.Metrics, m)
if row.Scaler == nil {
row.Scaler = NewScaler(m.Mean, m.Unit)
}
}

// If there are only two configs being compared, add stats.
if table.OldNewDelta {
k0 := key
k0.Config = c.Configs[0]
k1 := key
k1.Config = c.Configs[1]
old := c.Metrics[k0]
new := c.Metrics[k1]
// If one is missing, omit row entirely.
// TODO: Control this better.
if old == nil || new == nil {
continue
}
pval, testerr := deltaTest(old, new)
row.Delta = "~"
if testerr == stats.ErrZeroVariance {
row.Note = "(zero variance)"
} else if testerr == stats.ErrSampleSize {
row.Note = "(too few samples)"
} else if testerr == stats.ErrSamplesEqual {
row.Note = "(all equal)"
} else if testerr != nil {
row.Note = fmt.Sprintf("(%s)", testerr)
} else if pval < alpha {
pct := ((new.Mean / old.Mean) - 1.0) * 100.0
row.Delta = fmt.Sprintf("%+.2f%%", pct)
if pct < 0 == (table.Metric != "speed") { // smaller is better, except speeds
row.Change = +1
} else {
row.Change = -1
for _, key.Config = range c.Configs {
m := c.Metrics[key]
if m == nil {
row.Metrics = append(row.Metrics, new(Metrics))
continue
}
row.Metrics = append(row.Metrics, m)
if row.Scaler == nil {
row.Scaler = NewScaler(m.Mean, m.Unit)
}
}
if row.Note == "" && pval != -1 {
row.Note = fmt.Sprintf("(p=%0.3f n=%d+%d)", pval, len(old.RValues), len(new.RValues))

// If there are only two configs being compared, add stats.
if table.OldNewDelta {
k0 := key
k0.Config = c.Configs[0]
k1 := key
k1.Config = c.Configs[1]
old := c.Metrics[k0]
new := c.Metrics[k1]
// If one is missing, omit row entirely.
// TODO: Control this better.
if old == nil || new == nil {
continue
}
pval, testerr := deltaTest(old, new)
row.Delta = "~"
if testerr == stats.ErrZeroVariance {
row.Note = "(zero variance)"
} else if testerr == stats.ErrSampleSize {
row.Note = "(too few samples)"
} else if testerr == stats.ErrSamplesEqual {
row.Note = "(all equal)"
} else if testerr != nil {
row.Note = fmt.Sprintf("(%s)", testerr)
} else if pval < alpha {
pct := ((new.Mean / old.Mean) - 1.0) * 100.0
row.Delta = fmt.Sprintf("%+.2f%%", pct)
if pct < 0 == (table.Metric != "speed") { // smaller is better, except speeds
row.Change = +1
} else {
row.Change = -1
}
}
if row.Note == "" && pval != -1 {
row.Note = fmt.Sprintf("(p=%0.3f n=%d+%d)", pval, len(old.RValues), len(new.RValues))
}
}
}

table.Rows = append(table.Rows, row)
table.Rows = append(table.Rows, row)
}
}

if len(table.Rows) > 0 {
Expand Down Expand Up @@ -140,16 +149,18 @@ func addGeomean(c *Collection, t *Table, unit string, delta bool) {
maxCount := 0
for _, key.Config = range c.Configs {
var means []float64
for _, key.Benchmark = range c.Benchmarks {
m := c.Metrics[key]
// Omit 0 values from the geomean calculation,
// as these either make the geomean undefined
// or zero (depending on who you ask). This
// typically comes up with things like
// allocation counts, where it's fine to just
// ignore the benchmark.
if m != nil && m.Mean != 0 {
means = append(means, m.Mean)
for _, key.Group = range c.Groups {
for _, key.Benchmark = range c.Benchmarks[key.Group] {
m := c.Metrics[key]
// Omit 0 values from the geomean calculation,
// as these either make the geomean undefined
// or zero (depending on who you ask). This
// typically comes up with things like
// allocation counts, where it's fine to just
// ignore the benchmark.
if m != nil && m.Mean != 0 {
means = append(means, m.Mean)
}
}
}
if len(means) > maxCount {
Expand Down
Loading

0 comments on commit add18dd

Please sign in to comment.