starlark: add parameter to bench_xyz(b) benchmark functions (google#323)

The parameter is a wrapper around a testing.B that provides the number of iterations (b.n) and stop/start/restart methods for controlling the benchmark timer. It follows the design used in java.starlark.net; see bazelbuild/starlark#75 (review) and bazelbuild/bazel@146cd2c.
brandjon · Dec 2, 2020 · a783991 · a783991
1 parent e55f603
commit a783991
Show file tree

Hide file tree

Showing 2 changed files with 103 additions and 24 deletions.
diff --git a/starlark/bench_test.go b/starlark/bench_test.go
@@ -6,6 +6,7 @@ package starlark_test
 
 import (
 	"bytes"
+	"fmt"
 	"io/ioutil"
 	"path/filepath"
 	"strings"
@@ -45,18 +46,76 @@ func Benchmark(b *testing.B) {
 			value := globals[name]
 			if fn, ok := value.(*starlark.Function); ok && strings.HasPrefix(name, "bench_") {
 				b.Run(name, func(b *testing.B) {
-					for i := 0; i < b.N; i++ {
-						_, err := starlark.Call(thread, fn, nil, nil)
-						if err != nil {
-							reportEvalError(b, err)
-						}
+					_, err := starlark.Call(thread, fn, starlark.Tuple{benchmark{b}}, nil)
+					if err != nil {
+						reportEvalError(b, err)
 					}
 				})
 			}
 		}
 	}
 }
 
+// A benchmark is passed to each bench_xyz(b) function in a bench_*.star file.
+// It provides b.n, the number of iterations that must be executed by the function,
+// which is typically of the form:
+//
+//   def bench_foo(b):
+//      for _ in range(b.n):
+//         ...work...
+//
+// It also provides stop, start, and restart methods, which stop, resume, and
+// reset the underlying testing.B timer, so that significant set-up work need
+// not count against the measured operation.
+//
+// (This interface is inspired by Go's testing.B, and is also implemented
+// by the java.starlark.net implementation; see
+// https://github.com/bazelbuild/starlark/pull/75#pullrequestreview-275604129.)
+type benchmark struct {
+	b *testing.B // Go benchmark whose N and timer controls are exposed to Starlark
+}
+
+func (benchmark) Freeze()               {}
+func (benchmark) Truth() starlark.Bool  { return true }
+func (benchmark) Type() string          { return "benchmark" }
+func (benchmark) String() string        { return "<benchmark>" }
+func (benchmark) Hash() (uint32, error) { return 0, fmt.Errorf("unhashable: benchmark") }
+func (benchmark) AttrNames() []string   { return []string{"n", "restart", "start", "stop"} }
+func (b benchmark) Attr(name string) (starlark.Value, error) {
+	switch name {
+	case "n":
+		return starlark.MakeInt(b.b.N), nil // iteration count taken from the wrapped testing.B
+	case "restart":
+		return benchmarkRestart.BindReceiver(b), nil
+	case "start":
+		return benchmarkStart.BindReceiver(b), nil
+	case "stop":
+		return benchmarkStop.BindReceiver(b), nil
+	}
+	return nil, nil // unknown name: (nil, nil) signals "no such attribute" per starlark.HasAttrs
+}
+
+var ( // unbound builtins; benchmark.Attr binds each one to its receiver with BindReceiver
+	benchmarkRestart = starlark.NewBuiltin("restart", benchmarkRestartImpl)
+	benchmarkStart   = starlark.NewBuiltin("start", benchmarkStartImpl)
+	benchmarkStop    = starlark.NewBuiltin("stop", benchmarkStopImpl)
+)
+
+func benchmarkRestartImpl(thread *starlark.Thread, b *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
+	b.Receiver().(benchmark).b.ResetTimer() // zero the elapsed time measured so far
+	return starlark.None, nil // NOTE(review): args/kwargs are ignored, not rejected
+}
+
+func benchmarkStartImpl(thread *starlark.Thread, b *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
+	b.Receiver().(benchmark).b.StartTimer() // resume timing
+	return starlark.None, nil // NOTE(review): args/kwargs are ignored, not rejected
+}
+
+func benchmarkStopImpl(thread *starlark.Thread, b *starlark.Builtin, args starlark.Tuple, kwargs []starlark.Tuple) (starlark.Value, error) {
+	b.Receiver().(benchmark).b.StopTimer() // pause timing
+	return starlark.None, nil // NOTE(review): args/kwargs are ignored, not rejected
+}
+
 // BenchmarkProgram measures operations relevant to compiled programs.
 // TODO(adonovan): use a bigger testdata program.
 func BenchmarkProgram(b *testing.B) {

diff --git a/starlark/testdata/benchmark.star b/starlark/testdata/benchmark.star
@@ -1,11 +1,17 @@
 # Benchmarks of Starlark execution
 # option:nesteddef
 
-def bench_range():
-    return range(200)
+def bench_range_construction(b):
+    for _ in range(b.n):
+        range(200)  # result is discarded; only building the range is measured
+
+def bench_range_iteration(b):
+    for _ in range(b.n):
+        for x in range(200):
+            pass  # empty body: only the iteration itself is measured
 
 # Make a 2-level call tree of 100 * 100 calls.
-def bench_calling():
+def bench_calling(b):
     list = range(100)
 
     def g():
@@ -16,28 +22,42 @@ def bench_calling():
         for x in list:
             g()
 
-    f()
+    for _ in range(b.n):
+        f()
 
 # Measure overhead of calling a trivial built-in method.
 emptydict = {}
 range1000 = range(1000)
 
-def bench_builtin_method():
-    for _ in range1000:
-        emptydict.get(None)
+def bench_builtin_method(b):
+    for _ in range(b.n):
+        for _ in range1000:
+            emptydict.get(None)
 
-def bench_int():
-    a = 0
-    for _ in range1000:
-        a += 1
+def bench_int(b):
+    for _ in range(b.n):
+        a = 0
+        for _ in range1000:
+            a += 1
 
-def bench_bigint():
-    a = 1 << 31  # maxint32 + 1
-    for _ in range1000:
-        a += 1
+def bench_bigint(b):
+    for _ in range(b.n):
+        a = 1 << 31  # maxint32 + 1
+        for _ in range1000:
+            a += 1
 
-def bench_gauss():
+def bench_gauss(b):
     # Sum of arithmetic series. All results fit in int32.
-    acc = 0
-    for x in range(92000):
-        acc += x
+    for _ in range(b.n):
+        acc = 0
+        for x in range(92000):
+            acc += x
+
+def bench_mix(b):
+    "Benchmark of a simple mix of computation (for, if, arithmetic, comprehension)."
+    for _ in range(b.n):
+        x = 0
+        for i in range(50):
+            if i:  # false only when i == 0
+                x += 1
+            a = [x for x in range(i)]  # comprehension's x is local to it; a is never read