Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
134 changes: 107 additions & 27 deletions vm/vm.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ import (
"github.com/expr-lang/expr/vm/runtime"
)

// maxFnArgsBuf is the upper bound applied to the estimated size of the
// function-argument buffer that getArgsForFunc preallocates. It keeps a
// program containing many call opcodes from triggering an excessive up-front
// allocation; a single call needing more slots than this is still served.
const maxFnArgsBuf = 256

func Run(program *Program, env any) (any, error) {
if program == nil {
return nil, fmt.Errorf("program is nil")
Expand Down Expand Up @@ -83,6 +85,8 @@ func (vm *VM) Run(program *Program, env any) (_ any, err error) {
vm.memory = 0
vm.ip = 0

var fnArgsBuf []any

for vm.ip < len(program.Bytecode) {
if debug && vm.debug {
<-vm.step
Expand Down Expand Up @@ -355,62 +359,53 @@ func (vm *VM) Run(program *Program, env any) (_ any, err error) {
vm.push(out)

case OpCall1:
a := vm.pop()
out, err := program.functions[arg](a)
var args []any
args, fnArgsBuf = vm.getArgsForFunc(fnArgsBuf, program, 1)
out, err := program.functions[arg](args...)
if err != nil {
panic(err)
}
vm.push(out)

case OpCall2:
b := vm.pop()
a := vm.pop()
out, err := program.functions[arg](a, b)
var args []any
args, fnArgsBuf = vm.getArgsForFunc(fnArgsBuf, program, 2)
out, err := program.functions[arg](args...)
if err != nil {
panic(err)
}
vm.push(out)

case OpCall3:
c := vm.pop()
b := vm.pop()
a := vm.pop()
out, err := program.functions[arg](a, b, c)
var args []any
args, fnArgsBuf = vm.getArgsForFunc(fnArgsBuf, program, 3)
out, err := program.functions[arg](args...)
if err != nil {
panic(err)
}
vm.push(out)

case OpCallN:
fn := vm.pop().(Function)
size := arg
in := make([]any, size)
for i := int(size) - 1; i >= 0; i-- {
in[i] = vm.pop()
}
out, err := fn(in...)
var args []any
args, fnArgsBuf = vm.getArgsForFunc(fnArgsBuf, program, arg)
out, err := fn(args...)
if err != nil {
panic(err)
}
vm.push(out)

case OpCallFast:
fn := vm.pop().(func(...any) any)
size := arg
in := make([]any, size)
for i := int(size) - 1; i >= 0; i-- {
in[i] = vm.pop()
}
vm.push(fn(in...))
var args []any
args, fnArgsBuf = vm.getArgsForFunc(fnArgsBuf, program, arg)
vm.push(fn(args...))

case OpCallSafe:
fn := vm.pop().(SafeFunction)
size := arg
in := make([]any, size)
for i := int(size) - 1; i >= 0; i-- {
in[i] = vm.pop()
}
out, mem, err := fn(in...)
var args []any
args, fnArgsBuf = vm.getArgsForFunc(fnArgsBuf, program, arg)
out, mem, err := fn(args...)
if err != nil {
panic(err)
}
Expand Down Expand Up @@ -609,6 +604,64 @@ func (vm *VM) scope() *Scope {
return vm.Scopes[len(vm.Scopes)-1]
}

// getArgsForFunc removes the top "needed" values from the VM stack and returns
// them, in their original stack order, as the argument slice for a function
// call.
//
// The arguments are carved out of argsBuf, a shared buffer owned by the
// caller. The buffer is created on the first call that actually needs
// arguments, sized from an estimate computed over "program" (capped at
// maxFnArgsBuf, but never smaller than "needed"). The remaining, unused part
// of the buffer is returned as argsBufOut and must be passed back in on the
// next call. When the remaining buffer is too small for a request, a separate
// slice is allocated for that request and the buffer is left untouched.
//
// If needed is 0 or program is nil, nothing is popped and (nil, argsBuf) is
// returned.
func (vm *VM) getArgsForFunc(argsBuf []any, program *Program, needed int) (args []any, argsBufOut []any) {
	if program == nil || needed == 0 {
		return nil, argsBuf
	}

	// Lazily create the shared buffer. Deferring this until the first call
	// avoids allocating for programs that finish before calling any function.
	if argsBuf == nil {
		size := estimateFnArgsCount(program)
		if size > maxFnArgsBuf {
			// practical ceiling on the preallocation
			size = maxFnArgsBuf
		}
		if size < needed {
			// the very first call must always be servable from the buffer
			size = needed
		}
		argsBuf = make([]any, size)
	}

	if needed <= len(argsBuf) {
		// Hand out the front of the buffer and keep the rest for later calls.
		// The three-index slice caps the capacity at "needed", so a callee
		// that appends to its variadic arguments gets a fresh allocation
		// instead of writing into the neighbouring buffer elements.
		args, argsBuf = argsBuf[:needed:needed], argsBuf[needed:]
	} else {
		// The estimate fell short for this request: allocate separately.
		args = make([]any, needed)
	}

	// The top "needed" stack entries are already in argument order, so they
	// can be moved with a single copy before shrinking the stack.
	top := len(vm.Stack) - needed
	copy(args, vm.Stack[top:])
	vm.Stack = vm.Stack[:top]

	return args, argsBuf
}

// Step sends an empty struct value on the vm.step channel. Run's instruction
// loop receives from that channel before handling an instruction when debug
// mode is active (debug && vm.debug), so a call to Step lets a Run that is
// waiting on that receive continue.
func (vm *VM) Step() {
vm.step <- struct{}{}
}
Expand All @@ -623,3 +676,30 @@ func clearSlice[S ~[]E, E any](s S) {
s[i] = zero // clear mem, optimized by the compiler, in Go 1.21 the "clear" builtin can be used
}
}

// estimateFnArgsCount walks the bytecode of program and sums, for every
// opcode that has an entry in opArgLenEstimation, the number of argument
// slots budgeted for it. Opcodes whose value lies beyond the end of the table
// contribute nothing.
//
// The result is only an estimate: the walk is static, so it does not know
// which instructions will really execute.
func estimateFnArgsCount(program *Program) int {
	total := 0
	for i := range program.Bytecode {
		idx := int(program.Bytecode[i])
		if idx >= len(opArgLenEstimation) {
			continue
		}
		total += opArgLenEstimation[idx]
	}
	return total
}

// opArgLenEstimation is a lookup table indexed by opcode value. Each listed
// entry is the number of argument slots that estimateFnArgsCount adds to its
// total when it meets that opcode. Only call opcodes are listed; every other
// index inside the array holds the zero value and so adds nothing.
var opArgLenEstimation = [...]int{
OpCall1: 1,
OpCall2: 2,
OpCall3: 3,
// we don't know exactly but we know at least 4, so be conservative as this
// is only an optimization and we also want to avoid excessive preallocation
OpCallN: 4,
// here we don't know either, but we can guess it could be common to receive
// up to 3 arguments in a function
OpCallFast: 3,
OpCallSafe: 3,
}
Comment on lines +682 to +705
Copy link
Contributor Author

@diegommm diegommm Sep 13, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I initially used a switch in estimateFnArgsCount but then tried with this table and got a 4% improvement in speed.
However, you can see that I am making an array with 56 elements but I'm using only 6. I preferred it this way because I think the code looks clearer. But if you prefer to make the table use exactly the number of items it needs then we would just need to make the following change (you can just apply this suggestion as it is here if you want, I just tested this exact code and it is also correctly formatted with spaces :) ):

Suggested change
func estimateFnArgsCount(program *Program) int {
// Implementation note: a program will not necessarily go through all
// operations, but this is just an estimation
var count int
for _, op := range program.Bytecode {
if int(op) < len(opArgLenEstimation) {
count += opArgLenEstimation[op]
}
}
return count
}
var opArgLenEstimation = [...]int{
OpCall1: 1,
OpCall2: 2,
OpCall3: 3,
// we don't know exactly but we know at least 4, so be conservative as this
// is only an optimization and we also want to avoid excessive preallocation
OpCallN: 4,
// here we don't know either, but we can guess it could be common to receive
// up to 3 arguments in a function
OpCallFast: 3,
OpCallSafe: 3,
}
func estimateFnArgsCount(program *Program) int {
// Implementation note: a program will not necessarily go through all
// operations, but this is just an estimation
var count int
for _, op := range program.Bytecode {
op -= OpCall1 // if underflows only becomes bigger so it's ok
if int(op) < len(opArgLenEstimation) {
count += opArgLenEstimation[op]
}
}
return count
}
var opArgLenEstimation = [...]int{
OpCall1 - OpCall1: 1,
OpCall2 - OpCall1: 2,
OpCall3 - OpCall1: 3,
// we don't know exactly but we know at least 4, so be conservative as this
// is only an optimization and we also want to avoid excessive preallocation
OpCallN - OpCall1: 4,
// here we don't know either, but we can guess it could be common to receive
// up to 3 arguments in a function
OpCallFast - OpCall1: 3,
OpCallSafe - OpCall1: 3,
}

82 changes: 82 additions & 0 deletions vm/vm_bench_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
package vm_test

import (
"runtime"
"testing"

"github.com/expr-lang/expr"
"github.com/expr-lang/expr/checker"
"github.com/expr-lang/expr/compiler"
"github.com/expr-lang/expr/conf"
"github.com/expr-lang/expr/vm"
)

// BenchmarkVM measures vm.VM.Run on precompiled expressions. Each case is
// parsed, type-checked and compiled once outside the timed section; the
// sub-benchmark then runs the resulting program b.N times on a single reused
// VM value and reports allocations per operation.
func BenchmarkVM(b *testing.B) {
	cases := []struct {
		name, input string
	}{
		{"function calls", `
func(
func(
func(func(a, 'a', 1, nil), func(a, 'a', 1, nil), func(a, 'a', 1, nil)),
func(func(a, 'a', 1, nil), func(a, 'a', 1, nil), func(a, 'a', 1, nil)),
func(func(a, 'a', 1, nil), func(a, 'a', 1, nil), func(a, 'a', 1, nil)),
),
func(
func(func(a, 'a', 1, nil), func(a, 'a', 1, nil), func(a, 'a', 1, nil)),
func(func(a, 'a', 1, nil), func(a, 'a', 1, nil), func(a, 'a', 1, nil)),
func(func(a, 'a', 1, nil), func(a, 'a', 1, nil), func(a, 'a', 1, nil)),
),
func(
func(func(a, 'a', 1, nil), func(a, 'a', 1, nil), func(a, 'a', 1, nil)),
func(func(a, 'a', 1, nil), func(a, 'a', 1, nil), func(a, 'a', 1, nil)),
func(func(a, 'a', 1, nil), func(a, 'a', 1, nil), func(a, 'a', 1, nil)),
)
)
`},
	}

	// Build a linked chain of 40*4 nested nodes; its head is exposed to the
	// expression as the variable "a". The cursor is named "node" so that it
	// does not shadow the *testing.B parameter.
	a := new(recursive)
	for i, node := 0, a; i < 40*4; i++ {
		node.Inner = new(recursive)
		node = node.Inner
	}

	// f accepts any arguments and returns nothing, so the benchmark measures
	// call overhead rather than work done inside the callee.
	f := func(params ...any) (any, error) { return nil, nil }
	env := map[string]any{
		"a":    a,
		"b":    true,
		"func": f,
	}
	config := conf.New(env)
	expr.Function("func", f, f)(config)
	config.Check()

	for _, c := range cases {
		tree, err := checker.ParseCheck(c.input, config)
		if err != nil {
			b.Fatal(c.input, "parse and check", err)
		}
		prog, err := compiler.Compile(tree, config)
		if err != nil {
			b.Fatal(c.input, "compile", err)
		}
		// Uncomment to inspect the generated bytecode:
		//b.Logf("disassembled:\n%s", prog.Disassemble())
		//b.FailNow()

		// Run a GC cycle before the timed section starts.
		runtime.GC()

		// Named "machine" rather than "vm" so the vm package stays visible.
		var machine vm.VM
		b.Run("name="+c.name, func(b *testing.B) {
			b.ReportAllocs()
			for i := 0; i < b.N; i++ {
				// Fail on the first error, on the sub-benchmark that produced
				// it, instead of inspecting only the last iteration's error
				// after the sub-benchmark has already finished.
				if _, err := machine.Run(prog, env); err != nil {
					b.Fatal(err)
				}
			}
		})
	}
}

// recursive is a self-referencing node type used by BenchmarkVM, which links
// instances into a chain through Inner and passes the head of that chain to
// the benchmarked program as the variable "a". The Inner field carries the
// struct tag expr:"a".
type recursive struct {
Inner *recursive `expr:"a"`
}
Loading