forked from dgraph-io/badger
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add a Prefix option in IteratorOptions (dgraph-io#628)
When creating an iterator, we pick all the tables in LSM tree. In Dgraph, a lot of times we only need to iterate over all versions of a single key. But, doing a `Seek` costs extra cycles to jump over all the tables which don't have the prefix. This PR adds a `Prefix` option in IteratorOptions, to allow selectively picking only those tables, which could potentially have the `Prefix`. In benchmarks, I see a 20% improvement in latency with 80 SSTables, when looking for a single key. Fixes dgraph-io#625 . Commits: * Only pick up tables which would have the prefix needed for iteration. This should be faster than picking all of them. * Left test file behind * Add benchmark to see the impact of opt.Prefix. I see a 20% improvement when there are 80 SSTables.
- Loading branch information
1 parent
ba13ac7
commit 7d46029
Showing
4 changed files
with
252 additions
and
20 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,206 @@ | ||
/* | ||
* Copyright 2018 Dgraph Labs, Inc. and Contributors | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package badger | ||
|
||
import ( | ||
"bytes" | ||
"fmt" | ||
"io/ioutil" | ||
"math/rand" | ||
"os" | ||
"path/filepath" | ||
"strings" | ||
"testing" | ||
|
||
"github.com/dgraph-io/badger/options" | ||
"github.com/dgraph-io/badger/y" | ||
"github.com/stretchr/testify/require" | ||
) | ||
|
||
func TestPickTables(t *testing.T) { | ||
opt := DefaultIteratorOptions | ||
|
||
within := func(prefix, left, right string) { | ||
opt.Prefix = []byte(prefix) | ||
require.True(t, opt.PickTable([]byte(left), []byte(right))) | ||
} | ||
outside := func(prefix, left, right string) { | ||
opt.Prefix = []byte(prefix) | ||
require.False(t, opt.PickTable([]byte(left), []byte(right))) | ||
} | ||
within("abc", "ab", "ad") | ||
within("abc", "abc", "ad") | ||
within("abc", "abb123", "ad") | ||
within("abc", "abc123", "abd234") | ||
within("abc", "abc123", "abc456") | ||
|
||
outside("abd", "abe", "ad") | ||
outside("abd", "ac", "ad") | ||
outside("abd", "b", "e") | ||
outside("abd", "a", "ab") | ||
outside("abd", "ab", "abc") | ||
outside("abd", "ab", "abc123") | ||
} | ||
|
||
func TestIteratePrefix(t *testing.T) { | ||
runBadgerTest(t, nil, func(t *testing.T, db *DB) { | ||
bkey := func(i int) []byte { | ||
return []byte(fmt.Sprintf("%04d", i)) | ||
} | ||
val := []byte("OK") | ||
n := 10000 | ||
|
||
batch := db.NewWriteBatch() | ||
for i := 0; i < n; i++ { | ||
if (i % 1000) == 0 { | ||
t.Logf("Put i=%d\n", i) | ||
} | ||
require.NoError(t, batch.Set(bkey(i), val, 0)) | ||
} | ||
require.NoError(t, batch.Flush()) | ||
|
||
countKeys := func(prefix string) int { | ||
t.Logf("Testing with prefix: %s", prefix) | ||
var count int | ||
opt := DefaultIteratorOptions | ||
opt.Prefix = []byte(prefix) | ||
err := db.View(func(txn *Txn) error { | ||
itr := txn.NewIterator(opt) | ||
defer itr.Close() | ||
for itr.Rewind(); itr.Valid(); itr.Next() { | ||
item := itr.Item() | ||
err := item.Value(func(v []byte) error { | ||
require.Equal(t, val, v) | ||
return nil | ||
}) | ||
require.NoError(t, err) | ||
require.True(t, bytes.HasPrefix(item.Key(), opt.Prefix)) | ||
count++ | ||
} | ||
return nil | ||
}) | ||
require.NoError(t, err) | ||
return count | ||
} | ||
|
||
for i := 0; i <= 9; i++ { | ||
require.Equal(t, 1, countKeys(fmt.Sprintf("%d%d%d%d", i, i, i, i))) | ||
require.Equal(t, 10, countKeys(fmt.Sprintf("%d%d%d", i, i, i))) | ||
require.Equal(t, 100, countKeys(fmt.Sprintf("%d%d", i, i))) | ||
require.Equal(t, 1000, countKeys(fmt.Sprintf("%d", i))) | ||
require.Equal(t, 10000, countKeys("")) | ||
} | ||
}) | ||
} | ||
|
||
// go test -v -run=XXX -bench=BenchmarkIterate -benchtime=3s | ||
// Benchmark with opt.Prefix set === | ||
// goos: linux | ||
// goarch: amd64 | ||
// pkg: github.com/dgraph-io/badger | ||
// BenchmarkIteratePrefixSingleKey/Key_lookups-4 10000 365539 ns/op | ||
// --- BENCH: BenchmarkIteratePrefixSingleKey/Key_lookups-4 | ||
// iterator_test.go:147: Inner b.N: 1 | ||
// iterator_test.go:147: Inner b.N: 100 | ||
// iterator_test.go:147: Inner b.N: 10000 | ||
// --- BENCH: BenchmarkIteratePrefixSingleKey | ||
// iterator_test.go:143: LSM files: 79 | ||
// iterator_test.go:145: Outer b.N: 1 | ||
// PASS | ||
// ok github.com/dgraph-io/badger 41.586s | ||
// | ||
// Benchmark with NO opt.Prefix set === | ||
// goos: linux | ||
// goarch: amd64 | ||
// pkg: github.com/dgraph-io/badger | ||
// BenchmarkIteratePrefixSingleKey/Key_lookups-4 10000 460924 ns/op | ||
// --- BENCH: BenchmarkIteratePrefixSingleKey/Key_lookups-4 | ||
// iterator_test.go:147: Inner b.N: 1 | ||
// iterator_test.go:147: Inner b.N: 100 | ||
// iterator_test.go:147: Inner b.N: 10000 | ||
// --- BENCH: BenchmarkIteratePrefixSingleKey | ||
// iterator_test.go:143: LSM files: 83 | ||
// iterator_test.go:145: Outer b.N: 1 | ||
// PASS | ||
// ok github.com/dgraph-io/badger 41.836s | ||
// | ||
// On my laptop there's a 20% improvement in latency with ~80 files. | ||
func BenchmarkIteratePrefixSingleKey(b *testing.B) { | ||
dir, err := ioutil.TempDir(".", "badger-test") | ||
y.Check(err) | ||
defer os.RemoveAll(dir) | ||
opts := getTestOptions(dir) | ||
opts.TableLoadingMode = options.LoadToRAM | ||
db, err := Open(opts) | ||
y.Check(err) | ||
defer db.Close() | ||
|
||
N := 100000 // Should generate around 80 SSTables. | ||
val := []byte("OK") | ||
bkey := func(i int) []byte { | ||
return []byte(fmt.Sprintf("%06d", i)) | ||
} | ||
|
||
batch := db.NewWriteBatch() | ||
for i := 0; i < N; i++ { | ||
y.Check(batch.Set(bkey(i), val, 0)) | ||
} | ||
y.Check(batch.Flush()) | ||
var lsmFiles int | ||
err = filepath.Walk(dir, func(path string, info os.FileInfo, err error) error { | ||
if strings.HasSuffix(path, ".sst") { | ||
lsmFiles++ | ||
} | ||
if err != nil { | ||
return err | ||
} | ||
return nil | ||
}) | ||
y.Check(err) | ||
b.Logf("LSM files: %d", lsmFiles) | ||
|
||
b.Logf("Outer b.N: %d", b.N) | ||
b.Run("Key lookups", func(b *testing.B) { | ||
b.Logf("Inner b.N: %d", b.N) | ||
for i := 0; i < b.N; i++ { | ||
key := bkey(rand.Intn(N)) | ||
err := db.View(func(txn *Txn) error { | ||
opt := DefaultIteratorOptions | ||
// NOTE: Comment opt.Prefix out here to compare the performance | ||
// difference between providing Prefix as an option, v/s not. I | ||
// see a 20% improvement when there are ~80 SSTables. | ||
opt.Prefix = key | ||
opt.AllVersions = true | ||
|
||
itr := txn.NewIterator(opt) | ||
defer itr.Close() | ||
|
||
var count int | ||
for itr.Seek(key); itr.ValidForPrefix(key); itr.Next() { | ||
count++ | ||
} | ||
if count != 1 { | ||
b.Fatalf("Count must be one key: %s. Found: %d", key, count) | ||
} | ||
return nil | ||
}) | ||
if err != nil { | ||
b.Fatalf("Error while View: %v", err) | ||
} | ||
} | ||
}) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters