Skip to content

Commit

Permalink
Add a Prefix option in IteratorOptions (dgraph-io#628)
Browse files Browse the repository at this point in the history
When creating an iterator, we pick all the tables in LSM tree. In Dgraph, a lot of times we only need to iterate over all versions of a single key. But, doing a `Seek` costs extra cycles to jump over all the tables which don't have the prefix. This PR adds a `Prefix` option in IteratorOptions, to allow selectively picking only those tables, which could potentially have the `Prefix`.

In benchmarks, I see a 20% improvement in latency with 80 SSTables, when looking for a single key.

Fixes dgraph-io#625 .

Commits:

* Only pick up tables which would have the prefix needed for iteration. This should be faster than picking all of them.
* Left test file behind
* Add benchmark to see the impact of opt.Prefix. I see a 20% improvement when there are 80 SSTables.
  • Loading branch information
manishrjain authored Nov 25, 2018
1 parent ba13ac7 commit 7d46029
Show file tree
Hide file tree
Showing 4 changed files with 252 additions and 20 deletions.
46 changes: 31 additions & 15 deletions iterator.go
Original file line number Diff line number Diff line change
Expand Up @@ -318,12 +318,27 @@ type IteratorOptions struct {
PrefetchValues bool
// How many KV pairs to prefetch while iterating. Valid only if PrefetchValues is true.
PrefetchSize int
Reverse bool // Direction of iteration. False is forward, true is backward.
AllVersions bool // Fetch all valid versions of the same key.
Prefix []byte // Only iterate over this given prefix.
Reverse bool // Direction of iteration. False is forward, true is backward.
AllVersions bool // Fetch all valid versions of the same key.

internalAccess bool // Used to allow internal access to badger keys.
}

// PickTable reports whether a table whose keys span [left, right] could hold
// a key that starts with opt.Prefix. When no Prefix is set, every table
// qualifies.
//
// Both bounds are cut down to at most len(opt.Prefix) bytes before they are
// compared, so only the leading bytes that can match the prefix take part in
// the decision.
//
// NOTE(review): the bounds are compared as raw bytes; if callers pass keys
// that carry a trailing version suffix, a bound shorter than the prefix
// would include suffix bytes in the comparison — confirm against callers.
func (opt *IteratorOptions) PickTable(left, right []byte) bool {
	prefix := opt.Prefix
	if len(prefix) == 0 {
		return true
	}

	// head returns the first len(prefix) bytes of key, or key itself when it
	// is not longer than the prefix.
	head := func(key []byte) []byte {
		if len(key) <= len(prefix) {
			return key
		}
		return key[:len(prefix)]
	}

	if bytes.Compare(head(left), prefix) > 0 {
		// The smallest key already sorts after the prefix.
		return false
	}
	// The table qualifies unless its largest key sorts before the prefix.
	return bytes.Compare(head(right), prefix) >= 0
}

// DefaultIteratorOptions contains default options when iterating over Badger key-value stores.
var DefaultIteratorOptions = IteratorOptions{
PrefetchValues: true,
Expand Down Expand Up @@ -365,6 +380,8 @@ func (txn *Txn) NewIterator(opt IteratorOptions) *Iterator {
panic("Only one iterator can be active at one time, for a RW txn.")
}

// TODO: If Prefix is set, only pick those memtables which have keys with
// the prefix.
tables, decr := txn.db.getMemTables()
defer decr()
txn.db.vlog.incrIteratorCount()
Expand All @@ -375,7 +392,7 @@ func (txn *Txn) NewIterator(opt IteratorOptions) *Iterator {
for i := 0; i < len(tables); i++ {
iters = append(iters, tables[i].NewUniIterator(opt.Reverse))
}
iters = txn.db.lc.appendIterators(iters, opt.Reverse) // This will increment references.
iters = txn.db.lc.appendIterators(iters, &opt) // This will increment references.
res := &Iterator{
txn: txn,
iitr: y.NewMergeIterator(iters, opt.Reverse),
Expand All @@ -402,12 +419,17 @@ func (it *Iterator) Item() *Item {
}

// Valid returns false when iteration is done.
func (it *Iterator) Valid() bool { return it.item != nil }
// Valid reports whether the iterator currently points at a usable item. It
// is false once iteration is done (no current item) and also when the
// current item's key does not start with the Prefix from the iterator's
// options. An empty Prefix matches every key.
func (it *Iterator) Valid() bool {
	return it.item != nil && bytes.HasPrefix(it.item.key, it.opt.Prefix)
}

// ValidForPrefix returns false when iteration is done
// or when the current key is not prefixed by the specified prefix.
func (it *Iterator) ValidForPrefix(prefix []byte) bool {
return it.item != nil && bytes.HasPrefix(it.item.key, prefix)
return it.Valid() && bytes.HasPrefix(it.item.key, prefix)
}

// Close would close the iterator. It is important to call this when you're done with iteration.
Expand Down Expand Up @@ -602,6 +624,9 @@ func (it *Iterator) Seek(key []byte) {
}

it.lastKey = it.lastKey[:0]
if len(key) == 0 {
key = it.opt.Prefix
}
if len(key) == 0 {
it.iitr.Rewind()
it.prefetch()
Expand All @@ -621,14 +646,5 @@ func (it *Iterator) Seek(key []byte) {
// smallest key if iterating forward, and largest if iterating backward. It does not keep track of
// whether the cursor started with a Seek().
func (it *Iterator) Rewind() {
i := it.data.pop()
for i != nil {
i.wg.Wait() // Just cleaner to wait before pushing. No ref counting needed.
it.waste.push(i)
i = it.data.pop()
}

it.lastKey = it.lastKey[:0]
it.iitr.Rewind()
it.prefetch()
it.Seek(nil)
}
206 changes: 206 additions & 0 deletions iterator_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
/*
* Copyright 2018 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package badger

import (
"bytes"
"fmt"
"io/ioutil"
"math/rand"
"os"
"path/filepath"
"strings"
"testing"

"github.com/dgraph-io/badger/options"
"github.com/dgraph-io/badger/y"
"github.com/stretchr/testify/require"
)

// TestPickTables checks IteratorOptions.PickTable against table key ranges
// that do and do not overlap a given prefix. Each case is written as
// {prefix, left, right}.
func TestPickTables(t *testing.T) {
	opt := DefaultIteratorOptions

	// pick installs prefix on the shared options and asks whether a table
	// spanning [left, right] would be selected.
	pick := func(prefix, left, right string) bool {
		opt.Prefix = []byte(prefix)
		return opt.PickTable([]byte(left), []byte(right))
	}

	// Ranges that may contain keys with the prefix.
	overlapping := [][3]string{
		{"abc", "ab", "ad"},
		{"abc", "abc", "ad"},
		{"abc", "abb123", "ad"},
		{"abc", "abc123", "abd234"},
		{"abc", "abc123", "abc456"},
	}
	for _, c := range overlapping {
		require.True(t, pick(c[0], c[1], c[2]))
	}

	// Ranges that lie entirely before or after the prefix.
	disjoint := [][3]string{
		{"abd", "abe", "ad"},
		{"abd", "ac", "ad"},
		{"abd", "b", "e"},
		{"abd", "a", "ab"},
		{"abd", "ab", "abc"},
		{"abd", "ab", "abc123"},
	}
	for _, c := range disjoint {
		require.False(t, pick(c[0], c[1], c[2]))
	}
}

// TestIteratePrefix writes n zero-padded four-digit keys and verifies that an
// iterator created with IteratorOptions.Prefix visits exactly the keys that
// start with that prefix, and that every visited value is intact.
func TestIteratePrefix(t *testing.T) {
	runBadgerTest(t, nil, func(t *testing.T, db *DB) {
		bkey := func(i int) []byte {
			return []byte(fmt.Sprintf("%04d", i))
		}
		val := []byte("OK")
		n := 10000

		batch := db.NewWriteBatch()
		for i := 0; i < n; i++ {
			if (i % 1000) == 0 {
				t.Logf("Put i=%d\n", i)
			}
			require.NoError(t, batch.Set(bkey(i), val, 0))
		}
		require.NoError(t, batch.Flush())

		// countKeys iterates with the given prefix and returns how many
		// items were seen, asserting along the way that each item carries
		// the expected value and really has the prefix.
		countKeys := func(prefix string) int {
			t.Logf("Testing with prefix: %s", prefix)
			var count int
			opt := DefaultIteratorOptions
			opt.Prefix = []byte(prefix)
			err := db.View(func(txn *Txn) error {
				itr := txn.NewIterator(opt)
				defer itr.Close()
				for itr.Rewind(); itr.Valid(); itr.Next() {
					item := itr.Item()
					err := item.Value(func(v []byte) error {
						require.Equal(t, val, v)
						return nil
					})
					require.NoError(t, err)
					require.True(t, bytes.HasPrefix(item.Key(), opt.Prefix))
					count++
				}
				return nil
			})
			require.NoError(t, err)
			return count
		}

		// With keys 0000..9999, a prefix made of k repeated digits matches
		// 10^(4-k) keys.
		for i := 0; i <= 9; i++ {
			require.Equal(t, 1, countKeys(fmt.Sprintf("%d%d%d%d", i, i, i, i)))
			require.Equal(t, 10, countKeys(fmt.Sprintf("%d%d%d", i, i, i)))
			require.Equal(t, 100, countKeys(fmt.Sprintf("%d%d", i, i)))
			require.Equal(t, 1000, countKeys(fmt.Sprintf("%d", i)))
		}

		// The empty prefix does not depend on i, so scan the whole key space
		// once instead of on every loop iteration, and compare against n
		// rather than repeating the literal.
		require.Equal(t, n, countKeys(""))
	})
}

// go test -v -run=XXX -bench=BenchmarkIterate -benchtime=3s
// Benchmark with opt.Prefix set ===
// goos: linux
// goarch: amd64
// pkg: github.com/dgraph-io/badger
// BenchmarkIteratePrefixSingleKey/Key_lookups-4 10000 365539 ns/op
// --- BENCH: BenchmarkIteratePrefixSingleKey/Key_lookups-4
// iterator_test.go:147: Inner b.N: 1
// iterator_test.go:147: Inner b.N: 100
// iterator_test.go:147: Inner b.N: 10000
// --- BENCH: BenchmarkIteratePrefixSingleKey
// iterator_test.go:143: LSM files: 79
// iterator_test.go:145: Outer b.N: 1
// PASS
// ok github.com/dgraph-io/badger 41.586s
//
// Benchmark with NO opt.Prefix set ===
// goos: linux
// goarch: amd64
// pkg: github.com/dgraph-io/badger
// BenchmarkIteratePrefixSingleKey/Key_lookups-4 10000 460924 ns/op
// --- BENCH: BenchmarkIteratePrefixSingleKey/Key_lookups-4
// iterator_test.go:147: Inner b.N: 1
// iterator_test.go:147: Inner b.N: 100
// iterator_test.go:147: Inner b.N: 10000
// --- BENCH: BenchmarkIteratePrefixSingleKey
// iterator_test.go:143: LSM files: 83
// iterator_test.go:145: Outer b.N: 1
// PASS
// ok github.com/dgraph-io/badger 41.836s
//
// On my laptop there's a 20% improvement in latency with ~80 files.
func BenchmarkIteratePrefixSingleKey(b *testing.B) {
	// Set up a throwaway store under the current directory.
	tmpDir, err := ioutil.TempDir(".", "badger-test")
	y.Check(err)
	defer os.RemoveAll(tmpDir)

	dbOpts := getTestOptions(tmpDir)
	dbOpts.TableLoadingMode = options.LoadToRAM
	db, err := Open(dbOpts)
	y.Check(err)
	defer db.Close()

	const numKeys = 100000 // Should generate around 80 SSTables.
	value := []byte("OK")
	keyFor := func(i int) []byte {
		return []byte(fmt.Sprintf("%06d", i))
	}

	// Populate the store with numKeys zero-padded six-digit keys.
	wb := db.NewWriteBatch()
	for i := 0; i < numKeys; i++ {
		y.Check(wb.Set(keyFor(i), value, 0))
	}
	y.Check(wb.Flush())

	// Count the .sst files on disk so the log shows how many tables the
	// lookups below had to choose from.
	sstCount := 0
	y.Check(filepath.Walk(tmpDir, func(path string, _ os.FileInfo, walkErr error) error {
		if strings.HasSuffix(path, ".sst") {
			sstCount++
		}
		return walkErr
	}))
	b.Logf("LSM files: %d", sstCount)
	b.Logf("Outer b.N: %d", b.N)

	// lookup iterates over all versions of key inside a read transaction and
	// fails the benchmark unless exactly one entry is found.
	lookup := func(b *testing.B, key []byte) error {
		return db.View(func(txn *Txn) error {
			iopt := DefaultIteratorOptions
			// NOTE: Comment iopt.Prefix out here to compare the performance
			// difference between providing Prefix as an option, v/s not. I
			// see a 20% improvement when there are ~80 SSTables.
			iopt.Prefix = key
			iopt.AllVersions = true

			itr := txn.NewIterator(iopt)
			defer itr.Close()

			found := 0
			itr.Seek(key)
			for itr.ValidForPrefix(key) {
				found++
				itr.Next()
			}
			if found != 1 {
				b.Fatalf("Count must be one key: %s. Found: %d", key, found)
			}
			return nil
		})
	}

	b.Run("Key lookups", func(b *testing.B) {
		b.Logf("Inner b.N: %d", b.N)
		for i := 0; i < b.N; i++ {
			if err := lookup(b, keyFor(rand.Intn(numKeys))); err != nil {
				b.Fatalf("Error while View: %v", err)
			}
		}
	})
}
16 changes: 13 additions & 3 deletions level_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -266,16 +266,26 @@ func (s *levelHandler) get(key []byte) (y.ValueStruct, error) {

// appendIterators appends iterators to an array of iterators, for merging.
// Note: This obtains references for the table handlers. Remember to close these iterators.
func (s *levelHandler) appendIterators(iters []y.Iterator, reversed bool) []y.Iterator {
func (s *levelHandler) appendIterators(iters []y.Iterator, opt *IteratorOptions) []y.Iterator {
s.RLock()
defer s.RUnlock()

tables := make([]*table.Table, 0, len(s.tables))
for _, t := range s.tables {
if opt.PickTable(t.Smallest(), t.Biggest()) {
tables = append(tables, t)
}
}
if len(tables) == 0 {
return iters
}

if s.level == 0 {
// Remember to add in reverse order!
// The newer table at the end of s.tables should be added first as it takes precedence.
return appendIteratorsReversed(iters, s.tables, reversed)
return appendIteratorsReversed(iters, tables, opt.Reverse)
}
return append(iters, table.NewConcatIterator(s.tables, reversed))
return append(iters, table.NewConcatIterator(tables, opt.Reverse))
}

type levelHandlerRLocked struct{}
Expand Down
4 changes: 2 additions & 2 deletions levels.go
Original file line number Diff line number Diff line change
Expand Up @@ -810,11 +810,11 @@ func appendIteratorsReversed(out []y.Iterator, th []*table.Table, reversed bool)
// appendIterators appends iterators to an array of iterators, for merging.
// Note: This obtains references for the table handlers. Remember to close these iterators.
func (s *levelsController) appendIterators(
iters []y.Iterator, reversed bool) []y.Iterator {
iters []y.Iterator, opt *IteratorOptions) []y.Iterator {
// Just like with get, it's important we iterate the levels from 0 on upward, to avoid missing
// data when there's a compaction.
for _, level := range s.levels {
iters = level.appendIterators(iters, reversed)
iters = level.appendIterators(iters, opt)
}
return iters
}
Expand Down

0 comments on commit 7d46029

Please sign in to comment.