Skip to content

Commit

Permalink
Fix level 0 GC dataloss bug (dgraph-io#1090)
Browse files Browse the repository at this point in the history
When all level 0 tables are kept in memory and GC is triggered,
it is possible that GC removes data from the value log file which
has not yet been persisted to disk. This PR fixes this issue by garbage
collecting only the data which has been written to SSTs on the disk.

When L0 is in memory, we GC only the data which is stored in Level 1 onwards.
When L0 is not in memory, we GC data from Level 0 onwards.
  • Loading branch information
Ibrahim Jarif authored Oct 23, 2019
1 parent 1afe2d0 commit 12dea02
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 5 deletions.
12 changes: 10 additions & 2 deletions db.go
Original file line number Diff line number Diff line change
Expand Up @@ -596,7 +596,7 @@ func (db *DB) get(key []byte) (y.ValueStruct, error) {
*maxVs = vs
}
}
return db.lc.get(key, maxVs)
return db.lc.get(key, maxVs, 0)
}

func (db *DB) updateHead(ptrs []valuePointer) {
Expand Down Expand Up @@ -1107,10 +1107,18 @@ func (db *DB) RunValueLogGC(discardRatio float64) error {
return ErrInvalidRequest
}

// startLevel is the level from which we should search for the head key. When badger is running
// with KeepL0InMemory flag, all tables on L0 are kept in memory. This means we should pick head
key from Level 1 onwards because if we pick the head key from Level 0 we might end up losing
// data. See test TestL0GCBug.
startLevel := 0
if db.opt.KeepL0InMemory {
startLevel = 1
}
// Find head on disk
headKey := y.KeyWithTs(head, math.MaxUint64)
// Need to pass with timestamp, lsm get removes the last 8 bytes and compares key
val, err := db.lc.get(headKey, nil)
val, err := db.lc.get(headKey, nil, startLevel)
if err != nil {
return errors.Wrap(err, "Retrieving head from on-disk LSM")
}
Expand Down
72 changes: 72 additions & 0 deletions db2_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -587,3 +587,75 @@ func TestReadSameVlog(t *testing.T) {
})
})
}

// The test ensures we don't lose data when badger is opened with KeepL0InMemory and GC is being
// done.
// TestL0GCBug ensures we don't lose data when badger is opened with
// KeepL0InMemory and value log GC runs: with L0 held in memory, GC must only
// discard vlog entries that have been persisted to SSTs (Level 1 onwards).
func TestL0GCBug(t *testing.T) {
	dir, err := ioutil.TempDir("", "badger-test")
	require.NoError(t, err)
	defer removeDir(dir)

	// Do not change any of the options below unless it's necessary.
	opts := getTestOptions(dir)
	opts.MaxTableSize = 1920
	opts.NumMemtables = 1
	opts.ValueLogMaxEntries = 2
	opts.ValueThreshold = 2
	opts.KeepL0InMemory = true
	// Setting LoadingMode to mmap seems to cause segmentation fault while closing DB.
	opts.ValueLogLoadingMode = options.FileIO
	opts.TableLoadingMode = options.FileIO

	db1, err := Open(opts)
	require.NoError(t, err)
	// key yields a fixed-width key so every entry has the same size.
	key := func(i int) []byte {
		return []byte(fmt.Sprintf("%10d", i))
	}
	val := []byte{1, 1, 1, 1, 1, 1, 1, 1}
	// Insert 100 entries. This will create about 50 vlog files and 2 SST files.
	for i := 0; i < 100; i++ {
		err = db1.Update(func(txn *Txn) error {
			return txn.SetEntry(NewEntry(key(i), val))
		})
		require.NoError(t, err)
	}
	// Run value log GC multiple times. This would ensure at least
	// one value log file is garbage collected.
	for i := 0; i < 10; i++ {
		err := db1.RunValueLogGC(0.01)
		// ErrNoRewrite just means this GC pass found nothing worth collecting.
		if err != nil && err != ErrNoRewrite {
			// Use t.Fatal(err), not t.Fatalf(err.Error()): passing a dynamic
			// string as a format string trips `go vet` and garbles any '%'
			// that appears in the error text.
			t.Fatal(err)
		}
	}

	// checkKeys reads back all previously stored keys and verifies their values.
	checkKeys := func(db *DB) {
		for i := 0; i < 100; i++ {
			err := db.View(func(txn *Txn) error {
				item, err := txn.Get(key(i))
				require.NoError(t, err)
				val1 := getItemValue(t, item)
				require.Equal(t, val, val1)
				return nil
			})
			require.NoError(t, err)
		}
	}

	checkKeys(db1)
	// Simulate a crash by not closing db1 but releasing the locks.
	if db1.dirLockGuard != nil {
		require.NoError(t, db1.dirLockGuard.release())
	}
	if db1.valueDirGuard != nil {
		require.NoError(t, db1.valueDirGuard.release())
	}
	require.NoError(t, db1.vlog.Close())

	db2, err := Open(opts)
	require.NoError(t, err)

	// Ensure we still have all the keys.
	checkKeys(db2)
	require.NoError(t, db2.Close())
}
11 changes: 8 additions & 3 deletions levels.go
Original file line number Diff line number Diff line change
Expand Up @@ -939,14 +939,19 @@ func (s *levelsController) close() error {
}

// get returns the found value if any. If not found, we return nil.
func (s *levelsController) get(key []byte, maxVs *y.ValueStruct) (y.ValueStruct, error) {
// It's important that we iterate the levels from 0 on upward. The reason is, if we iterated
func (s *levelsController) get(key []byte, maxVs *y.ValueStruct, startLevel int) (
y.ValueStruct, error) {
// It's important that we iterate the levels from 0 on upward. The reason is, if we iterated
// in opposite order, or in parallel (naively calling all the h.RLock() in some order) we could
// read level L's tables post-compaction and level L+1's tables pre-compaction. (If we do
// read level L's tables post-compaction and level L+1's tables pre-compaction. (If we do
// parallelize this, we will need to call the h.RLock() function by increasing order of level
// number.)
version := y.ParseTs(key)
for _, h := range s.levels {
// Ignore all levels below startLevel. This is useful for GC when L0 is kept in memory.
if h.level < startLevel {
continue
}
vs, err := h.get(key) // Calls h.RLock() and h.RUnlock().
if err != nil {
return y.ValueStruct{}, errors.Wrapf(err, "get key: %q", key)
Expand Down

0 comments on commit 12dea02

Please sign in to comment.