Skip to content

Commit

Permalink
[BREAKING]: Change how Badger handles WAL (dgraph-io#1555)
Browse files Browse the repository at this point in the history
This PR significantly improves Badger's disk usage behavior.

Breaking: This PR increases the magic version from 7 to 8, so Badger directories created by older versions will not work with this change.

With this PR, we no longer use the value log as a write-ahead log. Instead, each MemTable has its own WAL. Value logs now only store values that are greater than ValueThreshold, while the MemTable WAL only stores smaller values and value pointers.

On a crash and restart, the MemTable WALs are replayed to apply updates to Skiplist. When MemTables are flushed to L0, the corresponding WALs are deleted.

This PR makes big changes to how value log GC works:
- Discard stats are now stored in a separate file, instead of within the LSM tree.
- GC only picks up value logs based on discard stats.
- GC no longer does sampling; it uses discard stats to determine when a value log needs to be GCed.
- The value log no longer grows indefinitely, because of the shift to MemTable WAL.
- Removed the `badger gc` tool.
- Value Log Head pointer tracking is removed.
- Only the last value log file is replayed on every start, and truncated as necessary.

This PR also makes a bunch of other changes:
- Removes ValueLogLoadingMode (always uses mmap now).
- Removes TableLoadingMode (always uses mmap now).
- Removes Truncate option.
- Removes KeepL0InMemory option.
  • Loading branch information
manishrjain authored Oct 7, 2020
1 parent 0b10bb0 commit e3a0d29
Show file tree
Hide file tree
Showing 60 changed files with 1,702 additions and 3,030 deletions.
3 changes: 2 additions & 1 deletion backup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ func TestBackupRestore1(t *testing.T) {
if err != nil {
return err
}
t.Logf("Got entry: %v\n", item.Version())
require.Equal(t, entries[count].key, item.Key())
require.Equal(t, entries[count].val, val)
require.Equal(t, entries[count].version, item.Version())
Expand All @@ -112,7 +113,7 @@ func TestBackupRestore1(t *testing.T) {
return nil
})
require.NoError(t, err)
require.Equal(t, db.orc.nextTs(), uint64(3))
require.Equal(t, 3, int(db.orc.nextTs()))
}

func TestBackupRestore2(t *testing.T) {
Expand Down
4 changes: 0 additions & 4 deletions badger/cmd/backup.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ import (
)

var backupFile string
var truncate bool

// backupCmd represents the backup command
var backupCmd = &cobra.Command{
Expand All @@ -45,16 +44,13 @@ func init() {
RootCmd.AddCommand(backupCmd)
backupCmd.Flags().StringVarP(&backupFile, "backup-file", "f",
"badger.bak", "File to backup to")
backupCmd.Flags().BoolVarP(&truncate, "truncate", "t",
false, "Allow value log truncation if required.")
backupCmd.Flags().IntVarP(&numVersions, "num-versions", "n",
0, "Number of versions to keep. A value <= 0 means keep all versions.")
}

func doBackup(cmd *cobra.Command, args []string) error {
opt := badger.DefaultOptions(sstDir).
WithValueDir(vlogDir).
WithTruncate(truncate).
WithNumVersionsToKeep(math.MaxInt32)

if numVersions > 0 {
Expand Down
5 changes: 2 additions & 3 deletions badger/cmd/bank.go
Original file line number Diff line number Diff line change
Expand Up @@ -364,11 +364,10 @@ func runTest(cmd *cobra.Command, args []string) error {
WithNumVersionsToKeep(int(math.MaxInt32)).
WithValueThreshold(1). // Make all values go to value log
WithCompression(options.ZSTD).
WithKeepL0InMemory(false).
WithBlockCacheSize(10 << 20)

if mmap {
opts = opts.WithTableLoadingMode(options.MemoryMap)
if verbose {
opts = opts.WithLoggingLevel(badger.DEBUG)
}

if encryptionKey != "" {
Expand Down
1 change: 0 additions & 1 deletion badger/cmd/flatten.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ func flatten(cmd *cobra.Command, args []string) error {
}
opt := badger.DefaultOptions(sstDir).
WithValueDir(vlogDir).
WithTruncate(truncate).
WithNumVersionsToKeep(numVersions).
WithNumCompactors(0)
fmt.Printf("Opening badger with options = %+v\n", opt)
Expand Down
21 changes: 9 additions & 12 deletions badger/cmd/info.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ import (
"github.com/pkg/errors"

"github.com/dgraph-io/badger/v2"
"github.com/dgraph-io/badger/v2/options"
"github.com/dgraph-io/badger/v2/table"
"github.com/dgraph-io/badger/v2/y"
humanize "github.com/dustin/go-humanize"
Expand Down Expand Up @@ -93,18 +92,16 @@ to the Dgraph team.

func handleInfo(cmd *cobra.Command, args []string) error {
if err := printInfo(sstDir, vlogDir); err != nil {
return errors.Wrap(err, "failed to print information in MANIFEST file")
return y.Wrap(err, "failed to print information in MANIFEST file")
}

// Open DB
db, err := badger.Open(badger.DefaultOptions(sstDir).
WithValueDir(vlogDir).
WithReadOnly(opt.readOnly).
WithTruncate(opt.truncate).
WithTableLoadingMode(options.MemoryMap).
WithEncryptionKey([]byte(opt.encryptionKey)))
if err != nil {
return errors.Wrap(err, "failed to open database")
return y.Wrap(err, "failed to open database")
}
defer db.Close()

Expand All @@ -114,7 +111,7 @@ func handleInfo(cmd *cobra.Command, args []string) error {

prefix, err := hex.DecodeString(opt.withPrefix)
if err != nil {
return errors.Wrapf(err, "failed to decode hex prefix: %s", opt.withPrefix)
return y.Wrapf(err, "failed to decode hex prefix: %s", opt.withPrefix)
}
if opt.showHistogram {
db.PrintHistogram(prefix)
Expand All @@ -128,7 +125,7 @@ func handleInfo(cmd *cobra.Command, args []string) error {

if len(opt.keyLookup) > 0 {
if err := lookup(db); err != nil {
return errors.Wrapf(err, "failed to perform lookup for the key: %x", opt.keyLookup)
return y.Wrapf(err, "failed to perform lookup for the key: %x", opt.keyLookup)
}
}
return nil
Expand All @@ -153,7 +150,7 @@ func showKeys(db *badger.DB, prefix []byte) error {
for it.Rewind(); it.Valid(); it.Next() {
item := it.Item()
if err := printKey(item, false); err != nil {
return errors.Wrapf(err, "failed to print information about key: %x(%d)",
return y.Wrapf(err, "failed to print information about key: %x(%d)",
item.Key(), item.Version())
}
totalKeys++
Expand All @@ -170,7 +167,7 @@ func lookup(db *badger.DB) error {

key, err := hex.DecodeString(opt.keyLookup)
if err != nil {
return errors.Wrapf(err, "failed to decode key: %q", opt.keyLookup)
return y.Wrapf(err, "failed to decode key: %q", opt.keyLookup)
}

iopts := badger.DefaultIteratorOptions
Expand All @@ -186,7 +183,7 @@ func lookup(db *badger.DB) error {
fmt.Println()
item := itr.Item()
if err := printKey(item, true); err != nil {
return errors.Wrapf(err, "failed to print information about key: %x(%d)",
return y.Wrapf(err, "failed to print information about key: %x(%d)",
item.Key(), item.Version())
}

Expand All @@ -201,7 +198,7 @@ func lookup(db *badger.DB) error {
break
}
if err := printKey(item, true); err != nil {
return errors.Wrapf(err, "failed to print information about key: %x(%d)",
return y.Wrapf(err, "failed to print information about key: %x(%d)",
item.Key(), item.Version())
}
}
Expand All @@ -223,7 +220,7 @@ func printKey(item *badger.Item, showValue bool) error {
if showValue {
val, err := item.ValueCopy(nil)
if err != nil {
return errors.Wrapf(err,
return y.Wrapf(err,
"failed to copy value of the key: %x(%d)", item.Key(), item.Version())
}
fmt.Fprintf(&buf, "\n\tvalue: %v", val)
Expand Down
20 changes: 0 additions & 20 deletions badger/cmd/read_bench.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,13 @@ import (
"fmt"
"math"
"math/rand"
"strings"
"sync/atomic"
"time"

humanize "github.com/dustin/go-humanize"
"github.com/spf13/cobra"

"github.com/dgraph-io/badger/v2"
"github.com/dgraph-io/badger/v2/options"
"github.com/dgraph-io/badger/v2/pb"
"github.com/dgraph-io/badger/v2/y"
"github.com/dgraph-io/ristretto/z"
Expand Down Expand Up @@ -107,12 +105,9 @@ func readBench(cmd *cobra.Command, args []string) error {
return y.Wrapf(err, "unable to parse duration")
}
y.AssertTrue(numGoroutines > 0)
mode := getLoadingMode(loadingMode)
opt := badger.DefaultOptions(sstDir).
WithValueDir(vlogDir).
WithReadOnly(readOnly).
WithTableLoadingMode(mode).
WithValueLogLoadingMode(mode).
WithBlockCacheSize(blockCacheSize << 20).
WithIndexCacheSize(indexCacheSize << 20)
fmt.Printf("Opening badger with options = %+v\n", opt)
Expand Down Expand Up @@ -239,18 +234,3 @@ func getSampleKeys(db *badger.DB) ([][]byte, error) {

return keys, nil
}

// getLoadingMode translates a loading-mode name into the corresponding
// options.FileLoadingMode. The name is matched case-insensitively:
// "fileio" selects options.FileIO and "mmap" selects options.MemoryMap.
// Any other name causes a panic.
func getLoadingMode(m string) options.FileLoadingMode {
	switch strings.ToLower(m) {
	case "fileio":
		return options.FileIO
	case "mmap":
		return options.MemoryMap
	}
	// No recognised mode name matched.
	panic("loading mode not supported")
}
5 changes: 5 additions & 0 deletions badger/cmd/rotate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,12 @@ func TestRotate(t *testing.T) {
dir, err := ioutil.TempDir("", "badger-test")
require.NoError(t, err)
defer os.RemoveAll(dir)

// Creating sample key.
key := make([]byte, 32)
_, err = rand.Read(key)
require.NoError(t, err)

fp, err := ioutil.TempFile("", "*.key")
require.NoError(t, err)
_, err = fp.Write(key)
Expand All @@ -44,6 +46,8 @@ func TestRotate(t *testing.T) {
// Opening DB with the encryption key.
opts := badger.DefaultOptions(dir)
opts.EncryptionKey = key
opts.BlockCacheSize = 1 << 20

db, err := badger.Open(opts)
require.NoError(t, err)
// Closing the db.
Expand Down Expand Up @@ -126,6 +130,7 @@ func TestRotatePlainTextToEncrypted(t *testing.T) {
require.Nil(t, doRotate(nil, []string{}))

// Try opening DB without the key.
opts.BlockCacheSize = 1 << 20
_, err = badger.Open(opts)
require.EqualError(t, err, badger.ErrEncryptionKeyMismatch.Error())

Expand Down
5 changes: 2 additions & 3 deletions badger/cmd/stream.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (

"github.com/dgraph-io/badger/v2"
"github.com/dgraph-io/badger/v2/options"
"github.com/dgraph-io/badger/v2/y"
"github.com/pkg/errors"
"github.com/spf13/cobra"
)
Expand All @@ -44,7 +45,6 @@ func init() {
RootCmd.AddCommand(streamCmd)
streamCmd.Flags().StringVarP(&outDir, "out", "o", "",
"Path to output DB. The directory should be empty.")
streamCmd.Flags().BoolVarP(&truncate, "truncate", "", false, "Option to truncate the DBs")
streamCmd.Flags().BoolVarP(&readOnly, "read_only", "", true,
"Option to open input DB in read-only mode")
streamCmd.Flags().IntVarP(&numVersions, "num_versions", "", 0,
Expand Down Expand Up @@ -76,7 +76,6 @@ func stream(cmd *cobra.Command, args []string) error {
}
inOpt := badger.DefaultOptions(sstDir).
WithReadOnly(readOnly).
WithTruncate(truncate).
WithValueThreshold(1 << 10 /* 1KB */).
WithNumVersionsToKeep(numVersions)

Expand All @@ -90,7 +89,7 @@ func stream(cmd *cobra.Command, args []string) error {

inDB, err := badger.OpenManaged(inOpt)
if err != nil {
return errors.Wrapf(err, "cannot open DB at %s", sstDir)
return y.Wrapf(err, "cannot open DB at %s", sstDir)
}
defer inDB.Close()
return inDB.StreamDB(outOpt)
Expand Down
84 changes: 0 additions & 84 deletions badger/cmd/vloggc.go

This file was deleted.

Loading

0 comments on commit e3a0d29

Please sign in to comment.