Skip to content

Commit

Permalink
Info command: Show histogram of key/value sizes and number of keys pe…
Browse files Browse the repository at this point in the history
…r table (hypermodeinc#730)

* Add ShowKeyValueHistogram method

This commit adds ShowKeyValueHistogram method and types associated with it.

Signed-off-by: Ibrahim Jarif <[email protected]>

* Remove whitespace

Signed-off-by: Ibrahim Jarif <[email protected]>

* Fix comment

Signed-off-by: Ibrahim Jarif <[email protected]>

* Move histogram code to histogram.go and add histogram_test.go file

Signed-off-by: Ibrahim Jarif <[email protected]>

* Show number of keys present per SSTable

The output of the `badger info` command now shows the number of keys per SSTable.

Signed-off-by: Ibrahim Jarif <[email protected]>

* Rename ShowKeyValueSizeHistogram to PrintKeyValueHistogram

Signed-off-by: Ibrahim Jarif <[email protected]>
  • Loading branch information
jarifibrahim authored and manishrjain committed Mar 7, 2019
1 parent b669ca0 commit fd59907
Show file tree
Hide file tree
Showing 6 changed files with 346 additions and 38 deletions.
77 changes: 48 additions & 29 deletions badger/cmd/info.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,23 @@ import (
"github.com/spf13/cobra"
)

// flagOptions groups the boolean switches that the info command reads from
// its command-line flags.
type flagOptions struct {
	// showTables is bound to the "show-tables" / "-s" flag.
	showTables bool
	// sizeHistogram is bound to the "histogram" flag.
	sizeHistogram bool
}

// opt holds the flag values of the info command; init binds its fields to
// the command-line flags.
var opt flagOptions

// init adds infoCmd to RootCmd and declares the two boolean flags of the info
// command, storing their values in opt. Both flags default to false.
func init() {
	RootCmd.AddCommand(infoCmd)

	flags := infoCmd.Flags()
	flags.BoolVarP(&opt.showTables, "show-tables", "s", false,
		"If set to true, show tables as well.")
	flags.BoolVar(&opt.sizeHistogram, "histogram", false,
		"Show a histogram of the key and value sizes.")
}

var infoCmd = &cobra.Command{
Use: "info",
Short: "Health info about Badger database.",
Expand All @@ -48,23 +65,33 @@ to the Dgraph team.
fmt.Println("Error:", err.Error())
os.Exit(1)
}
if !showTables {
if !opt.showTables {
return
}
err = tableInfo(sstDir, vlogDir)
// Open DB
opts := badger.DefaultOptions
opts.TableLoadingMode = options.MemoryMap
opts.Dir = sstDir
opts.ValueDir = vlogDir
opts.ReadOnly = true

db, err := badger.Open(opts)
if err != nil {
fmt.Println("Error:", err.Error())
os.Exit(1)
}
},
}
defer db.Close()

var showTables bool

func init() {
RootCmd.AddCommand(infoCmd)
infoCmd.Flags().BoolVarP(&showTables, "show-tables", "s", false,
"If set to true, show tables as well.")
err = tableInfo(sstDir, vlogDir, db)
if err != nil {
fmt.Println("Error:", err.Error())
os.Exit(1)
}
if opt.sizeHistogram {
	// A nil prefix selects every key, so the histogram covers the whole DB.
	// The exported method is PrintKeyValueHistogram; the earlier name
	// ShowKeyValueSizeHistogram no longer exists on *badger.DB.
	db.PrintKeyValueHistogram(nil)
}
},
}

func hbytes(sz int64) string {
Expand All @@ -75,27 +102,20 @@ func dur(src, dst time.Time) string {
return humanize.RelTime(dst, src, "earlier", "later")
}

func tableInfo(dir, valueDir string) error {
// Open DB
opts := badger.DefaultOptions
opts.TableLoadingMode = options.MemoryMap
opts.Dir = sstDir
opts.ValueDir = vlogDir
opts.ReadOnly = true

db, err := badger.Open(opts)
if err != nil {
return err
}
defer db.Close()

func tableInfo(dir, valueDir string, db *badger.DB) error {
tables := db.Tables()
fmt.Printf("\n%s SSTables %[1]s\n", strings.Repeat("=", 45))
fmt.Printf("%-5s\t%-10s\t%-30s\t%-30s\t%-7s\n", "ID", "Level",
"Left-Key(in hex) (Time)", "Right-Key(in hex) (Time)", "Total Keys")
fmt.Printf("%s\n", strings.Repeat("=", 100))
for _, t := range tables {
lk, lv := y.ParseKey(t.Left), y.ParseTs(t.Left)
rk, rv := y.ParseKey(t.Right), y.ParseTs(t.Right)
fmt.Printf("SSTable [L%d, %03d] [%20X, v%-10d -> %20X, v%-10d]\n",
t.Level, t.ID, lk, lv, rk, rv)
lk, lt := y.ParseKey(t.Left), y.ParseTs(t.Left)
rk, rt := y.ParseKey(t.Right), y.ParseTs(t.Right)

fmt.Printf("%-5d\tL%-9d\t%-30s\t%-30s\t%-7d\n", t.ID, t.Level,
fmt.Sprintf("%X (v%d)", lk, lt), fmt.Sprintf("%X (v%d)", rk, rt), t.KeyCount)
}
fmt.Println()
return nil
}

Expand Down Expand Up @@ -135,7 +155,6 @@ func printInfo(dir, valueDir string) error {

fmt.Println()
var baseTime time.Time
// fmt.Print("\n[Manifest]\n")
manifestTruncated := false
manifestInfo, ok := fileinfoByName[badger.ManifestFilename]
if ok {
Expand Down
169 changes: 169 additions & 0 deletions histogram.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
/*
* Copyright 2019 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package badger

import (
"fmt"
"math"
)

// PrintKeyValueHistogram builds the key-value size histogram and writes it to
// standard output, key sizes first and value sizes second. When keyPrefix is
// set, only keys that start with keyPrefix contribute to the histogram.
// Calling it on a nil *DB prints a notice and does nothing else.
func (db *DB) PrintKeyValueHistogram(keyPrefix []byte) {
	if db == nil {
		fmt.Println("\nCannot build histogram: DB is nil.")
		return
	}

	sizes := db.buildKeyValueSizeHistogram(keyPrefix)

	fmt.Printf("Histogram of key sizes (in bytes)\n")
	sizes.keySizeHistogram.printHistogram()

	fmt.Printf("Histogram of value sizes (in bytes)\n")
	sizes.valueSizeHistogram.printHistogram()
}

// histogramData holds the bin layout and the running statistics of a single
// histogram.
type histogramData struct {
	// bins lists the bin bounds; a value is counted in the first bin whose
	// bound is strictly greater than the value.
	bins []int64
	// countPerBin[i] is the number of values counted in bin i. It carries one
	// slot more than bins: the last slot counts values that are not below any
	// bound.
	countPerBin []int64
	// totalCount is the number of values recorded so far.
	totalCount int64
	// min and max are the smallest and largest values recorded so far.
	min, max int64
	// sum is the sum of all recorded values.
	sum int64
}

// keyValueSizeHistogram pairs the histogram of key sizes with the histogram
// of value sizes.
type keyValueSizeHistogram struct {
	// keySizeHistogram tracks the sizes of keys.
	keySizeHistogram histogramData
	// valueSizeHistogram tracks the sizes of values.
	valueSizeHistogram histogramData
}

// newKeyValueSizeHistogram returns a keyValueSizeHistogram that is ready to
// receive updates. Key sizes use bin bounds 2^1..2^16 and value sizes use
// 2^1..2^30. Each histogram starts with zeroed counters, min set to
// math.MaxInt64 and max set to math.MinInt64 so that the first recorded value
// replaces both.
func newKeyValueSizeHistogram() *keyValueSizeHistogram {
	// emptyHistogram builds a histogramData over the given bounds with one
	// extra counter slot for values that are not below any bound.
	emptyHistogram := func(bounds []int64) histogramData {
		return histogramData{
			bins:        bounds,
			countPerBin: make([]int64, len(bounds)+1),
			min:         math.MaxInt64,
			max:         math.MinInt64,
		}
	}

	// TODO(ibrahim): find appropriate bin size.
	return &keyValueSizeHistogram{
		keySizeHistogram:   emptyHistogram(createHistogramBins(1, 16)),
		valueSizeHistogram: emptyHistogram(createHistogramBins(1, 30)),
	}
}

// createHistogramBins creates the bin bounds for a histogram. The bounds are
// the powers of two 2^minExponent, 2^(minExponent+1), ..., 2^maxExponent in
// ascending order. It returns nil when minExponent is greater than
// maxExponent. Exponents above 62 do not fit in a positive int64, so callers
// should stay at or below that.
func createHistogramBins(minExponent, maxExponent uint32) []int64 {
	if minExponent > maxExponent {
		return nil
	}

	// The number of bins is known up front, so allocate the slice once
	// instead of growing it on every append.
	bins := make([]int64, 0, maxExponent-minExponent+1)
	for exp := minExponent; exp <= maxExponent; exp++ {
		bins = append(bins, int64(1)<<exp)
	}
	return bins
}

// Update records a single value in the histogram. It lowers min or raises max
// when value falls outside the current range, adds value to sum, increments
// totalCount, and increments the counter of the first bin whose bound is
// strictly greater than value. A value that is not below any bound is counted
// in the extra last slot of countPerBin.
func (histogram *histogramData) Update(value int64) {
	if histogram.max < value {
		histogram.max = value
	}
	if histogram.min > value {
		histogram.min = value
	}

	histogram.totalCount++
	histogram.sum += value

	// Start from the overflow slot that follows the regular bins and move to
	// the first regular bin that can hold the value, if there is one.
	slot := len(histogram.bins)
	for i, bound := range histogram.bins {
		if value < bound {
			slot = i
			break
		}
	}
	histogram.countPerBin[slot]++
}

// buildKeyValueSizeHistogram walks the items whose keys start with keyPrefix
// and records each item's key size and value size in a fresh
// keyValueSizeHistogram, which it returns. The transaction and iterator it
// opens are released before returning.
func (db *DB) buildKeyValueSizeHistogram(keyPrefix []byte) *keyValueSizeHistogram {
	result := newKeyValueSizeHistogram()

	txn := db.NewTransaction(false)
	defer txn.Discard()

	it := txn.NewIterator(DefaultIteratorOptions)
	defer it.Close()

	// Position the iterator at the prefix, then consume items for as long as
	// they still carry the prefix.
	it.Seek(keyPrefix)
	for it.ValidForPrefix(keyPrefix) {
		entry := it.Item()
		result.keySizeHistogram.Update(entry.KeySize())
		result.valueSizeHistogram.Update(entry.ValueSize())
		it.Next()
	}
	return result
}

// printHistogram writes the histogram to standard output in a human-readable
// format: the total count, min, max and mean, followed by one row per
// non-empty bin and a trailing blank line. Each row shows the half-open range
// [lower, upper) and the number of values counted in it; the last counter is
// shown with "infinity" as its upper end.
//
// When nothing has been recorded, only the total count and the trailing blank
// line are printed.
func (histogram histogramData) printHistogram() {
	fmt.Printf("Total count: %d\n", histogram.totalCount)
	if histogram.totalCount == 0 {
		// With no samples, min and max may still hold their initial
		// placeholder values and the mean would be 0/0 (NaN), so there are
		// no meaningful statistics or rows to show.
		fmt.Println()
		return
	}

	fmt.Printf("Min value: %d\n", histogram.min)
	fmt.Printf("Max value: %d\n", histogram.max)
	fmt.Printf("Mean: %.2f\n", float64(histogram.sum)/float64(histogram.totalCount))
	fmt.Printf("%24s %9s\n", "Range", "Count")

	numBins := len(histogram.bins)
	lastIndex := len(histogram.countPerBin) - 1
	for index, count := range histogram.countPerBin {
		if count == 0 {
			continue
		}

		// The last counter covers everything from the final bound up to
		// infinity, so it is printed differently from the other bins.
		if index == lastIndex {
			var lowerBound int64
			if numBins > 0 {
				lowerBound = histogram.bins[numBins-1]
			}
			fmt.Printf("[%10d, %10s) %9d\n", lowerBound, "infinity", count)
			continue
		}

		// Bounds stay int64 so that they are not truncated on platforms
		// where int is 32 bits wide.
		var lowerBound int64
		if index > 0 {
			lowerBound = histogram.bins[index-1]
		}
		fmt.Printf("[%10d, %10d) %9d\n", lowerBound, histogram.bins[index], count)
	}
	fmt.Println()
}
107 changes: 107 additions & 0 deletions histogram_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
/*
* Copyright 2019 Dgraph Labs, Inc. and Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package badger

import (
"testing"

"github.com/stretchr/testify/require"
)

// TestBuildKeyValueSizeHistogram checks the totals, sums, per-bin counts and
// min/max that buildKeyValueSizeHistogram reports, first for entries that all
// have one-byte keys and values and then for entries of lengths 1, 2 and 3.
func TestBuildKeyValueSizeHistogram(t *testing.T) {
	t.Run("All same size key-values", func(t *testing.T) {
		runBadgerTest(t, nil, func(t *testing.T, db *DB) {
			entries := int64(40)
			err := db.Update(func(txn *Txn) error {
				for i := int64(0); i < entries; i++ {
					// Build the one-byte key directly: string(i) on an
					// integer is a rune conversion that go vet rejects.
					err := txn.Set([]byte{byte(i)}, []byte("B"))
					if err != nil {
						return err
					}
				}
				return nil
			})
			require.NoError(t, err)

			histogram := db.buildKeyValueSizeHistogram(nil)
			keyHistogram := histogram.keySizeHistogram
			valueHistogram := histogram.valueSizeHistogram

			require.Equal(t, entries, keyHistogram.totalCount)
			require.Equal(t, entries, valueHistogram.totalCount)

			// Each entry is of size one. So the sum of sizes should be the same
			// as number of entries
			require.Equal(t, entries, valueHistogram.sum)
			require.Equal(t, entries, keyHistogram.sum)

			// All value sizes are same. The first bin should have all the values.
			require.Equal(t, entries, valueHistogram.countPerBin[0])
			require.Equal(t, entries, keyHistogram.countPerBin[0])

			require.Equal(t, int64(1), keyHistogram.max)
			require.Equal(t, int64(1), keyHistogram.min)
			require.Equal(t, int64(1), valueHistogram.max)
			require.Equal(t, int64(1), valueHistogram.min)
		})
	})

	t.Run("different size key-values", func(t *testing.T) {
		runBadgerTest(t, nil, func(t *testing.T, db *DB) {
			entries := int64(3)
			err := db.Update(func(txn *Txn) error {
				if err := txn.Set([]byte("A"), []byte("B")); err != nil {
					return err
				}

				if err := txn.Set([]byte("AA"), []byte("BB")); err != nil {
					return err
				}

				if err := txn.Set([]byte("AAA"), []byte("BBB")); err != nil {
					return err
				}
				return nil
			})
			require.NoError(t, err)

			histogram := db.buildKeyValueSizeHistogram(nil)
			keyHistogram := histogram.keySizeHistogram
			valueHistogram := histogram.valueSizeHistogram

			require.Equal(t, entries, keyHistogram.totalCount)
			require.Equal(t, entries, valueHistogram.totalCount)

			// The entries have sizes 1, 2 and 3, so the sizes sum to 6 for
			// both keys and values.
			require.Equal(t, int64(6), valueHistogram.sum)
			require.Equal(t, int64(6), keyHistogram.sum)

			// The length 1 entry is in the first bucket; the length 2 and 3
			// entries are in the second bucket.
			require.Equal(t, int64(1), valueHistogram.countPerBin[0])
			require.Equal(t, int64(2), valueHistogram.countPerBin[1])
			require.Equal(t, int64(1), keyHistogram.countPerBin[0])
			require.Equal(t, int64(2), keyHistogram.countPerBin[1])

			require.Equal(t, int64(3), keyHistogram.max)
			require.Equal(t, int64(1), keyHistogram.min)
			require.Equal(t, int64(3), valueHistogram.max)
			require.Equal(t, int64(1), valueHistogram.min)
		})
	})
}
Loading

0 comments on commit fd59907

Please sign in to comment.