forked from hypermodeinc/badger
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Info command: Show histogram of key/value sizes and number of keys per table (hypermodeinc#730)

* Add ShowKeyValueHistogram method
  This commit adds ShowKeyValueHistogram method and types associated with it.
  Signed-off-by: Ibrahim Jarif <[email protected]>
* Remove whitespace
  Signed-off-by: Ibrahim Jarif <[email protected]>
* Fix comment
  Signed-off-by: Ibrahim Jarif <[email protected]>
* Move histogram code to histogram.go and add histogram_test.go file
  Signed-off-by: Ibrahim Jarif <[email protected]>
* Show number of keys present per SSTable
  The output of `badger info` command now shows the number of keys per SSTable
  Signed-off-by: Ibrahim Jarif <[email protected]>
* Rename ShowKeyValueSizeHistogram to PrintKeyValueHistogram
  Signed-off-by: Ibrahim Jarif <[email protected]>
- Loading branch information
1 parent
b669ca0
commit fd59907
Showing
6 changed files
with
346 additions
and
38 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,169 @@ | ||
/* | ||
* Copyright 2019 Dgraph Labs, Inc. and Contributors | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package badger | ||
|
||
import ( | ||
"fmt" | ||
"math" | ||
) | ||
|
||
// PrintKeyValueHistogram builds and displays the key-value size histogram. | ||
// When keyPrefix is set, only the keys that have prefix "keyPrefix" are | ||
// considered for creating the histogram | ||
func (db *DB) PrintKeyValueHistogram(keyPrefix []byte) { | ||
if db == nil { | ||
fmt.Println("\nCannot build histogram: DB is nil.") | ||
return | ||
} | ||
histogram := db.buildKeyValueSizeHistogram(keyPrefix) | ||
fmt.Printf("Histogram of key sizes (in bytes)\n") | ||
histogram.keySizeHistogram.printHistogram() | ||
fmt.Printf("Histogram of value sizes (in bytes)\n") | ||
histogram.valueSizeHistogram.printHistogram() | ||
} | ||
|
||
// histogramData holds the bin layout, the per-bin counts and the running
// summary statistics of a single histogram.
type histogramData struct {
	// bins holds the exclusive upper bound of each bin. countPerBin holds the
	// number of values recorded in each bin and is allocated with one more
	// slot than bins, for values that are not below any bound.
	bins, countPerBin []int64
	// totalCount is the number of recorded values, min and max are the
	// smallest and largest recorded values, and sum is their total.
	totalCount, min, max, sum int64
}
|
||
// keyValueSizeHistogram contains keySize histogram and valueSize histogram | ||
type keyValueSizeHistogram struct { | ||
keySizeHistogram, valueSizeHistogram histogramData | ||
} | ||
|
||
// newKeyValueSizeHistogram returns a new instance of keyValueSizeHistogram with | ||
// properly initialized fields. | ||
func newKeyValueSizeHistogram() *keyValueSizeHistogram { | ||
// TODO(ibrahim): find appropriate bin size. | ||
keyBins := createHistogramBins(1, 16) | ||
valueBins := createHistogramBins(1, 30) | ||
return &keyValueSizeHistogram{ | ||
keySizeHistogram: histogramData{ | ||
bins: keyBins, | ||
countPerBin: make([]int64, len(keyBins)+1), | ||
max: math.MinInt64, | ||
min: math.MaxInt64, | ||
sum: 0, | ||
}, | ||
valueSizeHistogram: histogramData{ | ||
bins: valueBins, | ||
countPerBin: make([]int64, len(valueBins)+1), | ||
max: math.MinInt64, | ||
min: math.MaxInt64, | ||
sum: 0, | ||
}, | ||
} | ||
} | ||
|
||
// createHistogramBins returns the bin boundaries for a histogram. The
// boundaries are consecutive powers of two in increasing order, from
// 2^minExponent up to and including 2^maxExponent. The result is nil when
// minExponent is greater than maxExponent.
func createHistogramBins(minExponent, maxExponent uint32) []int64 {
	var boundaries []int64
	exponent := minExponent
	for exponent <= maxExponent {
		boundaries = append(boundaries, int64(1)<<exponent)
		exponent++
	}
	return boundaries
}
|
||
// Update the min and max fields if value is less than or greater than the | ||
// current min/max value. | ||
func (histogram *histogramData) Update(value int64) { | ||
if value > histogram.max { | ||
histogram.max = value | ||
} | ||
if value < histogram.min { | ||
histogram.min = value | ||
} | ||
|
||
histogram.sum += value | ||
histogram.totalCount++ | ||
|
||
for index := 0; index <= len(histogram.bins); index++ { | ||
// Allocate value in the last buckets if we reached the end of the Bounds array. | ||
if index == len(histogram.bins) { | ||
histogram.countPerBin[index]++ | ||
break | ||
} | ||
|
||
// Check if the value should be added to the "index" bin | ||
if value < int64(histogram.bins[index]) { | ||
histogram.countPerBin[index]++ | ||
break | ||
} | ||
} | ||
} | ||
|
||
// buildKeyValueSizeHistogram builds the key-value size histogram. | ||
// When keyPrefix is set, only the keys that have prefix "keyPrefix" are | ||
// considered for creating the histogram | ||
func (db *DB) buildKeyValueSizeHistogram(keyPrefix []byte) *keyValueSizeHistogram { | ||
txn := db.NewTransaction(false) | ||
defer txn.Discard() | ||
|
||
itr := txn.NewIterator(DefaultIteratorOptions) | ||
defer itr.Close() | ||
|
||
badgerHistogram := newKeyValueSizeHistogram() | ||
|
||
// Collect key and value sizes. | ||
for itr.Seek(keyPrefix); itr.ValidForPrefix(keyPrefix); itr.Next() { | ||
item := itr.Item() | ||
badgerHistogram.keySizeHistogram.Update(item.KeySize()) | ||
badgerHistogram.valueSizeHistogram.Update(item.ValueSize()) | ||
} | ||
return badgerHistogram | ||
} | ||
|
||
// printHistogram prints the histogram data in a human-readable format. | ||
func (histogram histogramData) printHistogram() { | ||
fmt.Printf("Total count: %d\n", histogram.totalCount) | ||
fmt.Printf("Min value: %d\n", histogram.min) | ||
fmt.Printf("Max value: %d\n", histogram.max) | ||
fmt.Printf("Mean: %.2f\n", float64(histogram.sum)/float64(histogram.totalCount)) | ||
fmt.Printf("%24s %9s\n", "Range", "Count") | ||
|
||
numBins := len(histogram.bins) | ||
for index, count := range histogram.countPerBin { | ||
if count == 0 { | ||
continue | ||
} | ||
|
||
// The last bin represents the bin that contains the range from | ||
// the last bin up to infinity so it's processed differently than the | ||
// other bins. | ||
if index == len(histogram.countPerBin)-1 { | ||
lowerBound := int(histogram.bins[numBins-1]) | ||
fmt.Printf("[%10d, %10s) %9d\n", lowerBound, "infinity", count) | ||
continue | ||
} | ||
|
||
upperBound := int(histogram.bins[index]) | ||
lowerBound := 0 | ||
if index > 0 { | ||
lowerBound = int(histogram.bins[index-1]) | ||
} | ||
|
||
fmt.Printf("[%10d, %10d) %9d\n", lowerBound, upperBound, count) | ||
} | ||
fmt.Println() | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
/* | ||
* Copyright 2019 Dgraph Labs, Inc. and Contributors | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package badger | ||
|
||
import ( | ||
"testing" | ||
|
||
"github.com/stretchr/testify/require" | ||
) | ||
|
||
func TestBuildKeyValueSizeHistogram(t *testing.T) { | ||
t.Run("All same size key-values", func(t *testing.T) { | ||
runBadgerTest(t, nil, func(t *testing.T, db *DB) { | ||
entries := int64(40) | ||
err := db.Update(func(txn *Txn) error { | ||
for i := int64(0); i < entries; i++ { | ||
err := txn.Set([]byte(string(i)), []byte("B")) | ||
if err != nil { | ||
return err | ||
} | ||
} | ||
return nil | ||
}) | ||
require.NoError(t, err) | ||
|
||
histogram := db.buildKeyValueSizeHistogram(nil) | ||
keyHistogram := histogram.keySizeHistogram | ||
valueHistogram := histogram.valueSizeHistogram | ||
|
||
require.Equal(t, entries, keyHistogram.totalCount) | ||
require.Equal(t, entries, valueHistogram.totalCount) | ||
|
||
// Each entry is of size one. So the sum of sizes should be the same | ||
// as number of entries | ||
require.Equal(t, entries, valueHistogram.sum) | ||
require.Equal(t, entries, keyHistogram.sum) | ||
|
||
// All value sizes are same. The first bin should have all the values. | ||
require.Equal(t, entries, valueHistogram.countPerBin[0]) | ||
require.Equal(t, entries, keyHistogram.countPerBin[0]) | ||
|
||
require.Equal(t, int64(1), keyHistogram.max) | ||
require.Equal(t, int64(1), keyHistogram.min) | ||
require.Equal(t, int64(1), valueHistogram.max) | ||
require.Equal(t, int64(1), valueHistogram.min) | ||
}) | ||
}) | ||
|
||
t.Run("different size key-values", func(t *testing.T) { | ||
runBadgerTest(t, nil, func(t *testing.T, db *DB) { | ||
entries := int64(3) | ||
err := db.Update(func(txn *Txn) error { | ||
if err := txn.Set([]byte("A"), []byte("B")); err != nil { | ||
return err | ||
} | ||
|
||
if err := txn.Set([]byte("AA"), []byte("BB")); err != nil { | ||
return err | ||
} | ||
|
||
if err := txn.Set([]byte("AAA"), []byte("BBB")); err != nil { | ||
return err | ||
} | ||
return nil | ||
}) | ||
require.NoError(t, err) | ||
|
||
histogram := db.buildKeyValueSizeHistogram(nil) | ||
keyHistogram := histogram.keySizeHistogram | ||
valueHistogram := histogram.valueSizeHistogram | ||
|
||
require.Equal(t, entries, keyHistogram.totalCount) | ||
require.Equal(t, entries, valueHistogram.totalCount) | ||
|
||
// Each entry is of size one. So the sum of sizes should be the same | ||
// as number of entries | ||
require.Equal(t, int64(6), valueHistogram.sum) | ||
require.Equal(t, int64(6), keyHistogram.sum) | ||
|
||
// Lenght 1 key is in first bucket, length 2 and 3 are in the second | ||
// bucket | ||
require.Equal(t, int64(1), valueHistogram.countPerBin[0]) | ||
require.Equal(t, int64(2), valueHistogram.countPerBin[1]) | ||
require.Equal(t, int64(1), keyHistogram.countPerBin[0]) | ||
require.Equal(t, int64(2), keyHistogram.countPerBin[1]) | ||
|
||
require.Equal(t, int64(3), keyHistogram.max) | ||
require.Equal(t, int64(1), keyHistogram.min) | ||
require.Equal(t, int64(3), valueHistogram.max) | ||
require.Equal(t, int64(1), valueHistogram.min) | ||
}) | ||
}) | ||
} |
Oops, something went wrong.