Skip to content

Commit

Permalink
Use the window size for sampling, instead of a fixed 10MB size. (dgra…
Browse files Browse the repository at this point in the history
…ph-io#501)

* Use the window size for sampling, instead of a fixed 10MB size.
* Use count window, just like size window.
* Ensure that we return ErrNoRewrite if no logs are picked.
  • Loading branch information
manishrjain authored Jun 4, 2018
1 parent e201d7b commit 8b1006b
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 10 deletions.
5 changes: 4 additions & 1 deletion integration/testgc/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,9 +128,10 @@ func main() {
// Run value log GC.
defer closer.Done()
var count int
ticker := time.NewTicker(30 * time.Second)
ticker := time.NewTicker(5 * time.Second)
defer ticker.Stop()
for range ticker.C {
again:
select {
case <-closer.HasBeenClosed():
log.Printf("Num times value log GC was successful: %d\n", count)
Expand All @@ -142,6 +143,7 @@ func main() {
log.Printf("Result of value log GC: %v\n", err)
if err == nil {
count++
goto again
}
}
}()
Expand Down Expand Up @@ -218,4 +220,5 @@ func main() {
log.Fatalf("Error while iterating: %v", err)
}
log.Println("Iteration done. Test successful.")
time.Sleep(time.Minute) // Time to do some poking around.
}
28 changes: 19 additions & 9 deletions value.go
Original file line number Diff line number Diff line change
Expand Up @@ -1068,11 +1068,15 @@ func (vlog *valueLog) doRunGC(lf *logFile, discardRatio float64, tr trace.Trace)
tr.SetError()
return err
}
window := float64(fi.Size()) * 0.1 // 10% of the file as window.

// Set up the sampling window sizes.
sizeWindow := float64(fi.Size()) * 0.1 // 10% of the file as window.
countWindow := int(float64(vlog.opt.ValueLogMaxEntries) * 0.01) // 1% of num entries.
tr.LazyPrintf("Size window: %5.2f. Count window: %d.", sizeWindow, countWindow)

// Pick a random start point for the log.
skipFirstM := float64(rand.Int63n(fi.Size())) // Pick a random starting location.
skipFirstM -= window // Avoid hitting EOF by moving back by window.
skipFirstM -= sizeWindow // Avoid hitting EOF by moving back by window.
skipFirstM /= float64(mi) // Convert to MBs.
tr.LazyPrintf("Skip first %5.2f MB of file of size: %d MB", skipFirstM, fi.Size()/mi)
var skipped float64
Expand All @@ -1084,18 +1088,18 @@ func (vlog *valueLog) doRunGC(lf *logFile, discardRatio float64, tr trace.Trace)
var numIterations int
err = vlog.iterate(lf, 0, func(e Entry, vp valuePointer) error {
numIterations++
esz := float64(vp.Len) / (1 << 20) // in MBs. +4 for the CAS stuff.
esz := float64(vp.Len) / (1 << 20) // in MBs.
if skipped < skipFirstM {
skipped += esz
return nil
}

// Sample until we reach window size or 10K entries or exceed 10 seconds.
if r.count > 10000 {
tr.LazyPrintf("Stopping sampling after 10K entries.")
// Sample until we reach the window sizes or exceed 10 seconds.
if r.count > countWindow {
tr.LazyPrintf("Stopping sampling after %d entries.", countWindow)
return errStop
}
if r.total > window {
if r.total > sizeWindow {
tr.LazyPrintf("Stopping sampling after reaching window size.")
return errStop
}
Expand Down Expand Up @@ -1158,8 +1162,9 @@ func (vlog *valueLog) doRunGC(lf *logFile, discardRatio float64, tr trace.Trace)
tr.LazyPrintf("Fid: %d. Skipped: %5.2fMB Num iterations: %d. Data status=%+v\n",
lf.fid, skipped, numIterations, r)

// If we sampled at least 10MB, we can make a call about rewrite.
if (r.count < 10000 && r.total < 10.0) || r.discard < discardRatio*r.total {
// If we could sample neither countWindow entries nor at least 75% of the size window,
// or if what we can discard is below the threshold, we should skip the rewrite.
if (r.count < countWindow && r.total < sizeWindow*0.75) || r.discard < discardRatio*r.total {
tr.LazyPrintf("Skipping GC on fid: %d", lf.fid)
return ErrNoRewrite
}
Expand All @@ -1185,13 +1190,18 @@ func (vlog *valueLog) runGC(discardRatio float64, head valuePointer) error {
case vlog.garbageCh <- struct{}{}:
// Pick a log file for GC.
tr := trace.New("Badger.ValueLog", "GC")
tr.SetMaxEvents(100)
defer func() {
tr.Finish()
<-vlog.garbageCh
}()

var err error
files := vlog.pickLog(head, tr)
if len(files) == 0 {
tr.LazyPrintf("PickLog returned zero results.")
return ErrNoRewrite
}
tried := make(map[uint32]bool)
for _, lf := range files {
if _, done := tried[lf.fid]; done {
Expand Down

0 comments on commit 8b1006b

Please sign in to comment.