Skip to content

Commit

Permalink
New stats for reclaimable disk space from scorch index (blevesearch#1470
Browse files Browse the repository at this point in the history
)

* New stats for reclaimable disk space from scorch index

Add a new stat, `num_bytes_used_disk_by_root_reclaimable`,
which approximates the amount of disk space that could be reclaimed
from a scorch index. This gives useful insight
into how much disk space is wasted by tombstoned/obsoleted
content across the segments.

* Refactor to reduce the time the lock is held while capturing the disk stats.
  • Loading branch information
sreekanth-cb authored Sep 28, 2020
1 parent c997b4a commit 99e9e90
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 11 deletions.
22 changes: 11 additions & 11 deletions index/scorch/scorch.go
Original file line number Diff line number Diff line change
Expand Up @@ -515,21 +515,17 @@ func (s *Scorch) diskFileStats(rootSegmentPaths map[string]struct{}) (uint64,
return numFilesOnDisk, numBytesUsedDisk, numBytesOnDiskByRoot
}

// rootDiskSegmentsPaths builds the set of file paths belonging to the
// segments of s.root that implement segment.PersistedSegment. Segments that
// do not implement that interface are left out.
//
// The method takes no lock itself while reading s.root.
// NOTE(review): callers presumably need to hold s.rootLock — confirm.
func (s *Scorch) rootDiskSegmentsPaths() map[string]struct{} {
	segs := s.root.segment
	paths := make(map[string]struct{}, len(segs))
	for idx := range segs {
		persisted, isPersisted := segs[idx].segment.(segment.PersistedSegment)
		if !isPersisted {
			continue
		}
		paths[persisted.Path()] = struct{}{}
	}
	return paths
}

func (s *Scorch) StatsMap() map[string]interface{} {
m := s.stats.ToMap()

indexSnapshot := s.currentSnapshot()
defer func() {
_ = indexSnapshot.Close()
}()

rootSegPaths := indexSnapshot.diskSegmentsPaths()

s.rootLock.RLock()
rootSegPaths := s.rootDiskSegmentsPaths()
m["CurFilesIneligibleForRemoval"] = uint64(len(s.ineligibleForRemoval))
s.rootLock.RUnlock()

Expand All @@ -556,6 +552,10 @@ func (s *Scorch) StatsMap() map[string]interface{} {
m["num_bytes_used_disk"] = numBytesUsedDisk
// total disk bytes by the latest root index, exclusive of older snapshots
m["num_bytes_used_disk_by_root"] = numBytesOnDiskByRoot
// num_bytes_used_disk_by_root_reclaimable is an approximation about the
// reclaimable disk space in an index. (eg: from a full compaction)
m["num_bytes_used_disk_by_root_reclaimable"] = uint64(float64(numBytesOnDiskByRoot) *
indexSnapshot.reClaimableDocsRatio())
m["num_files_on_disk"] = numFilesOnDisk
m["num_root_memorysegments"] = m["TotMemorySegmentsAtRoot"]
m["num_root_filesegments"] = m["TotFileSegmentsAtRoot"]
Expand Down
27 changes: 27 additions & 0 deletions index/scorch/snapshot_index.go
Original file line number Diff line number Diff line change
Expand Up @@ -708,6 +708,33 @@ func (i *IndexSnapshot) DumpFields() chan interface{} {
return rv
}

// diskSegmentsPaths builds the set of file paths belonging to the segments
// of this snapshot that implement segment.PersistedSegment. Segments that do
// not implement that interface are left out. The returned map is keyed by
// path and carries empty struct values.
func (i *IndexSnapshot) diskSegmentsPaths() map[string]struct{} {
	segs := i.segment
	paths := make(map[string]struct{}, len(segs))
	for idx := range segs {
		persisted, isPersisted := segs[idx].segment.(segment.PersistedSegment)
		if !isPersisted {
			continue
		}
		paths[persisted.Path()] = struct{}{}
	}
	return paths
}

// reClaimableDocsRatio reports the fraction of documents in this snapshot's
// persisted segments that are obsoleted and therefore reclaimable.
//
// Only segments implementing segment.PersistedSegment are counted. For those,
// FullSize() is summed as the total and Count() as the live portion; the
// result is (total - live) / total. It returns 0 when the total is zero.
func (i *IndexSnapshot) reClaimableDocsRatio() float64 {
	var all, live uint64
	for _, ss := range i.segment {
		if _, persisted := ss.segment.(segment.PersistedSegment); !persisted {
			continue
		}
		all += uint64(ss.FullSize())
		live += uint64(ss.Count())
	}

	if all == 0 {
		return 0
	}
	return float64(all-live) / float64(all)
}

// subtractStrings returns set a minus elements of set b.
func subtractStrings(a, b []string) []string {
if len(b) == 0 {
Expand Down

0 comments on commit 99e9e90

Please sign in to comment.