Skip to content

Commit

Permalink
matchfinder.M4: add LimitedSearch option
Browse files Browse the repository at this point in the history
Using LimitedSearch, it only checks for overlapping matches in one
place instead of checking at each byte.
This gains about 50% in compression speed while only losing about
2% in compression ratio.
  • Loading branch information
andybalholm committed Dec 30, 2023
1 parent 924a0eb commit 63f3f43
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 1 deletion.
8 changes: 8 additions & 0 deletions brotli_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -657,3 +657,11 @@ func TestEncodeM4(t *testing.T) {
func BenchmarkEncodeM4(b *testing.B) {
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20}, 1<<16)
}

func TestEncodeM4Limited(t *testing.T) {
test(t, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 18, LimitedSearch: true}, 1<<16)
}

func BenchmarkEncodeM4Limited(b *testing.B) {
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, LimitedSearch: true}, 1<<16)
}
11 changes: 10 additions & 1 deletion matchfinder/m4.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import (
// interface that uses a simple hash table to find matches,
// but the advanced parsing technique from
// https://fastcompression.blogspot.com/2011/12/advanced-parsing-strategies.html,
// except that it looks for matches at every input position.
// except that it normally looks for matches at every input position.
type M4 struct {
// MaxDistance is the maximum distance (in bytes) to look back for
// a match. The default is 65535.
Expand All @@ -28,6 +28,11 @@ type M4 struct {
// The default is 17 (128K entries).
TableBits int

// When LimitedSearch is true, it only looks for matches at certain
// points in the input rather than at every byte.
// (This makes compression faster, but hurts the compression ratio.)
LimitedSearch bool

table []uint32

history []byte
Expand Down Expand Up @@ -97,6 +102,10 @@ func (q *M4) FindMatches(dst []Match, src []byte) []Match {
candidate := int(q.table[h])
q.table[h] = uint32(i)

if q.LimitedSearch && i < matches[0].End && i != matches[0].End+2-q.HashLen {
continue
}

if candidate == 0 || i-candidate > q.MaxDistance || i-candidate == matches[0].Start-matches[0].Match {
continue
}
Expand Down

0 comments on commit 63f3f43

Please sign in to comment.