Skip to content

Commit

Permalink
feat: Improve zstd encoder and decoder usage in the compressor codec (h…
Browse files Browse the repository at this point in the history
  • Loading branch information
bignacio authored Jul 28, 2024
1 parent 8ea2833 commit f8a0492
Show file tree
Hide file tree
Showing 2 changed files with 126 additions and 12 deletions.
32 changes: 20 additions & 12 deletions ocf/codec.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ func resolveCodec(name CodecName, lvl int) (Codec, error) {
return &SnappyCodec{}, nil

case ZStandard:
return &ZStandardCodec{}, nil
return newZStandardCodec(), nil

default:
return nil, fmt.Errorf("unknown codec %s", name)
Expand Down Expand Up @@ -127,20 +127,28 @@ func (*SnappyCodec) Encode(b []byte) []byte {
}

// ZStandardCodec is a zstandard compression codec.
type ZStandardCodec struct{}
type ZStandardCodec struct {
// decoder is the zstd decoder stored on the codec; Decode calls DecodeAll on it.
decoder *zstd.Decoder
// encoder is the zstd encoder stored on the codec; Encode calls EncodeAll on it.
encoder *zstd.Encoder
}

// Decode decodes the given bytes.
func (*ZStandardCodec) Decode(b []byte) ([]byte, error) {
dec, _ := zstd.NewReader(nil)
defer dec.Close()
// newZStandardCodec returns a ZStandardCodec whose decoder and encoder are
// created once here, each constructed with a nil reader/writer argument, and
// stored on the returned codec.
func newZStandardCodec() *ZStandardCodec {
// NOTE(review): the errors returned by zstd.NewReader and zstd.NewWriter are
// discarded — presumably they cannot fail when no options are passed; confirm
// against the zstd package documentation.
decoder, _ := zstd.NewReader(nil)
encoder, _ := zstd.NewWriter(nil)
return &ZStandardCodec{
decoder: decoder,
encoder: encoder,
}
}

return dec.DecodeAll(b, nil)
// Decode decodes the given bytes.
//
// It passes b to the codec's stored decoder via DecodeAll with a nil
// destination and returns that call's result and error unchanged.
func (zstdCodec *ZStandardCodec) Decode(b []byte) ([]byte, error) {
// Once DecodeAll has returned, reset the stored decoder with a nil source.
// The error returned by Reset is deliberately discarded.
defer func() { _ = zstdCodec.decoder.Reset(nil) }()
return zstdCodec.decoder.DecodeAll(b, nil)
}

// Encode encodes the given bytes.
func (*ZStandardCodec) Encode(b []byte) []byte {
enc, _ := zstd.NewWriter(nil)
defer func() { _ = enc.Close() }()

return enc.EncodeAll(b, nil)
// Encode passes b to the codec's stored encoder via EncodeAll with a nil
// destination and returns the resulting bytes.
func (zstdCodec *ZStandardCodec) Encode(b []byte) []byte {
// Once EncodeAll has returned, reset the stored encoder with a nil writer.
defer zstdCodec.encoder.Reset(nil)
return zstdCodec.encoder.EncodeAll(b, nil)
}
106 changes: 106 additions & 0 deletions ocf/codec_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
package ocf

import (
"math/rand"
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

func TestZstdEncodeDecodeLowEntropyLong(t *testing.T) {
input := makeTestData(8762, func() byte { return 'a' })

verifyZstdEncodeDecode(t, input)
}

func TestZstdEncodeDecodeLowEntropyShort(t *testing.T) {
input := makeTestData(7, func() byte { return 'a' })

verifyZstdEncodeDecode(t, input)
}

func TestZstdEncodeDecodeHighEntropyLong(t *testing.T) {
input := makeTestData(8762, func() byte { return byte(rand.Uint32()) })

verifyZstdEncodeDecode(t, input)
}

func TestZstdEncodeDecodeHighEntropyShort(t *testing.T) {
input := makeTestData(7, func() byte { return byte(rand.Uint32()) })

verifyZstdEncodeDecode(t, input)
}

/*
Benchmark results when a new zstd encoder/decoder is created on every call:
goos: linux
goarch: amd64
pkg: github.com/hamba/avro/v2/ocf
cpu: AMD Ryzen 5 3550H with Radeon Vega Mobile Gfx
BenchmarkZstdEncodeDecodeLowEntropyLong
BenchmarkZstdEncodeDecodeLowEntropyLong-8 289 3523847 ns/op 10891887 B/op 40 allocs/op
BenchmarkZstdEncodeDecodeHighEntropyLong
BenchmarkZstdEncodeDecodeHighEntropyLong-8 298 3390952 ns/op 10894703 B/op 40 allocs/op
Benchmark results when an existing zstd encoder/decoder is reused:
BenchmarkZstdEncodeDecodeLowEntropyLong
BenchmarkZstdEncodeDecodeLowEntropyLong-8 55628 22883 ns/op 19220 B/op 2 allocs/op
BenchmarkZstdEncodeDecodeHighEntropyLong
BenchmarkZstdEncodeDecodeHighEntropyLong-8 47652 25064 ns/op 31553 B/op 3 allocs/op
*/

// BenchmarkZstdEncodeDecodeLowEntropyLong measures, per iteration, one Encode
// followed by one Decode of 8762 bytes of the repeated character 'a', using a
// single codec obtained from resolveCodec before the timed loop.
func BenchmarkZstdEncodeDecodeLowEntropyLong(b *testing.B) {
	payload := makeTestData(8762, func() byte { return 'a' })

	zstdCodec, resolveErr := resolveCodec(ZStandard, 0)
	require.NoError(b, resolveErr)

	b.ReportAllocs()
	// Exclude payload generation and codec setup from the measurement.
	b.ResetTimer()

	for iter := 0; iter < b.N; iter++ {
		_, decodeErr := zstdCodec.Decode(zstdCodec.Encode(payload))
		require.NoError(b, decodeErr)
	}
}

// BenchmarkZstdEncodeDecodeHighEntropyLong measures, per iteration, one Encode
// followed by one Decode of 8762 pseudo-random bytes, using a single codec
// obtained from resolveCodec before the timed loop.
func BenchmarkZstdEncodeDecodeHighEntropyLong(b *testing.B) {
	randomByte := func() byte { return byte(rand.Uint32()) }
	payload := makeTestData(8762, randomByte)

	zstdCodec, resolveErr := resolveCodec(ZStandard, 0)
	require.NoError(b, resolveErr)

	b.ReportAllocs()
	// Exclude payload generation and codec setup from the measurement.
	b.ResetTimer()

	for iter := 0; iter < b.N; iter++ {
		_, decodeErr := zstdCodec.Decode(zstdCodec.Encode(payload))
		require.NoError(b, decodeErr)
	}
}

// verifyZstdEncodeDecode resolves the ZStandard codec, encodes input, decodes
// the encoded bytes, and checks that decoding succeeds and yields input again.
//
// It fails the test immediately if the codec cannot be resolved or decoding
// returns an error, and records a failure if the decoded bytes differ.
func verifyZstdEncodeDecode(t *testing.T, input []byte) {
	// Mark this function as a helper so failures are attributed to the
	// calling test's line rather than to this shared function.
	t.Helper()

	codec, err := resolveCodec(ZStandard, 0)
	require.NoError(t, err)

	compressed := codec.Encode(input)
	actual, decodeErr := codec.Decode(compressed)

	require.NoError(t, decodeErr)
	assert.Equal(t, input, actual)
}

// makeTestData returns a slice of the given length in which every element is
// produced by a separate call to charMaker, made in index order.
func makeTestData(length int, charMaker func() byte) []byte {
	data := make([]byte, length)
	for idx := range data {
		data[idx] = charMaker()
	}
	return data
}

0 comments on commit f8a0492

Please sign in to comment.