Skip to content

Commit

Permalink
feat: support for Go int encoding/decoding into/from Avro long (h…
Browse files Browse the repository at this point in the history
  • Loading branch information
lovromazgon authored Jul 29, 2024
1 parent f8a0492 commit dfd5956
Show file tree
Hide file tree
Showing 4 changed files with 86 additions and 39 deletions.
63 changes: 34 additions & 29 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,35 +68,40 @@ More examples in the [godoc](https://pkg.go.dev/github.com/hamba/avro/v2).

#### Types Conversions

| Avro | Go Struct | Go Interface |
|-------------------------------|--------------------------------------------------------|--------------------------|
| `null` | `nil` | `nil` |
| `boolean` | `bool` | `bool` |
| `bytes` | `[]byte` | `[]byte` |
| `float` | `float32` | `float32` |
| `double` | `float64` | `float64` |
| `long` | `int64`, `uint32`\* | `int64`, `uint32` |
| `int` | `int`, `int32`, `int16`, `int8`, `uint8`\*, `uint16`\* | `int`, `uint8`, `uint16` |
| `fixed` | `uint64` | `uint64` |
| `string` | `string` | `string` |
| `array` | `[]T` | `[]any` |
| `enum` | `string` | `string` |
| `fixed` | `[n]byte` | `[n]byte` |
| `map` | `map[string]T{}` | `map[string]any` |
| `record` | `struct` | `map[string]any` |
| `union` | *see below* | *see below* |
| `int.date` | `time.Time` | `time.Time` |
| `int.time-millis` | `time.Duration` | `time.Duration` |
| `long.time-micros` | `time.Duration` | `time.Duration` |
| `long.timestamp-millis` | `time.Time` | `time.Time` |
| `long.timestamp-micros` | `time.Time` | `time.Time` |
| `long.local-timestamp-millis` | `time.Time` | `time.Time` |
| `long.local-timestamp-micros` | `time.Time` | `time.Time` |
| `bytes.decimal` | `*big.Rat` | `*big.Rat` |
| `fixed.decimal` | `*big.Rat` | `*big.Rat` |
| `string.uuid` | `string` | `string` |

\* Please note that when the Go type is an unsigned integer care must be taken to ensure that information is not lost
| Avro | Go Struct | Go Interface |
|-------------------------------|------------------------------------------------------------|--------------------------|
| `null` | `nil` | `nil` |
| `boolean` | `bool` | `bool` |
| `bytes` | `[]byte` | `[]byte` |
| `float` | `float32` | `float32` |
| `double` | `float64` | `float64` |
| `long` | `int`\*, `int64`, `uint32`\** | `int`, `int64`, `uint32` |
| `int` | `int`\*, `int32`, `int16`, `int8`, `uint8`\**, `uint16`\** | `int`, `uint8`, `uint16` |
| `fixed` | `uint64` | `uint64` |
| `string` | `string` | `string` |
| `array` | `[]T` | `[]any` |
| `enum` | `string` | `string` |
| `fixed` | `[n]byte` | `[n]byte` |
| `map` | `map[string]T{}` | `map[string]any` |
| `record` | `struct` | `map[string]any` |
| `union` | *see below* | *see below* |
| `int.date` | `time.Time` | `time.Time` |
| `int.time-millis` | `time.Duration` | `time.Duration` |
| `long.time-micros` | `time.Duration` | `time.Duration` |
| `long.timestamp-millis` | `time.Time` | `time.Time` |
| `long.timestamp-micros` | `time.Time` | `time.Time` |
| `long.local-timestamp-millis` | `time.Time` | `time.Time` |
| `long.local-timestamp-micros` | `time.Time` | `time.Time` |
| `bytes.decimal` | `*big.Rat` | `*big.Rat` |
| `fixed.decimal` | `*big.Rat` | `*big.Rat` |
| `string.uuid` | `string` | `string` |

\* Please note that the size of the Go type `int` is platform dependent. Decoding an Avro `long` into a Go `int` is
only allowed on 64-bit platforms and will result in an error on 32-bit platforms. Similarly, be careful when encoding a
Go `int` using Avro `int` on a 64-bit platform, as that can result in an integer overflow causing misinterpretation of
the data.

\** Please note that when the Go type is an unsigned integer, care must be taken to ensure that information is not lost
when converting between the Avro type and the Go type. For example, a *negative* number stored in Avro as `int = -100`
would be interpreted as `uint16 = 65,436` in Go. Similarly, a number stored in Avro that is too large for the Go type
wraps around: `int = 256` would be interpreted as `uint8 = 0` in Go.
Expand Down
22 changes: 15 additions & 7 deletions codec_native.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"fmt"
"math/big"
"reflect"
"strconv"
"time"
"unsafe"

Expand All @@ -21,10 +22,15 @@ func createDecoderOfNative(schema *PrimitiveSchema, typ reflect2.Type) ValDecode
return &boolCodec{}

case reflect.Int:
if schema.Type() != Int {
break
switch schema.Type() {
case Int:
return &intCodec[int]{}
case Long:
if strconv.IntSize == 64 {
// allow decoding into int when it's 64-bit
return &longCodec[int]{}
}
}
return &intCodec[int]{}

case reflect.Int8:
if schema.Type() != Int {
Expand Down Expand Up @@ -183,10 +189,12 @@ func createEncoderOfNative(schema Schema, typ reflect2.Type) ValEncoder {
return &boolCodec{}

case reflect.Int:
if schema.Type() != Int {
break
switch schema.Type() {
case Int:
return &intCodec[int]{}
case Long:
return &longCodec[int]{}
}
return &intCodec[int]{}

case reflect.Int8:
if schema.Type() != Int {
Expand Down Expand Up @@ -367,7 +375,7 @@ func (*intCodec[T]) Encode(ptr unsafe.Pointer, w *Writer) {
}

// largeInt is the type constraint used for longCodec's type parameter: it
// lists the Go integer types that longCodec may be instantiated with.
type largeInt interface {
~int32 | ~uint32 | int64
~int | ~int32 | ~uint32 | int64
}

// longCodec is a generic codec type parameterized over largeInt.
// createDecoderOfNative and createEncoderOfNative return longCodec[int] when a
// Go int is paired with the Long schema type.
type longCodec[T largeInt] struct{}
Expand Down
26 changes: 23 additions & 3 deletions decoder_native_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package avro_test
import (
"bytes"
"math/big"
"strconv"
"testing"
"time"

Expand Down Expand Up @@ -68,7 +69,7 @@ func TestDecoder_BoolEof(t *testing.T) {
assert.Error(t, err)
}

func TestDecoder_Int(t *testing.T) {
func TestDecoder_Int_Int(t *testing.T) {
defer ConfigTeardown()

data := []byte{0x36}
Expand All @@ -83,6 +84,25 @@ func TestDecoder_Int(t *testing.T) {
assert.Equal(t, 27, i)
}

// TestDecoder_Int_Long checks that an Avro long holding a value beyond the
// 32-bit range can be decoded into a Go int when int is 64 bits wide.
func TestDecoder_Int_Long(t *testing.T) {
	// Decoding a long into an int is only supported when int is 64-bit.
	if strconv.IntSize != 64 {
		t.Skipf("int size is %d, skipping test", strconv.IntSize)
	}

	defer ConfigTeardown()

	// Zig-zag varint encoding of 2147483648 (one past the int32 maximum).
	data := []byte{0x80, 0x80, 0x80, 0x80, 0x10}
	schema := "long"
	dec, err := avro.NewDecoder(schema, bytes.NewReader(data))
	require.NoError(t, err)

	var i int
	err = dec.Decode(&i)

	require.NoError(t, err)
	// Compare as int64: a bare 2147483648 passed as `any` defaults to int and
	// does not compile on 32-bit targets, which would break the whole test
	// package there even though this test is skipped at run time.
	assert.Equal(t, int64(2147483648), int64(i))
}

func TestDecoder_IntShortRead(t *testing.T) {
defer ConfigTeardown()

Expand Down Expand Up @@ -288,7 +308,7 @@ func TestDecoder_Uint32InvalidSchema(t *testing.T) {
func TestDecoder_Int64(t *testing.T) {
defer ConfigTeardown()

data := []byte{0x36}
data := []byte{0x80, 0x80, 0x80, 0x80, 0x10}
schema := "long"
dec, err := avro.NewDecoder(schema, bytes.NewReader(data))
require.NoError(t, err)
Expand All @@ -297,7 +317,7 @@ func TestDecoder_Int64(t *testing.T) {
err = dec.Decode(&i)

require.NoError(t, err)
assert.Equal(t, int64(27), i)
assert.Equal(t, int64(2147483648), i)
}

func TestDecoder_Int64ShortRead(t *testing.T) {
Expand Down
14 changes: 14 additions & 0 deletions encoder_native_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,20 @@ func TestEncoder_Int64FromInt32(t *testing.T) {
assert.Equal(t, []byte{0x36}, buf.Bytes())
}

// TestEncoder_Int64FromInt checks that a Go int holding a value beyond the
// 32-bit range is encoded correctly with the Avro long schema.
func TestEncoder_Int64FromInt(t *testing.T) {
	// ^uint(0)>>32 is zero exactly when int/uint are 32 bits wide. The value
	// under test does not fit in a 32-bit int, so skip on such platforms.
	if ^uint(0)>>32 == 0 {
		t.Skip("int is 32-bit, skipping test")
	}

	defer ConfigTeardown()

	schema := "long"
	buf := bytes.NewBuffer([]byte{})
	enc, err := avro.NewEncoder(schema, buf)
	require.NoError(t, err)

	// Convert from an int64 variable rather than passing the literal: an
	// untyped 2147483648 passed as `any` defaults to int and does not compile
	// on 32-bit targets, which would break the whole test package there.
	val := int64(2147483648)
	err = enc.Encode(int(val))

	require.NoError(t, err)
	assert.Equal(t, []byte{0x80, 0x80, 0x80, 0x80, 0x10}, buf.Bytes())
}

func TestEncoder_Int64InvalidSchema(t *testing.T) {
defer ConfigTeardown()

Expand Down

0 comments on commit dfd5956

Please sign in to comment.