Skip to content

Commit

Permalink
record field default values for primitive types casted linkedin#139
Browse files Browse the repository at this point in the history
Because Avro schemas are represented in JSON, record field default
values were represented in JSON.  JSON has only a single data type for
numbers, which is floating point, and the Go standard library decodes
the JSON default value into a Go "float64" value, and was storing it
in the record field's default value.  Similarly, all record field
default values which have the Avro "bytes" data type were being stored
as Go "string" data type values.  And so forth for other default
values.

Later on when decoding values that were encoded using that schema,
`goavro` would return the Go data type for that value stored when
compiling the schema.  Even when the data type for a record field
specified an Avro "int", `goavro` was returning a Go "float64" value.

This update addresses all primitive data types, casting them to the
expected Go data type corresponding to the Avro data type specified by
the schema.  Later on when decoding such values, the values will be
returned to the caller having the proper corresponding Go data type.

Further work remains to address complex Avro data types, such as for
records, enumerations, arrays, maps, and fixed.

Additionally, some edge cases remain regarding Avro union values.  If
a record field data type is an Avro union that can store either an
Avro "int" or an Avro "float", for example, `goavro` is still storing
the default value as a Go "float64" because all numbers are decoded
from the JSON schema as Go "float64" values.  This is not ideal, and
remains an open issue.

As mentioned by issue linkedin#139, the same handling for logical types also
remains additional work.

Furthermore, as issue linkedin#124 refers, it would be nice to be able to
provide the same handling for default values that are themselves
records or arrays.
  • Loading branch information
Karrick S. McDermott committed Jul 12, 2019
1 parent e4677c7 commit 1589033
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 43 deletions.
70 changes: 40 additions & 30 deletions record.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,55 +61,65 @@ func makeRecordCodec(st map[string]*Codec, enclosingNamespace string, schemaMap
}

if defaultValue, ok := fieldSchemaMap["default"]; ok {
switch fieldCodec.typeName.short() {
case "union":
// When codec is union, then default value ought to encode using
// first schema in union. NOTE: To support a null default
// value, the string literal "null" must be coerced to a `nil`
if defaultValue == "null" {
defaultValue = nil
}
// NOTE: To support record field default values, union schema
// set to the type name of first member
// TODO: change to schemaCanonical below
defaultValue = Union(fieldCodec.schemaOriginal, defaultValue)
case "long":
// When codec specifies a long, then ensure value is a JSON
// number (decodes as a float64), then cast into the correct
// type.
v, ok := defaultValue.(float64)
typeNameShort := fieldCodec.typeName.short()
switch typeNameShort {
case "boolean":
v, ok := defaultValue.(bool)
if !ok {
return nil, fmt.Errorf("Record %q field %q: default value ought to encode using field schema: %s", c.typeName, fieldName, err)
}
defaultValue = int64(v)
case "int":
// When codec specifies an int, then ensure value is a JSON
// number (decodes as a float64), then cast into the correct
// type.
v, ok := defaultValue.(float64)
defaultValue = bool(v)
case "bytes":
v, ok := defaultValue.(string)
if !ok {
return nil, fmt.Errorf("Record %q field %q: default value ought to encode using field schema: %s", c.typeName, fieldName, err)
}
defaultValue = int32(v)
defaultValue = []byte(v)
case "double":
// When codec specifies a double, then ensure value is a JSON
// number (decodes as a float64), then cast into the correct
// type.
v, ok := defaultValue.(float64)
if !ok {
return nil, fmt.Errorf("Record %q field %q: default value ought to encode using field schema: %s", c.typeName, fieldName, err)
}
defaultValue = float64(v)
case "float":
// When codec specifies a float, then ensure value is a JSON
// number (decodes as a float64), then cast into the correct
// type.
v, ok := defaultValue.(float64)
if !ok {
return nil, fmt.Errorf("Record %q field %q: default value ought to encode using field schema: %s", c.typeName, fieldName, err)
}
defaultValue = float32(v)
case "int":
v, ok := defaultValue.(float64)
if !ok {
return nil, fmt.Errorf("Record %q field %q: default value ought to encode using field schema: %s", c.typeName, fieldName, err)
}
defaultValue = int32(v)
case "long":
v, ok := defaultValue.(float64)
if !ok {
return nil, fmt.Errorf("Record %q field %q: default value ought to encode using field schema: %s", c.typeName, fieldName, err)
}
defaultValue = int64(v)
case "string":
v, ok := defaultValue.(string)
if !ok {
return nil, fmt.Errorf("Record %q field %q: default value ought to encode using field schema: %s", c.typeName, fieldName, err)
}
defaultValue = string(v)
case "union":
// When codec is union, then default value ought to encode using
// first schema in union. NOTE: To support a null default
// value, the string literal "null" must be coerced to a `nil`
if defaultValue == "null" {
defaultValue = nil
}
// NOTE: To support record field default values, union schema
// set to the type name of first member
// TODO: change to schemaCanonical below
defaultValue = Union(fieldCodec.schemaOriginal, defaultValue)
// default:
// debug("fieldName: %q; type: %q; defaultValue: %T(%#v)\n", fieldName, c.typeName, defaultValue, defaultValue)
}

// attempt to encode default value using codec
_, err = fieldCodec.binaryFromNative(nil, defaultValue)
if err != nil {
Expand Down
43 changes: 30 additions & 13 deletions record_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -613,39 +613,55 @@ func ExampleTextualFromNative() {
}

func TestRecordFieldFixedDefaultValue(t *testing.T) {
testSchemaValid(t, `{"type": "record", "name": "r1", "fields":[{"name": "f1", "type": {"type": "fixed", "name": "fix", "size": 1}, "default": "\u0000"}]}`)
testSchemaValid(t, `{"type": "record", "name": "r1", "fields":[{"name": "f1", "type": {"type": "fixed", "name": "someFixed", "size": 1}, "default": "\u0000"}]}`)
}

func TestRecordFieldLongDefaultValue(t *testing.T) {
func TestRecordFieldDefaultValueTypes(t *testing.T) {
t.Run("success", func(t *testing.T) {
codec, err := NewCodec(`{"type": "record", "name": "r1", "fields":[{"name": "someLong", "type": "long", "default": 0},{"name": "someInt", "type": "int", "default": 0},{"name": "someFloat", "type": "float", "default": 0},{"name": "someDouble", "type": "double", "default": 0}]}`)
codec, err := NewCodec(`{"type": "record", "name": "r1", "fields":[{"name": "someBoolean", "type": "boolean", "default": true},{"name": "someBytes", "type": "bytes", "default": "0"},{"name": "someDouble", "type": "double", "default": 0},{"name": "someFloat", "type": "float", "default": 0},{"name": "someInt", "type": "int", "default": 0},{"name": "someLong", "type": "long", "default": 0},{"name": "someString", "type": "string", "default": "0"}]}`)
ensureError(t, err)

r1, _, err := codec.NativeFromTextual([]byte("{}"))
ensureError(t, err)

r1m := r1.(map[string]interface{})

someLong := r1m["someLong"]
if _, ok := someLong.(int64); !ok {
t.Errorf("GOT: %T; WANT: int64", someLong)
someBoolean := r1m["someBoolean"]
if _, ok := someBoolean.(bool); !ok {
t.Errorf("GOT: %T; WANT: []byte", someBoolean)
}

someInt := r1m["someInt"]
if _, ok := someInt.(int32); !ok {
t.Errorf("GOT: %T; WANT: int32", someInt)
someBytes := r1m["someBytes"]
if _, ok := someBytes.([]byte); !ok {
t.Errorf("GOT: %T; WANT: []byte", someBytes)
}

someDouble := r1m["someDouble"]
if _, ok := someDouble.(float64); !ok {
t.Errorf("GOT: %T; WANT: float64", someDouble)
}

someFloat := r1m["someFloat"]
if _, ok := someFloat.(float32); !ok {
t.Errorf("GOT: %T; WANT: float32", someFloat)
}

someDouble := r1m["someDouble"]
if _, ok := someDouble.(float64); !ok {
t.Errorf("GOT: %T; WANT: float64", someDouble)
someInt := r1m["someInt"]
if _, ok := someInt.(int32); !ok {
t.Errorf("GOT: %T; WANT: int32", someInt)
}

someLong := r1m["someLong"]
if _, ok := someLong.(int64); !ok {
t.Errorf("GOT: %T; WANT: int64", someLong)
}

someString := r1m["someString"]
if _, ok := someString.(string); !ok {
t.Errorf("GOT: %T; WANT: string", someString)
}
})

t.Run("provided default is wrong type", func(t *testing.T) {
t.Run("long", func(t *testing.T) {
_, err := NewCodec(`{"type": "record", "name": "r1", "fields":[{"name": "someLong", "type": "long", "default": "0"},{"name": "someInt", "type": "int", "default": 0},{"name": "someFloat", "type": "float", "default": 0},{"name": "someDouble", "type": "double", "default": 0}]}`)
Expand All @@ -664,8 +680,9 @@ func TestRecordFieldLongDefaultValue(t *testing.T) {
ensureError(t, err, "field schema")
})
})

t.Run("union of int and long", func(t *testing.T) {
t.Skip("should encode default value as int64 rather than float64")
t.Skip("FIXME: should encode default value as int64 rather than float64")

codec, err := NewCodec(`{"type":"record","name":"r1","fields":[{"name":"f1","type":["int","long"],"default":13}]}`)
ensureError(t, err)
Expand Down

0 comments on commit 1589033

Please sign in to comment.