Skip to content

Commit

Permalink
Merge pull request linkedin#211 from dbr65/master
Browse files Browse the repository at this point in the history
Added support for custom logical types (validated-string using regular expressions)
  • Loading branch information
xmcqueen authored Sep 30, 2021
2 parents d2d1b7b + 4251964 commit 3544999
Show file tree
Hide file tree
Showing 4 changed files with 145 additions and 1 deletion.
2 changes: 2 additions & 0 deletions codec.go
Original file line number Diff line number Diff line change
Expand Up @@ -596,6 +596,8 @@ func buildCodecForTypeDescribedByString(st map[string]*Codec, enclosingNamespace
return makeDecimalBytesCodec(st, enclosingNamespace, schemaMap)
case "fixed.decimal":
return makeDecimalFixedCodec(st, enclosingNamespace, schemaMap)
case "string.validated-string":
return makeValidatedStringCodec(st, enclosingNamespace, schemaMap)
default:
if isLogicalType {
delete(schemaMap, "logicalType")
Expand Down
83 changes: 83 additions & 0 deletions logical_type.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,16 @@ import (
"errors"
"fmt"
"math/big"
"regexp"
"strings"
"time"
)

// toNativeFn is the signature of a decoder: it receives a byte slice and
// returns the decoded value, a byte slice, and an error. The returned slice is
// presumably the input that remains after the decoded value — TODO confirm
// against the codec implementations.
type toNativeFn func([]byte) (interface{}, []byte, error)
// fromNativeFn is the signature of an encoder: it receives a byte slice and a
// native Go value and returns a byte slice and an error. The returned slice is
// presumably the input with the encoded value appended — TODO confirm.
type fromNativeFn func([]byte, interface{}) ([]byte, error)

// reFromPattern holds compiled regular expressions keyed by their pattern
// text. makeValidatedStringCodec stores entries in it and the validated-string
// decoders look entries up by pattern.
// NOTE(review): this is a plain package-level map with no locking visible in
// this file section — confirm that codecs are never built while another
// goroutine is reading it.
var reFromPattern = make(map[string]*regexp.Regexp)

//////////////////////////////////////////////////////////////////////////////////////////////
// date logical type - to/from time.Time, time.UTC location
//////////////////////////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -335,6 +339,85 @@ func makeDecimalFixedCodec(st map[string]*Codec, enclosingNamespace string, sche
return c, nil
}

// makeValidatedStringCodec builds the codec for the "validated-string" logical
// type, a string whose decoded value must match a regular expression.
//
// The expression is read from the "pattern" attribute of schemaMap, trimmed of
// surrounding white space, compiled, and stored in reFromPattern under the
// trimmed text so that the decoders created below can find it. When schemaMap
// has no "name" attribute it is given the name "string.validated-string"
// before the codec is registered in st under enclosingNamespace.
//
// An error is returned when "pattern" is missing, is not a JSON string, does
// not compile, or when registering the codec fails.
func makeValidatedStringCodec(st map[string]*Codec, enclosingNamespace string, schemaMap map[string]interface{}) (*Codec, error) {
	pattern, ok := schemaMap["pattern"]
	if !ok {
		return nil, errors.New("cannot create validated-string logical type without pattern")
	}

	// The schema is caller-supplied JSON, so "pattern" may hold any JSON value;
	// report a non-string as an error rather than panicking on the assertion.
	patternText, ok := pattern.(string)
	if !ok {
		return nil, fmt.Errorf("cannot create validated-string logical type: pattern ought to be a string; received: %T", pattern)
	}

	patternStr := strings.TrimSpace(patternText)
	if reFromPattern[patternStr] == nil {
		regexpr, err := regexp.Compile(patternStr)
		if err != nil {
			return nil, err
		}
		reFromPattern[patternStr] = regexpr
	}

	if _, ok := schemaMap["name"]; !ok {
		schemaMap["name"] = "string.validated-string"
	}

	c, err := registerNewCodec(st, schemaMap, enclosingNamespace)
	if err != nil {
		return nil, err
	}

	// Only the decoders receive the pattern; the encoders are built without it.
	c.binaryFromNative = validatedStringBinaryFromNative(c.binaryFromNative)
	c.textualFromNative = validatedStringTextualFromNative(c.textualFromNative)
	c.nativeFromBinary = validatedStringNativeFromBinary(c.nativeFromBinary, patternStr)
	c.nativeFromTextual = validatedStringNativeFromTextual(c.nativeFromTextual, patternStr)
	return c, nil
}

// validatedStringBinaryFromNative returns the binary encoder installed on a
// validated-string codec. The returned function passes its arguments straight
// to stringBinaryFromNative; the fromNativeFn argument is not consulted and no
// pattern check takes place on this path.
func validatedStringBinaryFromNative(fromNativeFn fromNativeFn) fromNativeFn {
	encode := func(buf []byte, datum interface{}) ([]byte, error) {
		return stringBinaryFromNative(buf, datum)
	}
	return encode
}

// validatedStringTextualFromNative returns the textual encoder installed on a
// validated-string codec. The returned function passes its arguments straight
// to stringTextualFromNative; the fromNativeFn argument is not consulted and
// no pattern check takes place on this path.
func validatedStringTextualFromNative(fromNativeFn fromNativeFn) fromNativeFn {
	encode := func(buf []byte, datum interface{}) ([]byte, error) {
		return stringTextualFromNative(buf, datum)
	}
	return encode
}

// validatedStringNativeFromBinary returns a binary decoder that decodes a
// string with stringNativeFromBinary and then requires it to match pattern.
//
// The fn argument is accepted for signature symmetry but is not called.
//
// The compiled expression is resolved once, when the decoder is built, rather
// than on every decode: this keeps the decode path from reading the shared
// reFromPattern map and from dereferencing a missing entry. If the map has no
// entry for pattern, the pattern is compiled here; a compile failure is
// reported by every call of the returned decoder.
//
// The returned decoder yields (value, remaining bytes, nil) on success,
// (nil, nil, err) when the underlying decode fails, and (nil, input, err) when
// the decoded string does not satisfy the pattern.
func validatedStringNativeFromBinary(fn toNativeFn, pattern string) toNativeFn {
	re := reFromPattern[pattern]
	var reErr error
	if re == nil {
		re, reErr = regexp.Compile(pattern)
	}

	return func(buf []byte) (interface{}, []byte, error) {
		if reErr != nil {
			return nil, buf, fmt.Errorf("cannot compile validation pattern %q: %s", pattern, reErr)
		}

		decoded, newBytes, err := stringNativeFromBinary(buf)
		if err != nil {
			return nil, nil, err
		}

		result, ok := decoded.(string)
		if !ok {
			return nil, buf, fmt.Errorf("cannot validate decoded value: expected string; received: %T", decoded)
		}
		if !re.MatchString(result) {
			return nil, buf, fmt.Errorf("cannot match input string against validation pattern: %q does not match %q", result, pattern)
		}

		return decoded, newBytes, nil
	}
}

// validatedStringNativeFromTextual returns a textual decoder that decodes a
// string with stringNativeFromTextual and then requires it to match pattern.
//
// The fn argument is accepted for signature symmetry but is not called.
//
// The compiled expression is resolved once, when the decoder is built, rather
// than on every decode: this keeps the decode path from reading the shared
// reFromPattern map and from dereferencing a missing entry. If the map has no
// entry for pattern, the pattern is compiled here; a compile failure is
// reported by every call of the returned decoder.
//
// The returned decoder yields (value, remaining bytes, nil) on success,
// (nil, nil, err) when the underlying decode fails, and (nil, input, err) when
// the decoded string does not satisfy the pattern.
func validatedStringNativeFromTextual(fn toNativeFn, pattern string) toNativeFn {
	re := reFromPattern[pattern]
	var reErr error
	if re == nil {
		re, reErr = regexp.Compile(pattern)
	}

	return func(buf []byte) (interface{}, []byte, error) {
		if reErr != nil {
			return nil, buf, fmt.Errorf("cannot compile validation pattern %q: %s", pattern, reErr)
		}

		decoded, newBytes, err := stringNativeFromTextual(buf)
		if err != nil {
			return nil, nil, err
		}

		result, ok := decoded.(string)
		if !ok {
			return nil, buf, fmt.Errorf("cannot validate decoded value: expected string; received: %T", decoded)
		}
		if !re.MatchString(result) {
			return nil, buf, fmt.Errorf("cannot match input string against validation pattern: %q does not match %q", result, pattern)
		}

		return decoded, newBytes, nil
	}
}

func padBytes(bytes []byte, fixedSize uint) []byte {
s := int(fixedSize)
padded := make([]byte, s, s)
Expand Down
59 changes: 59 additions & 0 deletions logical_type_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,65 @@ func TestDecimalBytesLogicalTypeInRecordEncode(t *testing.T) {
testBinaryCodecPass(t, schema, map[string]interface{}{"mydecimal": big.NewRat(617, 50)}, []byte("\x04\x04\xd2"))
}

// TestValidatedStringLogicalTypeInRecordEncode sends a record with a single
// validated-string field through every conversion the codec offers:
// textual -> native -> binary -> native -> textual, and checks that the final
// text is the compact form of the starting document.
func TestValidatedStringLogicalTypeInRecordEncode(t *testing.T) {
	const schema = `{
"type": "record",
"name": "myrecord",
"fields": [
{
"name": "number",
"doc": "Phone number inside the national network. Length between 4-14",
"type": {
"type": "string",
"logicalType": "validated-string",
"pattern": "^[\\d]{4,14}$"
}
}
]
}`

	codec, err := NewCodec(schema)
	if err != nil {
		t.Fatal(err)
	}

	// Step 1: Avro JSON text to native Go form.
	fromText, _, err := codec.NativeFromTextual([]byte(`{"number": "667777777"}`))
	if err != nil {
		t.Fatal(err)
	}

	// Step 2: native Go form to Avro binary.
	encoded, err := codec.BinaryFromNative(nil, fromText)
	if err != nil {
		t.Fatal(err)
	}

	// The shared helpers must agree with the bytes produced above.
	testSchemaValid(t, schema)
	expected := map[string]interface{}{"number": "667777777"}
	testBinaryCodecPass(t, schema, expected, encoded)

	// Step 3: Avro binary back to native Go form.
	fromBinary, _, err := codec.NativeFromBinary(encoded)
	if err != nil {
		t.Fatal(err)
	}

	// Step 4: native Go form back to Avro JSON text.
	rendered, err := codec.TextualFromNative(nil, fromBinary)
	if err != nil {
		t.Fatal(err)
	}

	// The textual encoder emits the record without any insignificant white space.
	const want = `{"number":"667777777"}`
	if got := string(rendered); got != want {
		t.Errorf("GOT: %v; WANT: %v", got, want)
	}
}

func ExampleUnion_logicalType() {
// Supported logical types and their native go types:
// * timestamp-millis - time.Time
Expand Down
2 changes: 1 addition & 1 deletion name.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ func (e ErrInvalidName) Error() string {
// isRuneInvalidForFirstCharacter reports whether r is rejected as the first
// character of a name component.
//
// NOTE: This function is designed to work with name components, after they
// have been split on the period rune.
//
// NOTE(review): two return statements appear below; they differ only in the
// trailing `r != '-'` term, and as written only the first one is reachable.
// This looks like the removed and added lines of a diff shown together —
// confirm which of the two is the intended body.
func isRuneInvalidForFirstCharacter(r rune) bool {
return (r < 'A' || r > 'Z') && (r < 'a' || r > 'z') && r != '_'
return (r < 'A' || r > 'Z') && (r < 'a' || r > 'z') && r != '_' && r != '-'
}

func isRuneInvalidForOtherCharacters(r rune) bool {
Expand Down

0 comments on commit 3544999

Please sign in to comment.