Skip to content

Commit

Permalink
AVRO-3257: IDL support for nullable types (apache#1411)
Browse files Browse the repository at this point in the history
* AVRO-3256: IDL type reference with annotation throws error

Previous versions would alter the referenced type when encountering an
annotation on (for example) a field type. This change makes references
read-only.

* AVRO-3256: Document new behavior of annotations

Documented that references to named types cannot be annotated. Also
described where annotations for named types should go.

Lastly, the example has been fixed to match this change, and now also
 contains various types of documentation.

* AVRO-3257: Add syntax for unions of null with a type

Added Kotlin-style syntax for optional types. `MyType?` compiles to the
same result as `union { null, MyType }`.

This commit includes the documentation update.

* AVRO-3257: Remove unused variable from JavaCC grammar

* AVRO-3257: Fluid unions for optional types

The syntax for optional types now put the null type in the unions based
on the default value (if any).

This commit includes the documentation update.

* AVRO-3257: Add comments explaining features

* Fix typo in HTML

Co-authored-by: Ryan Skraba <[email protected]>

Co-authored-by: Ryan Skraba <[email protected]>
  • Loading branch information
opwvhk and RyanSkraba authored Dec 22, 2021
1 parent 1aa963c commit 7c5d8df
Show file tree
Hide file tree
Showing 4 changed files with 101 additions and 16 deletions.
23 changes: 21 additions & 2 deletions doc/src/content/xdocs/idl.xml
Original file line number Diff line number Diff line change
Expand Up @@ -309,17 +309,36 @@ record Card {
<section id="unions">
<title>Unions</title>
<p>Union types are denoted as <code>union { typeA, typeB, typeC, ... }</code>. For example,
this record contains a string field that is optional (unioned with <code>null</code>):
this record contains a string field that is optional (unioned with <code>null</code>), and
a field containing either a precise or a imprecise number:
</p>
<source>
record RecordWithUnion {
union { null, string } optionalString;
union { decimal(12, 6), float } number;
}
</source>
<p>
Note that the same restrictions apply to Avro IDL unions as apply to unions defined in the
JSON format; namely, a record may not contain multiple elements of the same type.
JSON format; namely, a record may not contain multiple elements of the same type. Also,
fields/parameters that use the union type and have a default parameter must specify a
default value of the same type as the <em>first</em> union type.
</p>
<p>Because it occurs so often, there is a special shorthand to denote a union of
<code>null</code> with another type. In the following snippet, the first three fields have
identical types:
</p>
<source>
record RecordWithUnion {
union { null, string } optionalString1 = null;
string? optionalString2 = null;
string? optionalString3; // No default value
string? optionalString4 = "something";
}
</source>
<p>Note that unlike explicit unions, the position of the <code>null</code> type is fluid; it will be
the first or last type depending on the default value (if any). So in the example above, all fields
are valid.</p>
</section> <!-- unions -->
</section> <!-- complex types -->
</section> <!-- how to define records -->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ import org.apache.commons.lang3.StringEscapeUtils;
*/
public class Idl implements Closeable {
static JsonNodeFactory FACTORY = JsonNodeFactory.instance;
private static final String OPTIONAL_NULLABLE_TYPE_PROPERTY = "org.apache.avro.compiler.idl.Idl.NullableType.optional";

URI inputDir;
ClassLoader resourceLoader = null;
Expand Down Expand Up @@ -183,6 +184,32 @@ public class Idl implements Closeable {
return result;
}

/**
* For "optional schemas" (recognized by the marker property the NullableType
* production adds), ensure the null schema is in the right place.
*
* @param schema a schema
* @param defaultValue the intended default value
* @return the schema, or an optional schema with null in the right place
*/
private static Schema fixOptionalSchema(Schema schema, JsonNode defaultValue) {
final Object optionalType = schema.getObjectProp(OPTIONAL_NULLABLE_TYPE_PROPERTY);
if (optionalType != null) {
// The schema is a union schema with 2 types: "null" and a non-"null" schema
Schema nullSchema = schema.getTypes().get(0);
Schema nonNullSchema = schema.getTypes().get(1);
boolean nonNullDefault = defaultValue != null && !defaultValue.isNull();

// Always return a new schema: this drops the marker property.
if (nonNullDefault) {
return Schema.createUnion(nonNullSchema, nullSchema);
} else {
return Schema.createUnion(nullSchema, nonNullSchema);
}
}
return schema;
}

}

PARSER_END(Idl)
Expand Down Expand Up @@ -1000,6 +1027,7 @@ TOKEN :
| < EQUALS: "=" >
| < DOT: "." >
| < DASH: "-" >
| < QUESTION_MARK: "?" >
}

TOKEN :
Expand Down Expand Up @@ -1349,7 +1377,8 @@ void VariableDeclarator(Schema type, List<Field> fields):
order = Field.Order.valueOf(getTextProp(key,props,token).toUpperCase());

boolean validate = SchemaResolver.isFullyResolvedSchema(type);
Field field = Accessor.createField(name, type, DocCommentHelper.getDoc(), defaultValue, validate, order);
Schema fieldType = fixOptionalSchema(type, defaultValue);
Field field = Accessor.createField(name, fieldType, DocCommentHelper.getDoc(), defaultValue, validate, order);
for (String key : props.keySet())
if ("order".equals(key)) { // already handled: ignore
} else if ("aliases".equals(key)) { // aliases
Expand Down Expand Up @@ -1440,11 +1469,6 @@ Schema Type():
( SchemaProperty(props) )*
s = UnannotatedType(props)
{
for (String key : props.keySet())
Accessor.addProp(s, key, props.get(key));
LogicalType logicalType = LogicalTypes.fromSchemaIgnoreInvalid(s);
if (logicalType != null)
logicalType.addToSchema(s);
return s;
}
}
Expand All @@ -1453,15 +1477,50 @@ Schema UnannotatedType(Map<String, JsonNode> props):
{
Schema s;
}
{
(
s = NullableType(props)
| (
s = UnionDefinition()
| s = ArrayType()
| s = MapType()
)
{
// NullableType also applies properties, inside any union with null it may create.
for (String key : props.keySet())
Accessor.addProp(s, key, props.get(key));
LogicalType logicalType = LogicalTypes.fromSchemaIgnoreInvalid(s);
if (logicalType != null)
logicalType.addToSchema(s);
}
)
{
return s;
}
}

Schema NullableType(Map<String, JsonNode> props):
{
Schema s;
boolean optional = false;
}
{
(
s = ReferenceType() { if (!props.isEmpty()) { throw error("Type references may not be annotated", token); } }
| s = PrimitiveType()
| s = UnionDefinition()
| s = ArrayType()
| s = MapType()
)
) [ <QUESTION_MARK> { optional = true; } ]
{
// By applying the properties here (before creating the union), type annotations modify the optional type instead of the union.
for (String key : props.keySet())
Accessor.addProp(s, key, props.get(key));
LogicalType logicalType = LogicalTypes.fromSchemaIgnoreInvalid(s);
if (logicalType != null)
logicalType.addToSchema(s);
if (optional) {
s = Schema.createUnion(Schema.create(Schema.Type.NULL), s);
// Add a marker property to the union (it will be removed when creating fields)
Accessor.addProp(s, OPTIONAL_NULLABLE_TYPE_PROPERTY, BooleanNode.TRUE);
}
return s;
}
}
Expand Down
13 changes: 10 additions & 3 deletions lang/java/compiler/src/test/idl/input/simple.avdl
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,12 @@ protocol Simple {
A,
B,
C
} = C;
} = C; // C is the default value used when reading unknown values from another schema version (without it, reading throws an exception).

/** A TestRecord. */
@my-property({"key":3})
record TestRecord {
// Tests that keywords can also appear in identifiers.
@avro.java.`string`("String") string @order("ignore") name = "foo";

/** The kind of record. */
Expand All @@ -49,16 +50,21 @@ protocol Simple {

MD5 hash = "0000000000000000";

// A traditional optional field
union {null, MD5} @aliases(["hash", "hsh"]) nullableHash = null;

// These two fields parse correctly, but will brewak (be changed to strings) when serializing the protocol/schema as JSON.
double value = NaN;
float average = -Infinity;
date d = 0;
time_ms t = 0;
// An optional type with a non-null default value (results in a union with null last).
time_ms? t = 0;

@foo.bar("bar.foo") long l = 0;
// Arrays (and maps) may also have properties
@foo.bar.bar("foo.bar2") array<string> a = [];
union {null, @foo.foo.bar(42) @foo.foo.foo("3foo") string} prop = null;
// An optional type with a null default value (results in a union with null first).
@foo.foo.bar(42) @foo.foo.foo("3foo") string? prop = null;
}

/** An MD5 hash. */
Expand All @@ -70,6 +76,7 @@ protocol Simple {

/** method 'hello' takes @parameter 'greeting' */
string hello(string greeting);
// The value of TestRecord also contains defaults for fields not mentioned.
TestRecord echo(TestRecord `record` = {"name":"bar","kind":"BAR"});
/** method 'add' takes @parameter 'arg1' @parameter 'arg2' */
@specialProp("test")
Expand Down
2 changes: 1 addition & 1 deletion lang/java/compiler/src/test/idl/output/simple.avpr
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@
"default": 0
}, {
"name": "t",
"type": {"type": "int", "logicalType": "time-millis"},
"type": [ {"type": "int", "logicalType": "time-millis"}, "null" ],
"default": 0
} , {
"name": "l",
Expand Down

0 comments on commit 7c5d8df

Please sign in to comment.