Skip to content

Commit

Permalink
AVRO-1695. Java: Fix GenericData#deepCopy() to support logical types.
Browse files Browse the repository at this point in the history
  • Loading branch information
cutting committed Aug 31, 2016
1 parent f01f61b commit be1639c
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 9 deletions.
3 changes: 3 additions & 0 deletions CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ Trunk (not yet released)
AVRO-1889: Update maven-shade-plugin to enable building in Java 8 on Mac.
(Sachin Goyal via blue)

AVRO-1695. Java: Fix GenericData#deepCopy() to support logical types.
(cutting)

BUG FIXES

AVRO-1741: Python3: Fix error when codec is not in the header.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import org.apache.avro.AvroRuntimeException;
import org.apache.avro.AvroTypeException;
import org.apache.avro.Conversion;
import org.apache.avro.Conversions;
import org.apache.avro.LogicalType;
import org.apache.avro.Schema;
import org.apache.avro.Schema.Field;
Expand Down Expand Up @@ -1018,15 +1019,31 @@ public Object getDefaultValue(Field field) {

/**
* Produces a deep copy of a value, interpreted according to its schema.
* <P>If the schema has a logical type and a conversion is registered for the
* value's class, the value is converted to its raw form, the raw form is
* copied, and the copy is converted back to the logical form. In every other
* case the value is copied directly as a raw value.
* @param schema the schema of the value to deep copy.
* @param value the value to deep copy; may be null.
* @return a deep copy of the given value, or null if the value is null.
*/
@SuppressWarnings({ "rawtypes", "unchecked" })
public <T> T deepCopy(Schema schema, T value) {
  if (value == null) {
    return null;
  }

  LogicalType logicalType = schema.getLogicalType();

  // Only look for a conversion when the schema actually has a logical type.
  // NOTE(review): the lookup is keyed on the value's class alone -- confirm
  // this picks the intended conversion when one class serves several
  // logical types.
  Conversion conversion =
      (logicalType == null) ? null : getConversionByClass(value.getClass());

  if (conversion == null) {
    // no logical type, or no conversion for this class: treat value as raw
    return (T) deepCopyRaw(schema, value);
  }

  // logical -> raw, copy the raw form, then raw -> logical
  Object rawValue =
      Conversions.convertToRawType(value, schema, logicalType, conversion);
  Object rawCopy = deepCopyRaw(schema, rawValue);
  return (T) Conversions.convertToLogicalType(
      rawCopy, schema, logicalType, conversion);
}

private Object deepCopyRaw(Schema schema, Object value) {
if (value == null) {
return null;
}

switch (schema.getType()) {
case ARRAY:
List<Object> arrayValue = (List) value;
Expand All @@ -1035,7 +1052,7 @@ public <T> T deepCopy(Schema schema, T value) {
for (Object obj : arrayValue) {
arrayCopy.add(deepCopy(schema.getElementType(), obj));
}
return (T)arrayCopy;
return arrayCopy;
case BOOLEAN:
return value; // immutable
case BYTES:
Expand All @@ -1045,13 +1062,13 @@ public <T> T deepCopy(Schema schema, T value) {
byte[] bytesCopy = new byte[length];
byteBufferValue.get(bytesCopy, 0, length);
byteBufferValue.position(start);
return (T)ByteBuffer.wrap(bytesCopy, 0, length);
return ByteBuffer.wrap(bytesCopy, 0, length);
case DOUBLE:
return value; // immutable
case ENUM:
return (T)createEnum(value.toString(), schema);
return createEnum(value.toString(), schema);
case FIXED:
return (T)createFixed(null, ((GenericFixed) value).bytes(), schema);
return createFixed(null, ((GenericFixed) value).bytes(), schema);
case FLOAT:
return value; // immutable
case INT:
Expand All @@ -1066,7 +1083,7 @@ public <T> T deepCopy(Schema schema, T value) {
mapCopy.put((CharSequence)(deepCopy(STRINGS, entry.getKey())),
deepCopy(schema.getValueType(), entry.getValue()));
}
return (T)mapCopy;
return mapCopy;
case NULL:
return null;
case RECORD:
Expand All @@ -1080,21 +1097,21 @@ public <T> T deepCopy(Schema schema, T value) {
getField(value, name, pos, oldState));
setField(newRecord, name, pos, newValue, newState);
}
return (T)newRecord;
return newRecord;
case STRING:
// Strings are immutable
if (value instanceof String) {
return (T)value;
return value;
}

// Some CharSequence subclasses are mutable, so we still need to make
// a copy
else if (value instanceof Utf8) {
// Utf8 copy constructor is more efficient than converting
// to string and then back to Utf8
return (T)new Utf8((Utf8)value);
return new Utf8((Utf8)value);
}
return (T)new Utf8(value.toString());
return new Utf8(value.toString());
case UNION:
return deepCopy(
schema.getTypes().get(resolveUnion(schema, value)), value);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -232,4 +232,65 @@ private <D> File write(GenericData model, Schema schema, D... data) throws IOExc

return file;
}

/** Deep-copies a UUID value against a uuid-annotated string schema using the GENERIC model. */
@Test
public void testCopyUuid() {
  Schema uuidSchema =
      LogicalTypes.uuid().addToSchema(Schema.create(Schema.Type.STRING));
  UUID logicalValue = UUID.randomUUID();
  testCopy(uuidSchema, logicalValue, GENERIC);
}

/** Deep-copies the raw (string) form of a UUID using a model with no conversions. */
@Test
public void testCopyUuidRaw() {
  Schema uuidSchema =
      LogicalTypes.uuid().addToSchema(Schema.create(Schema.Type.STRING));
  String rawValue = UUID.randomUUID().toString(); // raw type, not java.util.UUID
  GenericData plainModel = GenericData.get();     // with no conversions
  testCopy(uuidSchema, rawValue, plainModel);
}

/** Deep-copies a BigDecimal against a decimal(9, 2) bytes schema using the GENERIC model. */
@Test
public void testCopyDecimal() {
  Schema decimalSchema =
      LogicalTypes.decimal(9, 2).addToSchema(Schema.create(Schema.Type.BYTES));
  BigDecimal logicalValue = new BigDecimal("-34.34");
  testCopy(decimalSchema, logicalValue, GENERIC);
}

/** Deep-copies the raw (ByteBuffer) form of a decimal using a model with no conversions. */
@Test
public void testCopyDecimalRaw() {
  Schema decimalSchema =
      LogicalTypes.decimal(9, 2).addToSchema(Schema.create(Schema.Type.BYTES));
  // raw form: the unscaled value's two's-complement bytes wrapped in a buffer
  byte[] unscaledBytes = new BigDecimal("-34.34").unscaledValue().toByteArray();
  ByteBuffer rawValue = ByteBuffer.wrap(unscaledBytes);
  testCopy(decimalSchema, rawValue, GenericData.get()); // no conversions
}

/**
* Runs {@code model.deepCopy} on the value in four shapes: on its own, as the
* single field of a record, as the single element of an array, and as a
* record inside an array. Each copy is compared with its original.
* @param schema the schema describing {@code value}.
* @param value the datum to copy.
* @param model the data model whose deepCopy is under test.
*/
private void testCopy(Schema schema, Object value, GenericData model) {
  // 1. the bare value: only equality is checked, identity is not asserted
  Object directCopy = model.deepCopy(schema, value);
  checkCopy(value, directCopy, false);

  // 2. the value as field "x" of record "X"
  List<Schema.Field> fields = new ArrayList<Schema.Field>();
  fields.add(new Schema.Field("x", schema, "", null));
  Schema recordSchema = Schema.createRecord("X", "", "test", false);
  recordSchema.setFields(fields);

  GenericData.Record record =
      new GenericRecordBuilder(recordSchema).set("x", value).build();
  Object recordCopy = model.deepCopy(recordSchema, record);
  checkCopy(record, recordCopy, true);

  // 3. the value as the only element of an array
  Schema arraySchema = Schema.createArray(schema);
  List<Object> array = new ArrayList<Object>(Arrays.asList(value));
  Object arrayCopy = model.deepCopy(arraySchema, array);
  checkCopy(array, arrayCopy, true);

  // 4. the record from step 2 as the only element of an array
  Schema recordArraySchema = Schema.createArray(recordSchema);
  List<Object> recordArray = new ArrayList<Object>(Arrays.asList((Object) record));
  Object recordArrayCopy = model.deepCopy(recordArraySchema, recordArray);
  checkCopy(recordArray, recordArrayCopy, true);
}

/**
* Asserts that a copy equals its original and, optionally, that it is a
* distinct instance.
* @param original the value that was copied.
* @param copy the result of the copy.
* @param notSame when true, also assert that the two are different objects.
*/
private void checkCopy(Object original, Object copy, boolean notSame) {
  if (notSame) {
    Assert.assertNotSame(original, copy);
  }
  Assert.assertEquals(original, copy);
}

}

0 comments on commit be1639c

Please sign in to comment.