Skip to content

Commit

Permalink
PARQUET-352: Add object model property to file footers.
Browse files Browse the repository at this point in the history
WriteSupport now has a getName getter method that is added to the footer
if it returns a non-null string as writer.model.name. This is intended
to help identify files written by object models incorrectly.

Author: Ryan Blue <[email protected]>

Closes apache#289 from rdblue/PARQUET-352-add-object-model-property and squashes the following commits:

23f8f67 [Ryan Blue] PARQUET-352: Add object model property to file footers.
  • Loading branch information
rdblue committed Dec 8, 2015
1 parent f2615d9 commit dcd1c33
Show file tree
Hide file tree
Showing 15 changed files with 76 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,11 @@ public AvroWriteSupport(MessageType schema, Schema avroSchema,
this.model = model;
}

@Override
public String getName() {
return "avro";
}

/**
* @see org.apache.parquet.avro.AvroParquetOutputFormat#setSchema(org.apache.hadoop.mapreduce.Job, org.apache.avro.Schema)
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,11 @@ public class TupleWriteSupport extends WriteSupport<TupleEntry> {
private MessageType rootSchema;
public static final String PARQUET_CASCADING_SCHEMA = "parquet.cascading.schema";

@Override
public String getName() {
return "cascading";
}

@Override
public WriteContext init(Configuration configuration) {
String schema = configuration.get(PARQUET_CASCADING_SCHEMA);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,10 @@ public void close() throws IOException, InterruptedException {
flushRowGroupToStore();
FinalizedWriteContext finalWriteContext = writeSupport.finalizeWrite();
Map<String, String> finalMetadata = new HashMap<String, String>(extraMetaData);
String modelName = writeSupport.getName();
if (modelName != null) {
finalMetadata.put(ParquetWriter.OBJECT_MODEL_NAME_PROP, modelName);
}
finalMetadata.putAll(finalWriteContext.getExtraMetaData());
parquetFileWriter.end(finalMetadata);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ public class ParquetWriter<T> implements Closeable {
public static final WriterVersion DEFAULT_WRITER_VERSION =
WriterVersion.PARQUET_1_0;

public static final String OBJECT_MODEL_NAME_PROP = "writer.model.name";

// max size (bytes) to write as padding and the min size of a row group
public static final int MAX_PADDING_SIZE_DEFAULT = 0;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,11 @@ public void write(T record) {
delegate.write(record);
}

@Override
public String getName() {
return delegate.getName();
}

@Override
public WriteSupport.FinalizedWriteContext finalizeWrite() {
return delegate.finalizeWrite();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,18 @@ public Map<String, String> getExtraMetaData() {
*/
public abstract void write(T record);

/**
* Called to get a name to identify the WriteSupport object model.
* If not null, this is added to the file footer metadata.
* <p>
* Defining this method will be required in a future API version.
*
* @return a String name for file metadata.
*/
public String getName() {
return null;
}

/**
* called once in the end after the last record was written
* @return information to be added in the file
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,11 @@ public GroupWriteSupport() {
this.extraMetaData = extraMetaData;
}

@Override
public String getName() {
return "example";
}

@Override
public org.apache.parquet.hadoop.api.WriteSupport.WriteContext init(Configuration configuration) {
// if present, prefer the schema passed to the constructor
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,9 @@ public void test() throws Exception {
}
}
}
assertEquals("Object model property should be example",
"example", footer.getFileMetaData().getKeyValueMetaData()
.get(ParquetWriter.OBJECT_MODEL_NAME_PROP));
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,11 @@ public TupleWriteSupport(Schema pigSchema) {
this.rootPigSchema = pigSchema;
}

@Override
public String getName() {
return "pig";
}

public Schema getPigSchema() {
return rootPigSchema;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,11 @@ public ProtoWriteSupport(Class<? extends Message> protobufClass) {
this.protoMessage = protobufClass;
}

@Override
public String getName() {
return "protobuf";
}

public static void setSchema(Configuration configuration, Class<? extends Message> protoClass) {
configuration.setClass(PB_CLASS_WRITE, protoClass, Message.class);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,11 @@ public ScroogeWriteSupport(Class<T> thriftClass) {
super(thriftClass);
}

@Override
public String getName() {
return "scrooge";
}

@Override
protected StructType getThriftStruct() {
ScroogeStructConverter schemaConverter = new ScroogeStructConverter();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,11 @@ public TBaseWriteSupport(Class<T> thriftClass) {
super(thriftClass);
}

@Override
public String getName() {
return "thrift";
}

@Override
protected StructType getThriftStruct() {
return ThriftSchemaConverter.toStructType(thriftClass);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,11 @@ public ThriftBytesWriteSupport(TProtocolFactory protocolFactory, Class<? extends
}
}

@Override
public String getName() {
return "thrift";
}

@Override
public WriteContext init(Configuration configuration) {
if (this.protocolFactory == null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,11 @@ public ThriftWriteSupport(Class<T> thriftClass) {
this.writeSupport = new TBaseWriteSupport(thriftClass);
}

@Override
public String getName() {
return writeSupport.getName();
}

@Override
public WriteContext init(Configuration configuration) {
return this.writeSupport.init(configuration);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,11 @@ public TupleToThriftWriteSupport(String className) {
this.className = className;
}

@Override
public String getName() {
return "thrift";
}

@SuppressWarnings({"rawtypes", "unchecked"})
@Override
public WriteContext init(Configuration configuration) {
Expand Down

0 comments on commit dcd1c33

Please sign in to comment.