Skip to content

Commit

Permalink
Support configuration for handling multi-valued dimension (apache#2541)
Browse files Browse the repository at this point in the history
* Support configuration for handling multi-valued dimension

* Addressed comments

* use MultiValueHandling.ofDefault() for missing policy
  • Loading branch information
navis authored and dclim committed Nov 4, 2016
1 parent 4203580 commit b99e14e
Show file tree
Hide file tree
Showing 20 changed files with 421 additions and 101 deletions.
46 changes: 44 additions & 2 deletions api/src/main/java/io/druid/data/input/impl/DimensionSchema.java
Original file line number Diff line number Diff line change
Expand Up @@ -67,18 +67,60 @@ public static ValueType fromString(String name)
}
}

public static enum MultiValueHandling
{
SORTED_ARRAY,
SORTED_SET,
ARRAY {
@Override
public boolean needSorting() { return false;}
};

public boolean needSorting()
{
return true;
}

@Override
@JsonValue
public String toString()
{
return name().toUpperCase();
}

@JsonCreator
public static MultiValueHandling fromString(String name)
{
return name == null ? ofDefault() : valueOf(name.toUpperCase());
}

// this can be system configuration
public static MultiValueHandling ofDefault()
{
return SORTED_ARRAY;
}
}

private final String name;
private final MultiValueHandling multiValueHandling;

protected DimensionSchema(String name)
protected DimensionSchema(String name, MultiValueHandling multiValueHandling)
{
this.name = Preconditions.checkNotNull(name, "Dimension name cannot be null.");
this.multiValueHandling = multiValueHandling;
}

@JsonProperty
public String getName()
{
return name;
};
}

@JsonProperty
public MultiValueHandling getMultiValueHandling()
{
return multiValueHandling;
}

@JsonIgnore
public abstract String getTypeName();
Expand Down
15 changes: 14 additions & 1 deletion api/src/main/java/io/druid/data/input/impl/DimensionsSpec.java
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,20 @@ public class DimensionsSpec
private final Set<String> dimensionExclusions;
private final Map<String, DimensionSchema> dimensionSchemaMap;

public static DimensionsSpec ofEmpty()
{
return new DimensionsSpec(null, null, null);
}

public static List<DimensionSchema> getDefaultSchemas(List<String> dimNames)
{
return getDefaultSchemas(dimNames, DimensionSchema.MultiValueHandling.ofDefault());
}

public static List<DimensionSchema> getDefaultSchemas(
final List<String> dimNames,
final DimensionSchema.MultiValueHandling multiValueHandling
)
{
return Lists.transform(
dimNames,
Expand All @@ -51,7 +64,7 @@ public static List<DimensionSchema> getDefaultSchemas(List<String> dimNames)
@Override
public DimensionSchema apply(String input)
{
return new StringDimensionSchema(input);
return new StringDimensionSchema(input, multiValueHandling);
}
}
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ public FloatDimensionSchema(
@JsonProperty("name") String name
)
{
super(name);
super(name, null);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ public LongDimensionSchema(
@JsonProperty("name") String name
)
{
super(name);
super(name, null);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ public NewSpatialDimensionSchema(
@JsonProperty("dims") List<String> dims
)
{
super(name);
super(name, null);
this.dims = dims;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,23 @@
public class StringDimensionSchema extends DimensionSchema
{
@JsonCreator
public static StringDimensionSchema create(String name) {
public static StringDimensionSchema create(String name)
{
return new StringDimensionSchema(name);
}

@JsonCreator
public StringDimensionSchema(
@JsonProperty("name") String name
@JsonProperty("name") String name,
@JsonProperty("multiValueHandling") MultiValueHandling multiValueHandling
)
{
super(name);
super(name, multiValueHandling);
}

public StringDimensionSchema(String name)
{
this(name, null);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,21 +20,28 @@
package io.druid.segment;

import io.druid.java.util.common.IAE;
import io.druid.data.input.impl.DimensionSchema.MultiValueHandling;
import io.druid.segment.column.ColumnCapabilities;
import io.druid.segment.column.ValueType;

public final class DimensionHandlerUtil
{
private DimensionHandlerUtil() {}

public static DimensionHandler getHandlerFromCapabilities(String dimensionName, ColumnCapabilities capabilities)
public static DimensionHandler getHandlerFromCapabilities(
String dimensionName,
ColumnCapabilities capabilities,
MultiValueHandling multiValueHandling
)
{
DimensionHandler handler = null;
if (capabilities.getType() == ValueType.STRING) {
if (!capabilities.isDictionaryEncoded() || !capabilities.hasBitmapIndexes()) {
throw new IAE("String column must have dictionary encoding and bitmap index.");
}
handler = new StringDimensionHandler(dimensionName);
// use default behavior
multiValueHandling = multiValueHandling == null ? MultiValueHandling.ofDefault() : multiValueHandling;
handler = new StringDimensionHandler(dimensionName, multiValueHandling);
}
if (handler == null) {
throw new IAE("Could not create handler from invalid column type: " + capabilities.getType());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ public interface DimensionIndexer<EncodedType extends Comparable<EncodedType>, E
* Get the minimum dimension value seen by this indexer.
*
* NOTE:
* On an in-memory segment (IncrementaIndex), we can determine min/max values by looking at the stream of
* On an in-memory segment (IncrementalIndex), we can determine min/max values by looking at the stream of
* row values seen in calls to processSingleRowValToIndexKey().
*
* However, on a disk-backed segment (QueryableIndex), the numeric dimensions do not currently have any
Expand Down Expand Up @@ -259,9 +259,11 @@ public Object makeColumnValueSelector(
*/
public int getUnsortedEncodedArrayHashCode(EncodedTypeArray key);

public static final boolean LIST = true;
public static final boolean ARRAY = false;

/**
* Given a row value array from a TimeAndDims key, as described in the documentatiion for compareUnsortedEncodedArrays(),
* Given a row value array from a TimeAndDims key, as described in the documentation for compareUnsortedEncodedArrays(),
* convert the unsorted encoded values to a list or array of actual values.
*
* If the key has one element, this method should return a single Object instead of an array or list, ignoring
Expand All @@ -275,7 +277,7 @@ public Object makeColumnValueSelector(


/**
* Given a row value array from a TimeAndDims key, as described in the documentatiion for compareUnsortedEncodedArrays(),
* Given a row value array from a TimeAndDims key, as described in the documentation for compareUnsortedEncodedArrays(),
* convert the unsorted encoded values to an array of sorted encoded values (i.e., sorted by their corresponding actual values)
*
* @param key dimension value array from a TimeAndDims key
Expand Down
2 changes: 1 addition & 1 deletion processing/src/main/java/io/druid/segment/IndexMerger.java
Original file line number Diff line number Diff line change
Expand Up @@ -912,7 +912,7 @@ protected DimensionHandler[] makeDimensionHandlers(final List<String> mergedDime
for (int i = 0; i < mergedDimensions.size(); i++) {
ColumnCapabilities capabilities = dimCapabilities.get(i);
String dimName = mergedDimensions.get(i);
handlers[i] = DimensionHandlerUtil.getHandlerFromCapabilities(dimName, capabilities);
handlers[i] = DimensionHandlerUtil.getHandlerFromCapabilities(dimName, capabilities, null);
}
return handlers;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ private void initDimensionHandlers()
{
for (String dim : availableDimensions) {
ColumnCapabilities capabilities = getColumn(dim).getCapabilities();
DimensionHandler handler = DimensionHandlerUtil.getHandlerFromCapabilities(dim, capabilities);
DimensionHandler handler = DimensionHandlerUtil.getHandlerFromCapabilities(dim, capabilities, null);
dimensionHandlers.put(dim, handler);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,8 @@

package io.druid.segment;

import com.google.common.base.Function;
import com.google.common.primitives.Ints;
import io.druid.java.util.common.logger.Logger;
import io.druid.data.input.impl.DimensionSchema.MultiValueHandling;
import io.druid.segment.column.Column;
import io.druid.segment.column.ColumnCapabilities;
import io.druid.segment.column.DictionaryEncodedColumn;
Expand All @@ -37,13 +36,13 @@

public class StringDimensionHandler implements DimensionHandler<Integer, int[], String>
{
private static final Logger log = new Logger(StringDimensionHandler.class);

private final String dimensionName;
private final MultiValueHandling multiValueHandling;

public StringDimensionHandler(String dimensionName)
public StringDimensionHandler(String dimensionName, MultiValueHandling multiValueHandling)
{
this.dimensionName = dimensionName;
this.multiValueHandling = multiValueHandling;
}

@Override
Expand Down Expand Up @@ -193,7 +192,7 @@ public Object getRowValueArrayFromColumn(Closeable column, int currRow)
@Override
public DimensionIndexer<Integer, int[], String> makeIndexer()
{
return new StringDimensionIndexer();
return new StringDimensionIndexer(multiValueHandling);
}

@Override
Expand All @@ -220,21 +219,6 @@ public DimensionMergerLegacy makeLegacyMerger(
return new StringDimensionMergerLegacy(dimensionName, indexSpec, outDir, ioPeon, capabilities, progress);
}

public static final Function<Object, String> STRING_TRANSFORMER = new Function<Object, String>()
{
@Override
public String apply(final Object o)
{
if (o == null) {
return null;
}
if (o instanceof String) {
return (String) o;
}
return o.toString();
}
};

public static final Comparator<Integer> ENCODED_COMPARATOR = new Comparator<Integer>()
{
@Override
Expand All @@ -250,18 +234,4 @@ public int compare(Integer o1, Integer o2)
}
};

public static final Comparator<String> UNENCODED_COMPARATOR = new Comparator<String>()
{
@Override
public int compare(String o1, String o2)
{
if (o1 == null) {
return o2 == null ? 0 : -1;
}
if (o2 == null) {
return 1;
}
return o1.compareTo(o2);
}
};
}
Loading

0 comments on commit b99e14e

Please sign in to comment.