Skip to content

Commit

Permalink
Add "strlen" extractionFn. (apache#3731)
Browse files Browse the repository at this point in the history
  • Loading branch information
gianm authored and b-slim committed Dec 2, 2016
1 parent 4c5d10f commit 102375d
Show file tree
Hide file tree
Showing 5 changed files with 147 additions and 4 deletions.
21 changes: 18 additions & 3 deletions docs/content/querying/dimensionspecs.md
Original file line number Diff line number Diff line change
Expand Up @@ -150,9 +150,13 @@ matches, otherwise returns null.

### Substring Extraction Function

Returns a substring of the dimension value starting from the supplied index and of the desired length. If the desired
length exceeds the length of the dimension value, the remainder of the string starting at index will be returned.
If index is greater than the length of the dimension value, null will be returned.
Returns a substring of the dimension value starting from the supplied index and of the desired length. Both index
and length are measured in the number of Unicode code units present in the string as if it were encoded in UTF-16.
Note that some Unicode characters may be represented by two code units. This is the same behavior as the Java String
class's "substring" method.

If the desired length exceeds the length of the dimension value, the remainder of the string starting at index will
be returned. If index is greater than the length of the dimension value, null will be returned.

```json
{ "type" : "substring", "index" : 1, "length" : 4 }
Expand All @@ -165,6 +169,17 @@ or null if index greater than the length of the dimension value.
{ "type" : "substring", "index" : 3 }
```

### Strlen Extraction Function

Returns the length of dimension values, as measured in the number of Unicode code units present in the string as if it
were encoded in UTF-16. Note that some Unicode characters may be represented by two code units. This is the same
behavior as the Java String class's "length" method.

A null string is treated as having zero length.

```json
{ "type" : "strlen" }
```

### Time Format Extraction Function

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,4 +38,5 @@ public class ExtractionCacheHelper
public static final byte CACHE_TYPE_ID_UPPER = 0xB;
public static final byte CACHE_TYPE_ID_LOWER = 0xC;
public static final byte CACHE_TYPE_ID_BUCKET = 0xD;
public static final byte CACHE_TYPE_ID_STRLEN = 0xE;
}
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@
@JsonSubTypes.Type(name = "stringFormat", value = StringFormatExtractionFn.class),
@JsonSubTypes.Type(name = "upper", value = UpperExtractionFn.class),
@JsonSubTypes.Type(name = "lower", value = LowerExtractionFn.class),
@JsonSubTypes.Type(name = "bucket", value = BucketExtractionFn.class)
@JsonSubTypes.Type(name = "bucket", value = BucketExtractionFn.class),
@JsonSubTypes.Type(name = "strlen", value = StrlenExtractionFn.class)
})
/**
* An ExtractionFn is a function that can be used to transform the values of a column (typically a dimension)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/*
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Metamarkets licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package io.druid.query.extraction;

import com.fasterxml.jackson.annotation.JsonCreator;

/**
 * Extraction function that replaces a dimension value with its length, rendered as a decimal string.
 *
 * The length is whatever {@link String#length()} reports, i.e. the number of UTF-16 code units, and a
 * null input is reported as length zero. The function carries no configuration, so a single shared
 * instance is handed out by {@link #instance()}, which also serves as the Jackson creator.
 */
public class StrlenExtractionFn extends DimExtractionFn
{
  private static final StrlenExtractionFn INSTANCE = new StrlenExtractionFn();

  private StrlenExtractionFn()
  {
    // No state to initialize; instances are only obtained through instance().
  }

  /**
   * @return the shared instance of this extraction function
   */
  @JsonCreator
  public static StrlenExtractionFn instance()
  {
    return INSTANCE;
  }

  /**
   * @param value dimension value, possibly null
   *
   * @return the length of {@code value} in UTF-16 code units as a string, or "0" when it is null
   */
  @Override
  public String apply(String value)
  {
    final int length;
    if (value == null) {
      length = 0;
    } else {
      length = value.length();
    }
    return Integer.toString(length);
  }

  /**
   * @return always false
   */
  @Override
  public boolean preservesOrdering()
  {
    return false;
  }

  /**
   * @return {@link ExtractionType#MANY_TO_ONE}, since distinct strings can share the same length
   */
  @Override
  public ExtractionType getExtractionType()
  {
    return ExtractionType.MANY_TO_ONE;
  }

  /**
   * @return a freshly allocated one-byte key holding only the strlen cache type id
   */
  @Override
  public byte[] getCacheKey()
  {
    final byte[] cacheKey = new byte[1];
    cacheKey[0] = ExtractionCacheHelper.CACHE_TYPE_ID_STRLEN;
    return cacheKey;
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/*
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Metamarkets licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package io.druid.query.extraction;

import com.fasterxml.jackson.databind.ObjectMapper;
import io.druid.jackson.DefaultObjectMapper;
import org.junit.Assert;
import org.junit.Test;

/**
 * Unit tests for {@link StrlenExtractionFn}: length computation, cache key contents, and JSON
 * round-tripping to the shared instance.
 */
public class StrlenExtractionFnTest
{
  @Test
  public void testApply()
  {
    final StrlenExtractionFn fn = StrlenExtractionFn.instance();

    // Null and empty inputs both report a length of zero.
    Assert.assertEquals("0", fn.apply(null));
    Assert.assertEquals("0", fn.apply(""));

    Assert.assertEquals("1", fn.apply("x"));
    Assert.assertEquals("3", fn.apply("foo"));
    Assert.assertEquals("3", fn.apply("föo"));

    // A single code point outside the BMP occupies two UTF-16 code units.
    Assert.assertEquals("2", fn.apply("\uD83D\uDE02"));

    // Numeric arguments; StrlenExtractionFn itself only declares apply(String), so these presumably
    // resolve to overloads inherited from DimExtractionFn that stringify the value first.
    Assert.assertEquals("1", fn.apply(1));
    Assert.assertEquals("2", fn.apply(-1));
  }

  @Test
  public void testGetCacheKey()
  {
    // Pin the actual key contents rather than comparing the method's output with itself.
    Assert.assertArrayEquals(
        new byte[]{ExtractionCacheHelper.CACHE_TYPE_ID_STRLEN},
        StrlenExtractionFn.instance().getCacheKey()
    );
  }

  @Test
  public void testSerde() throws Exception
  {
    final ObjectMapper objectMapper = new DefaultObjectMapper();

    final String json = "{ \"type\" : \"strlen\" }";

    StrlenExtractionFn extractionFn = (StrlenExtractionFn) objectMapper.readValue(json, ExtractionFn.class);
    StrlenExtractionFn extractionFnRoundTrip = (StrlenExtractionFn) objectMapper.readValue(
        objectMapper.writeValueAsString(extractionFn),
        ExtractionFn.class
    );

    // Should all actually be the same instance.
    Assert.assertSame(extractionFn, extractionFnRoundTrip);
    Assert.assertSame(StrlenExtractionFn.instance(), extractionFn);
  }
}

0 comments on commit 102375d

Please sign in to comment.