Skip to content

Commit

Permalink
Fielddata: Merge ordinals APIs into BytesValues.WithOrdinals.
Browse files Browse the repository at this point in the history
Mid-term we should switch from `BytesValues` to Lucene's doc values APIs, in
particular the `SortedSetDocValues` class. While `BytesValues.WithOrdinals` and
SortedSetDocValues expose the same functionality, `BytesValues.WithOrdinals`
exposes its ordinals via a different `Ordinals.Docs` object while
`SortedSetDocValues` exposes them on the same object as the one that holds the
values. This commit merges ordinals into `BytesValues.WithOrdinals` in order to
make both classes even closer.

Global ordinals were a bit tricky to migrate so I just changed them to use
Lucene's OrdinalMap that will soon (LUCENE-5767, scheduled for 4.9) have the
same optimizations as our global ordinals.

Close #6524
  • Loading branch information
jpountz committed Jun 19, 2014
1 parent 703dbff commit 9b02b50
Show file tree
Hide file tree
Showing 66 changed files with 627 additions and 1,751 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,10 @@ public BytesValues getBytesValues() {
if (isFloat) {
final DoubleValues values = getDoubleValues();
return new BytesValues(values.isMultiValued()) {
private final BytesRef scratch = new BytesRef();

@Override
public int setDocument(int docId) {
this.docId = docId;
return values.setDocument(docId);
}

Expand All @@ -67,10 +67,10 @@ public Order getOrder() {
} else {
final LongValues values = getLongValues();
return new BytesValues(values.isMultiValued()) {
private final BytesRef scratch = new BytesRef();

@Override
public int setDocument(int docId) {
this.docId = docId;
return values.setDocument(docId);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,29 +20,20 @@
package org.elasticsearch.index.fielddata;

import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.index.fielddata.ScriptDocValues.Strings;
import org.elasticsearch.index.fielddata.plain.AtomicFieldDataWithOrdinalsTermsEnum;

/**
* The thread-safe, {@link org.apache.lucene.index.AtomicReader}-level cache of the data.
*/
public interface AtomicFieldData<Script extends ScriptDocValues> extends RamUsage {

/**
* If this method returns false, this means that no document has multiple values. However this method may return true even if all
* documents are single-valued. So this method is useful for performing optimizations when the single-value case makes the problem
* simpler but cannot be used to actually check whether this instance is multi-valued.
*/
boolean isMultiValued();

/**
* An upper limit of the number of unique values in this atomic field data.
*/
long getNumberUniqueValues();

/**
* Use a non thread safe (lightweight) view of the values as bytes.
*/
BytesValues getBytesValues();

/**
* Returns a "scripting" based values.
*/
Expand All @@ -55,6 +46,65 @@ public interface AtomicFieldData<Script extends ScriptDocValues> extends RamUsag

interface WithOrdinals<Script extends ScriptDocValues> extends AtomicFieldData<Script> {

/**
 * Field data instance that holds no values at all: it reports a memory
 * footprint of zero and hands out ordinal-aware bytes values that contain
 * no value for any document.
 */
public static final WithOrdinals<ScriptDocValues.Strings> EMPTY = new WithOrdinals<ScriptDocValues.Strings>() {

    @Override
    public BytesValues.WithOrdinals getBytesValues() {
        // Single-valued (multiValued == false) view in which every document is empty.
        return new BytesValues.WithOrdinals(false) {

            /** Every document has zero values. */
            @Override
            public int setDocument(int docId) {
                return 0;
            }

            /** No ordinals exist, so the maximum ordinal is reported as zero. */
            @Override
            public long getMaxOrd() {
                return 0;
            }

            /** No document has an ordinal. */
            @Override
            public long getOrd(int docId) {
                return MISSING_ORDINAL;
            }

            /** There is never a next ordinal to iterate to. */
            @Override
            public long nextOrd() {
                return MISSING_ORDINAL;
            }

            /** There is never a current ordinal. */
            @Override
            public long currentOrd() {
                return MISSING_ORDINAL;
            }

            /** No ordinal can be resolved to a value; always throws. */
            @Override
            public BytesRef getValueByOrd(long ord) {
                throw new UnsupportedOperationException();
            }
        };
    }

    @Override
    public Strings getScriptValues() {
        // Script values are backed by a fresh (empty) bytes values view.
        final BytesValues.WithOrdinals emptyValues = getBytesValues();
        return new ScriptDocValues.Strings(emptyValues);
    }

    @Override
    public TermsEnum getTermsEnum() {
        return new AtomicFieldDataWithOrdinalsTermsEnum(this);
    }

    @Override
    public long getMemorySizeInBytes() {
        return 0;
    }

    @Override
    public void close() {
        // intentionally a no-op
    }

};

/**
* Use a non thread safe (lightweight) view of the values as bytes.
* @param needsHashes
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,10 @@ public abstract class AtomicGeoPointFieldData<Script extends ScriptDocValues> im
public BytesValues getBytesValues() {
final GeoPointValues values = getGeoPointValues();
return new BytesValues(values.isMultiValued()) {
private final BytesRef scratch = new BytesRef();

@Override
public int setDocument(int docId) {
this.docId = docId;
return values.setDocument(docId);
}

Expand Down
59 changes: 33 additions & 26 deletions src/main/java/org/elasticsearch/index/fielddata/BytesValues.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,9 @@

package org.elasticsearch.index.fielddata;

import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.ElasticsearchIllegalStateException;
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
import org.elasticsearch.index.fielddata.ordinals.Ordinals.Docs;

/**
* A stateful, lightweight, per-document set of <code>byte[]</code> values.
Expand All @@ -44,11 +43,7 @@ public abstract class BytesValues {
*/
public static final BytesValues EMPTY = new Empty();

private boolean multiValued;

protected final BytesRef scratch = new BytesRef();

protected int docId = -1;
private final boolean multiValued;

/**
* Creates a new {@link BytesValues} instance
Expand Down Expand Up @@ -114,20 +109,41 @@ public AtomicFieldData.Order getOrder() {
*/
public static abstract class WithOrdinals extends BytesValues {

protected final Docs ordinals;
public static final long MIN_ORDINAL = 0;
public static final long MISSING_ORDINAL = SortedSetDocValues.NO_MORE_ORDS;

protected WithOrdinals(Ordinals.Docs ordinals) {
super(ordinals.isMultiValued());
this.ordinals = ordinals;
protected WithOrdinals(boolean multiValued) {
super(multiValued);
}

/**
* Returns the associated ordinals instance.
* @return the associated ordinals instance.
* Returns the total number of unique ordinals.
*/
public Ordinals.Docs ordinals() {
return ordinals;
}
public abstract long getMaxOrd();

/**
* Returns the ordinal that maps to the given docId.
* <p>
* If the document has no value, returns {@link #MISSING_ORDINAL}. Note that
* <tt>0</tt> is {@link #MIN_ORDINAL} and therefore not a "missing" marker.
* NOTE(review): this comment previously stated that <tt>0</tt> is returned for
* documents without a value; confirm that all implementations return
* {@link #MISSING_ORDINAL} instead.
*
* @param docId the document to look up
* @return the ordinal for the document, or {@link #MISSING_ORDINAL} if it has no value
*/
public abstract long getOrd(int docId);

/**
* Returns the next ordinal for the current docID, as set via {@link #setDocument(int)}.
* This method should only be called <tt>N</tt> times where <tt>N</tt> is the number
* returned from {@link #setDocument(int)}. If called more than <tt>N</tt> times the behavior
* is undefined.
*
* NOTE(review): this comment previously stated that this method never returns
* <tt>0</tt>. With {@link #MIN_ORDINAL} defined as <tt>0</tt> that statement looks
* stale, since <tt>0</tt> appears to be a valid ordinal; confirm against implementations.
*
* @return the next ordinal for the current docID, as set via {@link #setDocument(int)}.
*/
public abstract long nextOrd();

/**
* Returns the current ordinal in the iteration
* @return the current ordinal in the iteration
*/
public abstract long currentOrd();

/**
* Returns the value for the given ordinal.
Expand All @@ -137,18 +153,9 @@ public Ordinals.Docs ordinals() {
*/
public abstract BytesRef getValueByOrd(long ord);

@Override
public int setDocument(int docId) {
this.docId = docId;
int length = ordinals.setDocument(docId);
assert (ordinals.getOrd(docId) != Ordinals.MISSING_ORDINAL) == length > 0 : "Doc: [" + docId + "] hasValue: [" + (ordinals.getOrd(docId) != Ordinals.MISSING_ORDINAL) + "] but length is [" + length + "]";
return length;
}

@Override
public BytesRef nextValue() {
assert docId != -1;
return getValueByOrd(ordinals.nextOrd());
return getValueByOrd(nextOrd());
}
}

Expand Down
14 changes: 2 additions & 12 deletions src/main/java/org/elasticsearch/index/fielddata/DoubleValues.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@
package org.elasticsearch.index.fielddata;

import org.elasticsearch.ElasticsearchIllegalStateException;
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
import org.elasticsearch.index.fielddata.ordinals.Ordinals.Docs;

/**
* A stateful, lightweight, per-document set of <code>double</code> values.
Expand Down Expand Up @@ -100,21 +98,13 @@ public AtomicFieldData.Order getOrder() {
*/
public static abstract class WithOrdinals extends DoubleValues {

protected final Docs ordinals;
protected final BytesValues.WithOrdinals ordinals;

protected WithOrdinals(Ordinals.Docs ordinals) {
protected WithOrdinals(BytesValues.WithOrdinals ordinals) {
super(ordinals.isMultiValued());
this.ordinals = ordinals;
}

/**
* Returns the associated ordinals instance.
* @return the associated ordinals instance.
*/
public Docs ordinals() {
return ordinals;
}

/**
* Returns the value for the given ordinal.
* @param ord the ordinal to lookup.
Expand Down
14 changes: 2 additions & 12 deletions src/main/java/org/elasticsearch/index/fielddata/LongValues.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@
package org.elasticsearch.index.fielddata;

import org.elasticsearch.ElasticsearchIllegalStateException;
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
import org.elasticsearch.index.fielddata.ordinals.Ordinals.Docs;

/**
* A stateful, lightweight, per-document set of <code>long</code> values.
Expand Down Expand Up @@ -101,21 +99,13 @@ public AtomicFieldData.Order getOrder() {
*/
public static abstract class WithOrdinals extends LongValues {

protected final Docs ordinals;
protected final BytesValues.WithOrdinals ordinals;

protected WithOrdinals(Ordinals.Docs ordinals) {
protected WithOrdinals(BytesValues.WithOrdinals ordinals) {
super(ordinals.isMultiValued());
this.ordinals = ordinals;
}

/**
* Returns the associated ordinals instance.
* @return the associated ordinals instance.
*/
public Docs ordinals() {
return this.ordinals;
}

/**
* Returns the value for the given ordinal.
* @param ord the ordinal to lookup.
Expand Down
Loading

0 comments on commit 9b02b50

Please sign in to comment.