Index field names of documents.

The `exists` and `missing` filters need to merge postings lists of all existing terms, which can be very costly, especially on high-cardinality fields. This commit indexes the field names of a document under `_field_names` and reuses it to speed up the `exists` and `missing` filters. This is only enabled for indices that are created on or after Elasticsearch 1.3.0. Close #5659
abhijitiitr · Jun 19, 2014 · 703dbff · 703dbff
1 parent e2da211
commit 703dbff
Show file tree

Hide file tree

Showing 14 changed files with 507 additions and 8 deletions.
diff --git a/docs/reference/mapping/fields.asciidoc b/docs/reference/mapping/fields.asciidoc
@@ -21,6 +21,8 @@ include::fields/boost-field.asciidoc[]
 
 include::fields/parent-field.asciidoc[]
 
+include::fields/field-names-field.asciidoc[]
+
 include::fields/routing-field.asciidoc[]
 
 include::fields/index-field.asciidoc[]

diff --git a/docs/reference/mapping/fields/field-names-field.asciidoc b/docs/reference/mapping/fields/field-names-field.asciidoc
@@ -0,0 +1,11 @@
+[[mapping-field-names-field]]
+=== `_field_names`
+
+coming[1.3.0]
+
+The `_field_names` field indexes the field names of a document, which can later
+be used to search for documents based on the fields that they contain, typically
+using the `exists` and `missing` filters.
+
+`_field_names` is indexed by default for indices that have been created on or
+after Elasticsearch 1.3.0.
diff --git a/src/main/java/org/elasticsearch/Version.java b/src/main/java/org/elasticsearch/Version.java
@@ -19,12 +19,14 @@
 
 package org.elasticsearch;
 
+import org.elasticsearch.cluster.metadata.IndexMetaData;
 import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.inject.AbstractModule;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.lucene.Lucene;
+import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.monitor.jvm.JvmInfo;
 
 import java.io.IOException;
@@ -344,6 +346,15 @@ public static Version fromId(int id) {
         }
     }
 
+    /**
+     * Resolves the {@link Version} of Elasticsearch that created an index from the settings of that index.
+     * When the settings do not record a creation version, {@link Version#CURRENT} is returned.
+     */
+    public static Version indexCreated(Settings indexSettings) {
+        final Version created = indexSettings.getAsVersion(IndexMetaData.SETTING_VERSION_CREATED, null);
+        // if the UUID is there the index has actually been created and must carry its creation version,
+        // otherwise this might be a test
+        assert created != null || indexSettings.get(IndexMetaData.SETTING_UUID) == null : IndexMetaData.SETTING_VERSION_CREATED + " not set in IndexSettings";
+        return created == null ? Version.CURRENT : created;
+    }
+
     public static void writeVersion(Version version, StreamOutput out) throws IOException {
         out.writeVInt(version.id);
     }

diff --git a/src/main/java/org/elasticsearch/index/mapper/DocumentMapper.java b/src/main/java/org/elasticsearch/index/mapper/DocumentMapper.java
@@ -180,6 +180,8 @@ public Builder(String index, @Nullable Settings indexSettings, RootObjectMapper.
             this.rootMappers.put(TTLFieldMapper.class, new TTLFieldMapper());
             this.rootMappers.put(VersionFieldMapper.class, new VersionFieldMapper());
             this.rootMappers.put(ParentFieldMapper.class, new ParentFieldMapper());
+            // _field_names last so that it can see all other fields
+            this.rootMappers.put(FieldNamesFieldMapper.class, new FieldNamesFieldMapper(indexSettings));
         }
 
         public Builder meta(ImmutableMap<String, Object> meta) {

diff --git a/src/main/java/org/elasticsearch/index/mapper/DocumentMapperParser.java b/src/main/java/org/elasticsearch/index/mapper/DocumentMapperParser.java
@@ -21,9 +21,7 @@
 
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Maps;
-import org.elasticsearch.ElasticsearchParseException;
 import org.elasticsearch.Version;
-import org.elasticsearch.cluster.metadata.IndexMetaData;
 import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.collect.MapBuilder;
@@ -51,7 +49,6 @@
 
 import java.util.Iterator;
 import java.util.Map;
-import java.util.Set;
 
 import static org.elasticsearch.index.mapper.MapperBuilders.doc;
 
@@ -122,10 +119,9 @@ public DocumentMapperParser(Index index, @IndexSettings Settings indexSettings,
                 .put(UidFieldMapper.NAME, new UidFieldMapper.TypeParser())
                 .put(VersionFieldMapper.NAME, new VersionFieldMapper.TypeParser())
                 .put(IdFieldMapper.NAME, new IdFieldMapper.TypeParser())
+                .put(FieldNamesFieldMapper.NAME, new FieldNamesFieldMapper.TypeParser())
                 .immutableMap();
-        assert indexSettings.get(IndexMetaData.SETTING_UUID) == null // if the UUDI is there the index has actually been created otherwise this might be a test
-                || indexSettings.getAsVersion(IndexMetaData.SETTING_VERSION_CREATED, null) != null : IndexMetaData.SETTING_VERSION_CREATED + " not set in IndexSettings";
-        indexVersionCreated = indexSettings.getAsVersion(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT);
+        indexVersionCreated = Version.indexCreated(indexSettings);
     }
 
     public void putTypeParser(String type, Mapper.TypeParser typeParser) {

diff --git a/src/main/java/org/elasticsearch/index/mapper/MapperBuilders.java b/src/main/java/org/elasticsearch/index/mapper/MapperBuilders.java
@@ -74,6 +74,10 @@ public static TypeFieldMapper.Builder type() {
         return new TypeFieldMapper.Builder();
     }
 
+    /**
+     * Creates a new builder for the <code>_field_names</code> mapper.
+     */
+    public static FieldNamesFieldMapper.Builder fieldNames() {
+        return new FieldNamesFieldMapper.Builder();
+    }
+
     public static IndexFieldMapper.Builder index() {
         return new IndexFieldMapper.Builder();
     }

diff --git a/src/main/java/org/elasticsearch/index/mapper/internal/FieldNamesFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/internal/FieldNamesFieldMapper.java
@@ -0,0 +1,248 @@
+/*
+ * Licensed to Elasticsearch under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.mapper.internal;
+
+import com.google.common.collect.UnmodifiableIterator;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.SortedSetDocValuesField;
+import org.apache.lucene.document.XStringField;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.Version;
+import org.elasticsearch.common.Nullable;
+import org.elasticsearch.common.lucene.Lucene;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.common.xcontent.XContentFactory;
+import org.elasticsearch.index.codec.docvaluesformat.DocValuesFormatProvider;
+import org.elasticsearch.index.codec.postingsformat.PostingsFormatProvider;
+import org.elasticsearch.index.fielddata.FieldDataType;
+import org.elasticsearch.index.mapper.*;
+import org.elasticsearch.index.mapper.core.AbstractFieldMapper;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+
+import static org.elasticsearch.index.mapper.MapperBuilders.fieldNames;
+import static org.elasticsearch.index.mapper.core.TypeParsers.parseField;
+
+/**
+ * A mapper that indexes the field names of a document under <code>_field_names</code>. This mapper is typically useful in order
+ * to have fast <code>exists</code> and <code>missing</code> queries/filters.
+ *
+ * Added in Elasticsearch 1.3.
+ */
+public class FieldNamesFieldMapper extends AbstractFieldMapper<String> implements InternalMapper, RootMapper {
+
+    public static final String NAME = "_field_names";
+
+    public static final String CONTENT_TYPE = "_field_names";
+
+    /**
+     * Default names and Lucene field types of the <code>_field_names</code> mapper.
+     */
+    public static class Defaults extends AbstractFieldMapper.Defaults {
+        public static final String NAME = FieldNamesFieldMapper.NAME;
+        public static final String INDEX_NAME = FieldNamesFieldMapper.NAME;
+
+        // Indexed variant: not tokenized, not stored, norms omitted, docs-only postings (see the static block).
+        public static final FieldType FIELD_TYPE = new FieldType(AbstractFieldMapper.Defaults.FIELD_TYPE);
+        // Same configuration as FIELD_TYPE but not indexed; defaultFieldType(Settings) selects it when the
+        // index settings are missing or the index was created before 1.3.0.
+        public static final FieldType FIELD_TYPE_PRE_1_3_0;
+
+        static {
+            FIELD_TYPE.setIndexed(true);
+            FIELD_TYPE.setTokenized(false);
+            FIELD_TYPE.setStored(false);
+            FIELD_TYPE.setOmitNorms(true);
+            FIELD_TYPE.setIndexOptions(IndexOptions.DOCS_ONLY);
+            FIELD_TYPE.freeze();
+            // copy the fully configured type so that the pre-1.3.0 variant differs only by the indexed flag
+            FIELD_TYPE_PRE_1_3_0 = new FieldType(FIELD_TYPE);
+            FIELD_TYPE_PRE_1_3_0.setIndexed(false);
+            FIELD_TYPE_PRE_1_3_0.freeze();
+        }
+    }
+
+    /**
+     * Builder for {@link FieldNamesFieldMapper}. Unless the mapping sets <code>index</code> explicitly,
+     * indexing is switched off for indices whose creation version is unknown or older than 1.3.0.
+     */
+    public static class Builder extends AbstractFieldMapper.Builder<Builder, FieldNamesFieldMapper> {
+
+        // becomes true as soon as index(boolean) is called, so that build() keeps the explicit choice
+        private boolean indexIsExplicit;
+
+        public Builder() {
+            super(Defaults.NAME, new FieldType(Defaults.FIELD_TYPE));
+            indexName = Defaults.INDEX_NAME;
+        }
+
+        @Override
+        public Builder index(boolean index) {
+            indexIsExplicit = true;
+            return super.index(index);
+        }
+
+        @Override
+        public FieldNamesFieldMapper build(BuilderContext context) {
+            if (!indexIsExplicit) {
+                final Version created = context.indexCreatedVersion();
+                if (created == null || created.before(Version.V_1_3_0)) {
+                    // field names were not indexed before 1.3.0
+                    fieldType.setIndexed(false);
+                }
+            }
+            return new FieldNamesFieldMapper(name, indexName, boost, fieldType, postingsProvider, docValuesProvider, fieldDataSettings, context.indexSettings());
+        }
+    }
+
+    /**
+     * Parses the <code>_field_names</code> section of a mapping into a {@link Builder}.
+     */
+    public static class TypeParser implements Mapper.TypeParser {
+        @Override
+        public Mapper.Builder parse(String name, Map<String, Object> node, ParserContext parserContext) throws MapperParsingException {
+            final FieldNamesFieldMapper.Builder fieldNamesBuilder = fieldNames();
+            // the builder's own name is passed on rather than the "name" argument
+            parseField(fieldNamesBuilder, fieldNamesBuilder.name, node, parserContext);
+            return fieldNamesBuilder;
+        }
+    }
+
+    private final FieldType defaultFieldType;
+
+    private static FieldType defaultFieldType(Settings indexSettings) {
+        return indexSettings != null && Version.indexCreated(indexSettings).onOrAfter(Version.V_1_3_0) ? Defaults.FIELD_TYPE : Defaults.FIELD_TYPE_PRE_1_3_0;
+    }
+
+    /**
+     * Creates the mapper with the default name and index name for the given index settings.
+     */
+    public FieldNamesFieldMapper(Settings indexSettings) {
+        this(Defaults.NAME, Defaults.INDEX_NAME, indexSettings);
+    }
+
+    /**
+     * Creates the mapper with the default boost and a mutable copy of the default field type that
+     * {@link #defaultFieldType(Settings)} selects for the given index settings; no postings format provider,
+     * doc values format provider or field data settings are configured.
+     */
+    protected FieldNamesFieldMapper(String name, String indexName, Settings indexSettings) {
+        this(name, indexName, Defaults.BOOST, new FieldType(defaultFieldType(indexSettings)), null, null, null, indexSettings);
+    }
+
+    /**
+     * Creates the mapper from explicit settings. {@link Lucene#KEYWORD_ANALYZER} is passed for both analyzer
+     * arguments of the parent constructor, and the default field type matching the index settings is
+     * remembered so that {@link #defaultFieldType()} can return it.
+     */
+    public FieldNamesFieldMapper(String name, String indexName, float boost, FieldType fieldType, PostingsFormatProvider postingsProvider,
+                           DocValuesFormatProvider docValuesProvider, @Nullable Settings fieldDataSettings, Settings indexSettings) {
+        super(new Names(name, indexName, indexName, name), boost, fieldType, null, Lucene.KEYWORD_ANALYZER,
+                Lucene.KEYWORD_ANALYZER, postingsProvider, docValuesProvider, null, null, fieldDataSettings, indexSettings);
+        this.defaultFieldType = defaultFieldType(indexSettings);
+    }
+
+    /**
+     * Returns the default field type that was selected from the index settings at construction time.
+     */
+    @Override
+    public FieldType defaultFieldType() {
+        return defaultFieldType;
+    }
+
+    /**
+     * Returns a new <code>string</code> field data type.
+     */
+    @Override
+    public FieldDataType defaultFieldDataType() {
+        return new FieldDataType("string");
+    }
+
+    /**
+     * Converts a value to its string form; <code>null</code> stays <code>null</code>.
+     */
+    @Override
+    public String value(Object value) {
+        return value == null ? null : value.toString();
+    }
+
+    /**
+     * Always <code>true</code>.
+     * NOTE(review): presumably makes the query string parser build term queries for this field - confirm
+     * against the contract declared by the parent mapper.
+     */
+    @Override
+    public boolean useTermQueryWithQueryString() {
+        return true;
+    }
+
+    /**
+     * Nothing to do before the document is parsed: the work happens in {@link #postParse(ParseContext)}.
+     */
+    @Override
+    public void preParse(ParseContext context) throws IOException {
+    }
+
+    /**
+     * Invokes the inherited <code>parse</code> implementation once the rest of the document has been parsed,
+     * bypassing the no-op {@link #parse(ParseContext)} override of this class.
+     * NOTE(review): the inherited implementation presumably ends up calling
+     * {@link #parseCreateField(ParseContext, List)} - confirm against the parent mapper.
+     */
+    @Override
+    public void postParse(ParseContext context) throws IOException {
+        super.parse(context);
+    }
+
+    /**
+     * Intentionally a no-op: parsing is deferred to {@link #postParse(ParseContext)}.
+     */
+    @Override
+    public void parse(ParseContext context) throws IOException {
+        // we parse in post parse
+    }
+
+    /**
+     * Always <code>false</code>.
+     * NOTE(review): presumably means the field is not read from the document source object - confirm
+     * against the {@link RootMapper} contract.
+     */
+    @Override
+    public boolean includeInObject() {
+        return false;
+    }
+
+    /**
+     * Enumerates a dotted path together with all of its dot-delimited prefixes, shortest first:
+     * <code>"a.b.c"</code> yields <code>"a"</code>, <code>"a.b"</code> and <code>"a.b.c"</code>.
+     * An empty path yields a single empty string.
+     *
+     * @param fullPath the full, dot-separated field path
+     * @return an iterable whose iterators are read-only and throw {@link java.util.NoSuchElementException}
+     *         when advanced past the last element
+     */
+    static Iterable<String> extractFieldNames(final String fullPath) {
+        return new Iterable<String>() {
+            @Override
+            public Iterator<String> iterator() {
+                return new UnmodifiableIterator<String>() {
+
+                    // exclusive end offset of the next prefix to return; greater than fullPath.length() once exhausted
+                    int endIndex = nextEndIndex(0);
+
+                    // offset of the first '.' at or after index, or the first offset >= index that is not inside the path
+                    private int nextEndIndex(int index) {
+                        while (index < fullPath.length() && fullPath.charAt(index) != '.') {
+                            index += 1;
+                        }
+                        return index;
+                    }
+
+                    @Override
+                    public boolean hasNext() {
+                        return endIndex <= fullPath.length();
+                    }
+
+                    @Override
+                    public String next() {
+                        if (!hasNext()) {
+                            // honour the Iterator contract instead of failing inside substring()
+                            throw new java.util.NoSuchElementException();
+                        }
+                        final String result = fullPath.substring(0, endIndex);
+                        // skip the separator (or step past the end) before looking for the next one
+                        endIndex = nextEndIndex(endIndex + 1);
+                        return result;
+                    }
+
+                };
+            }
+        };
+    }
+
+    /**
+     * Adds, to every document of the parse context, one <code>_field_names</code> value per field name
+     * found in that document, including every dot-delimited prefix of each name. Nothing is added when the
+     * field is neither indexed nor stored and has no doc values.
+     */
+    @Override
+    protected void parseCreateField(ParseContext context, List<Field> fields) throws IOException {
+        final boolean indexedOrStored = fieldType.indexed() || fieldType.stored();
+        final boolean withDocValues = hasDocValues();
+        if (!indexedOrStored && !withDocValues) {
+            return;
+        }
+        final String indexName = names().indexName();
+        for (ParseContext.Document doc : context.docs()) {
+            // take a snapshot of the names first: the document is modified while they are processed
+            final List<String> fieldPaths = new ArrayList<>();
+            for (IndexableField existing : doc.getFields()) {
+                fieldPaths.add(existing.name());
+            }
+            for (String fieldPath : fieldPaths) {
+                for (String prefix : extractFieldNames(fieldPath)) {
+                    if (indexedOrStored) {
+                        doc.add(new XStringField(indexName, prefix, fieldType));
+                    }
+                    if (withDocValues) {
+                        doc.add(new SortedSetDocValuesField(indexName, new BytesRef(prefix)));
+                    }
+                }
+            }
+        }
+    }
+
+    /**
+     * Returns the content type of this mapper, {@link #CONTENT_TYPE}.
+     */
+    @Override
+    protected String contentType() {
+        return CONTENT_TYPE;
+    }
+
+    /**
+     * Serializes this mapper, leaving it out entirely when it has the default configuration.
+     * <p>
+     * The mapping is first rendered into a scratch JSON builder and compared with the rendering of a default
+     * mapper; only when they differ is the mapping written to the target builder.
+     *
+     * @param builder the builder to write the mapping to
+     * @param params  serialization parameters, passed through to the parent implementation
+     * @return the given builder
+     */
+    @Override
+    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+        XContentBuilder json = XContentFactory.jsonBuilder();
+        // NOTE(review): the parent is expected to emit this mapper as a named object (field name followed by
+        // its settings), which is only legal inside an enclosing object - hence the wrapping start/end object.
+        json.startObject();
+        super.toXContent(json, params);
+        json.endObject();
+        if (json.string().equals("{\"" + NAME + "\":{\"type\":\"" + CONTENT_TYPE + "\"}}")) {
+            // default configuration: nothing worth serializing
+            return builder;
+        }
+        return super.toXContent(builder, params);
+    }
+}
diff --git a/src/main/java/org/elasticsearch/index/query/ExistsFilterParser.java b/src/main/java/org/elasticsearch/index/query/ExistsFilterParser.java
@@ -27,7 +27,9 @@
 import org.elasticsearch.common.lucene.search.XBooleanFilter;
 import org.elasticsearch.common.xcontent.XContentParser;
 import org.elasticsearch.index.cache.filter.support.CacheKeyFilter;
+import org.elasticsearch.index.mapper.FieldMappers;
 import org.elasticsearch.index.mapper.MapperService;
+import org.elasticsearch.index.mapper.internal.FieldNamesFieldMapper;
 
 import java.io.IOException;
 import java.util.Set;
@@ -81,6 +83,8 @@ public Filter parse(QueryParseContext parseContext) throws IOException, QueryPar
     }
 
     public static Filter newFilter(QueryParseContext parseContext, String fieldPattern, String filterName) {
+        final FieldMappers fieldNamesMapper = parseContext.mapperService().indexName(FieldNamesFieldMapper.CONTENT_TYPE);
+
         MapperService.SmartNameObjectMapper smartNameObjectMapper = parseContext.smartObjectMapper(fieldPattern);
         if (smartNameObjectMapper != null && smartNameObjectMapper.hasMapper()) {
             // automatic make the object mapper pattern
@@ -101,7 +105,17 @@ public static Filter newFilter(QueryParseContext parseContext, String fieldPatte
                 nonNullFieldMappers = smartNameFieldMappers;
             }
             Filter filter = null;
-            if (smartNameFieldMappers != null && smartNameFieldMappers.hasMapper()) {
+            if (fieldNamesMapper!= null && fieldNamesMapper.mapper().fieldType().indexed()) {
+                final String f;
+                if (smartNameFieldMappers != null && smartNameFieldMappers.hasMapper()) {
+                    f = smartNameFieldMappers.mapper().names().indexName();
+                } else {
+                    f = field;
+                }
+                filter = fieldNamesMapper.mapper().termFilter(f, parseContext);
+            }
+            // if _field_names are not indexed, we need to go the slow way
+            if (filter == null && smartNameFieldMappers != null && smartNameFieldMappers.hasMapper()) {
                 filter = smartNameFieldMappers.mapper().rangeFilter(null, null, true, true, parseContext);
             }
             if (filter == null) {