From bb8edcf7353d820e5c3906554d208b62069fd5ed Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Wed, 22 Nov 2023 10:13:10 -0500 Subject: [PATCH] ESQL: Load text field from parent keyword field This adds support for loading a text field from a parent keyword field. The mapping for that looks like: ``` "properties": { "foo": { "type": "keyword", "fields": { "text": { "type": "text" } } } } ``` In this case it's safe to load the `text` subfield from the doc values for the `keyword` field above. --- .../index/mapper/BlockDocValuesReader.java | 5 ++ .../index/mapper/FieldTypeLookup.java | 7 +++ .../index/mapper/KeywordFieldMapper.java | 4 ++ .../index/mapper/MappedFieldType.java | 5 ++ .../index/mapper/MappingLookup.java | 7 +++ .../index/mapper/TextFieldMapper.java | 43 +++++++++++----- .../index/query/SearchExecutionContext.java | 7 +++ .../search/lookup/SearchLookup.java | 4 +- .../index/mapper/KeywordFieldMapperTests.java | 15 ++++-- .../index/mapper/TextFieldMapperTests.java | 50 +++++++++++++++++-- .../AbstractScriptFieldTypeTestCase.java | 5 ++ .../index/mapper/MapperTestCase.java | 22 ++++++-- .../compute/lucene/BlockReaderFactories.java | 5 ++ 13 files changed, 152 insertions(+), 27 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java b/server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java index e52e3177c6a54..11e57e030dfe7 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java @@ -541,6 +541,11 @@ public boolean supportsOrdinals() { public SortedSetDocValues ordinals(LeafReaderContext context) throws IOException { return DocValues.getSortedSet(context.reader(), fieldName); } + + @Override + public String toString() { + return "BytesRefsFromOrds[" + fieldName + "]"; + } } private static class SingletonOrdinals extends BlockDocValuesReader { diff --git a/server/src/main/java/org/elasticsearch/index/mapper/FieldTypeLookup.java b/server/src/main/java/org/elasticsearch/index/mapper/FieldTypeLookup.java index b9ba0762e5117..2b4eec2bdd565 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/FieldTypeLookup.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/FieldTypeLookup.java @@ -204,4 +204,11 @@ Set sourcePaths(String field) { return fieldToCopiedFields.containsKey(resolvedField) ? fieldToCopiedFields.get(resolvedField) : Set.of(resolvedField); } + + /** + * If field is a leaf multi-field return the path to the parent field. Otherwise, return null. + */ + public String parentField(String field) { + return fullSubfieldNameToParentPath.get(field); + } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java index caac5b7f3bfe0..b62113a586bba 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java @@ -822,6 +822,10 @@ public void validateMatchedRoutingPath(final String routingPath) { ); } } + + public boolean hasNormalizer() { + return normalizer != Lucene.KEYWORD_ANALYZER; + } } private final boolean indexed; diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java index b68bb1a2b1987..376cb1a10e2e6 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java @@ -658,6 +658,11 @@ public interface BlockLoaderContext { * Find the paths in {@code _source} that contain values for the field named {@code name}. */ Set sourcePaths(String name); + + /** + * If field is a leaf multi-field return the path to the parent field. Otherwise, return null. + */ + String parentField(String field); } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MappingLookup.java b/server/src/main/java/org/elasticsearch/index/mapper/MappingLookup.java index 7c44f33fbafa5..4880ce5edc204 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MappingLookup.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MappingLookup.java @@ -404,6 +404,13 @@ public Set sourcePaths(String field) { return fieldTypesLookup().sourcePaths(field); } + /** + * If field is a leaf multi-field return the path to the parent field. Otherwise, return null. + */ + public String parentField(String field) { + return fieldTypesLookup().parentField(field); + } + /** * Returns true if the index has mappings. An index does not have mappings only if it was created * without providing mappings explicitly, and no documents have yet been indexed in it. diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index 420f92cfbf847..1ae0489173ce3 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -678,7 +678,8 @@ public TextFieldType( super(name, indexed, stored, false, tsi, meta); fielddata = false; this.isSyntheticSource = isSyntheticSource; - this.syntheticSourceDelegate = syntheticSourceDelegate; // TODO rename to "exactDelegate" or something + // TODO block loader could use a "fast loading" delegate which isn't always the same - but frequently is. + this.syntheticSourceDelegate = syntheticSourceDelegate; this.eagerGlobalOrdinals = eagerGlobalOrdinals; this.indexPhrases = indexPhrases; } @@ -946,21 +947,37 @@ protected String delegatingTo() { } }; } - if (isSyntheticSource) { - if (isStored()) { - return new BlockStoredFieldsReader.BytesFromStringsBlockLoader(name()); + /* + * If this is a sub-text field try and return the parent's loader. Text + * fields will always be slow to load and if the parent is exact then we + * should use that instead. + */ + String parentField = blContext.parentField(name()); + if (parentField != null) { + MappedFieldType parent = blContext.lookup().fieldType(parentField); + if (parent.typeName().equals(KeywordFieldMapper.CONTENT_TYPE)) { + KeywordFieldMapper.KeywordFieldType kwd = (KeywordFieldMapper.KeywordFieldType) parent; + if (kwd.hasNormalizer() == false && (kwd.hasDocValues() || kwd.isStored())) { + return new BlockLoader.Delegating(kwd.blockLoader(blContext)) { + @Override + protected String delegatingTo() { + return kwd.name(); + } + }; + } } + } + if (isStored()) { + return new BlockStoredFieldsReader.BytesFromStringsBlockLoader(name()); + } + if (isSyntheticSource) { /* - * We *shouldn't fall to this exception. The mapping should be - * rejected because we've enabled synthetic source but not configured - * the index properly. But we give it a nice message anyway just in - * case. + * When we're in synthetic source mode we don't currently + * support text fields that are not stored and are not children + * of perfect keyword fields. We'd have to load from the parent + * field and then convert the result to a string. */ - throw new IllegalArgumentException( - "fetching values from a text field [" - + name() - + "] is not supported because synthetic _source is enabled and we don't have a way to load the fields" - ); + return null; } return new BlockSourceReader.BytesRefsBlockLoader(SourceValueFetcher.toString(blContext.sourcePaths(name()))); } diff --git a/server/src/main/java/org/elasticsearch/index/query/SearchExecutionContext.java b/server/src/main/java/org/elasticsearch/index/query/SearchExecutionContext.java index c4806dbd3a0a8..143dfe7fe6e9d 100644 --- a/server/src/main/java/org/elasticsearch/index/query/SearchExecutionContext.java +++ b/server/src/main/java/org/elasticsearch/index/query/SearchExecutionContext.java @@ -334,6 +334,13 @@ public Set sourcePath(String fullName) { return mappingLookup.sourcePaths(fullName); } + /** + * If field is a leaf multi-field return the path to the parent field. Otherwise, return null. + */ + public String parentPath(String field) { + return mappingLookup.parentField(field); + } + /** * Will there be {@code _source}. */ diff --git a/server/src/main/java/org/elasticsearch/search/lookup/SearchLookup.java b/server/src/main/java/org/elasticsearch/search/lookup/SearchLookup.java index 06f71fbf2514d..f88441b32d08b 100644 --- a/server/src/main/java/org/elasticsearch/search/lookup/SearchLookup.java +++ b/server/src/main/java/org/elasticsearch/search/lookup/SearchLookup.java @@ -37,8 +37,8 @@ public class SearchLookup implements SourceProvider { * The chain of fields for which this lookup was created, used for detecting * loops caused by runtime fields referring to other runtime fields. The chain is empty * for the "top level" lookup created for the entire search. When a lookup is used to load - * fielddata for a field, we fork it and make sure the field name name isn't in the chain, - * then add it to the end. So the lookup for the a field named {@code a} will be {@code ["a"]}. If + * fielddata for a field, we fork it and make sure the field name isn't in the chain, + * then add it to the end. So the lookup for a field named {@code a} will be {@code ["a"]}. If * that field looks up the values of a field named {@code b} then * {@code b}'s chain will contain {@code ["a", "b"]}. */ diff --git a/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java index eafb33cd44cd4..d6e93fceb713e 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java @@ -657,18 +657,25 @@ protected Function loadBlockExpected() { @Override protected SyntheticSourceSupport syntheticSourceSupport(boolean ignoreMalformed) { assertFalse("keyword doesn't support ignore_malformed", ignoreMalformed); - return new KeywordSyntheticSourceSupport(randomBoolean(), usually() ? null : randomAlphaOfLength(2), true); + return new KeywordSyntheticSourceSupport( + randomBoolean() ? null : between(10, 100), + randomBoolean(), + usually() ? null : randomAlphaOfLength(2), + true + ); } static class KeywordSyntheticSourceSupport implements SyntheticSourceSupport { - private final Integer ignoreAbove = randomBoolean() ? null : between(10, 100); - private final boolean allIgnored = ignoreAbove != null && rarely(); + private final Integer ignoreAbove; + private final boolean allIgnored; private final boolean store; private final boolean docValues; private final String nullValue; private final boolean exampleSortsUsingIgnoreAbove; - KeywordSyntheticSourceSupport(boolean store, String nullValue, boolean exampleSortsUsingIgnoreAbove) { + KeywordSyntheticSourceSupport(Integer ignoreAbove, boolean store, String nullValue, boolean exampleSortsUsingIgnoreAbove) { + this.ignoreAbove = ignoreAbove; + this.allIgnored = ignoreAbove != null && rarely(); this.store = store; this.nullValue = nullValue; this.exampleSortsUsingIgnoreAbove = exampleSortsUsingIgnoreAbove; diff --git a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java index b2a729d6868d2..0460108e565ce 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java @@ -46,6 +46,7 @@ import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.Strings; import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery; +import org.elasticsearch.core.CheckedConsumer; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.analysis.AnalyzerScope; @@ -1121,6 +1122,7 @@ protected SyntheticSourceSupport syntheticSourceSupport(boolean ignoreMalformed) boolean storedKeywordField = storeTextField || randomBoolean(); String nullValue = storeTextField || usually() ? null : randomAlphaOfLength(2); KeywordFieldMapperTests.KeywordSyntheticSourceSupport keywordSupport = new KeywordFieldMapperTests.KeywordSyntheticSourceSupport( + randomBoolean() ? null : between(10, 100), storedKeywordField, nullValue, false == storeTextField @@ -1326,8 +1328,50 @@ public void testEmpty() throws Exception { } @Override - protected boolean supportsColumnAtATimeReader(MappedFieldType ft) { - TextFieldMapper.TextFieldType text = (TextFieldType) ft; - return text.syntheticSourceDelegate() != null && text.syntheticSourceDelegate().hasDocValues(); + protected boolean supportsColumnAtATimeReader(MapperService mapper, MappedFieldType ft) { + String parentName = mapper.mappingLookup().parentField(ft.name()); + if (parentName == null) { + TextFieldMapper.TextFieldType text = (TextFieldType) ft; + return text.syntheticSourceDelegate() != null && text.syntheticSourceDelegate().hasDocValues(); + } + MappedFieldType parent = mapper.fieldType(parentName); + if (false == parent.typeName().equals(KeywordFieldMapper.CONTENT_TYPE)) { + throw new UnsupportedOperationException(); + } + KeywordFieldMapper.KeywordFieldType kwd = (KeywordFieldMapper.KeywordFieldType) parent; + return kwd.hasDocValues(); + } + + public void testBlockLoaderFromParentColumnReader() throws IOException { + testBlockLoaderFromParent(true, randomBoolean()); + } + + public void testBlockLoaderParentFromRowStrideReader() throws IOException { + testBlockLoaderFromParent(false, randomBoolean()); + } + + private void testBlockLoaderFromParent(boolean columnReader, boolean syntheticSource) throws IOException { + boolean storeParent = randomBoolean(); + KeywordFieldMapperTests.KeywordSyntheticSourceSupport kwdSupport = new KeywordFieldMapperTests.KeywordSyntheticSourceSupport( + null, + storeParent, + null, + false == storeParent + ); + SyntheticSourceExample example = kwdSupport.example(5); + CheckedConsumer buildFields = b -> { + b.startObject("field"); + { + example.mapping().accept(b); + b.startObject("fields").startObject("sub"); + { + b.field("type", "text"); + } + b.endObject().endObject(); + } + b.endObject(); + }; + MapperService mapper = createMapperService(syntheticSource ? syntheticSourceMapping(buildFields) : mapping(buildFields)); + testBlockLoader(columnReader, example, mapper, "field.sub"); } } diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/AbstractScriptFieldTypeTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/AbstractScriptFieldTypeTestCase.java index 7eb2511f58206..e8f155849b090 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/AbstractScriptFieldTypeTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/AbstractScriptFieldTypeTestCase.java @@ -430,6 +430,11 @@ public SearchLookup lookup() { public Set sourcePaths(String name) { throw new UnsupportedOperationException(); } + + @Override + public String parentField(String field) { + throw new UnsupportedOperationException(); + } }; } diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java index 7bc4ed62eef84..afc682583ecd8 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java @@ -1247,18 +1247,24 @@ public final void testBlockLoaderFromRowStrideReader() throws IOException { testBlockLoader(false); } - protected boolean supportsColumnAtATimeReader(MappedFieldType ft) { + protected boolean supportsColumnAtATimeReader(MapperService mapper, MappedFieldType ft) { return ft.hasDocValues(); } private void testBlockLoader(boolean columnReader) throws IOException { SyntheticSourceExample example = syntheticSourceSupport(false).example(5); - MapperService mapper = createMapperService(syntheticSourceMapping(b -> { + MapperService mapper = createMapperService(syntheticSourceMapping(b -> { // TODO randomly use syntheticSourceMapping or normal b.startObject("field"); example.mapping().accept(b); b.endObject(); })); - BlockLoader loader = mapper.fieldType("field").blockLoader(new MappedFieldType.BlockLoaderContext() { + testBlockLoader(columnReader, example, mapper, "field"); + } + + protected final void testBlockLoader(boolean columnReader, SyntheticSourceExample example, MapperService mapper, String loaderFieldName) + throws IOException { + SearchLookup searchLookup = new SearchLookup(mapper.mappingLookup().fieldTypesLookup()::get, null, null); + BlockLoader loader = mapper.fieldType(loaderFieldName).blockLoader(new MappedFieldType.BlockLoaderContext() { @Override public String indexName() { throw new UnsupportedOperationException(); @@ -1266,13 +1272,18 @@ public String indexName() { @Override public SearchLookup lookup() { - throw new UnsupportedOperationException(); + return searchLookup; } @Override public Set sourcePaths(String name) { return mapper.mappingLookup().sourcePaths(name); } + + @Override + public String parentField(String field) { + return mapper.mappingLookup().parentField(field); + } }); Function valuesConvert = loadBlockExpected(); if (valuesConvert == null) { @@ -1291,7 +1302,7 @@ public Set sourcePaths(String name) { LeafReaderContext ctx = reader.leaves().get(0); TestBlock block; if (columnReader) { - if (supportsColumnAtATimeReader(mapper.fieldType("field"))) { + if (supportsColumnAtATimeReader(mapper, mapper.fieldType(loaderFieldName))) { block = (TestBlock) loader.columnAtATimeReader(ctx).read(TestBlock.FACTORY, TestBlock.docs(0)); } else { assertNull(loader.columnAtATimeReader(ctx)); @@ -1315,6 +1326,7 @@ public Set sourcePaths(String name) { inBlock = valuesConvert.apply(inBlock); } } + // If we're reading from _source we expect the order to be preserved, otherwise it's jumbled. Object expected = loader instanceof BlockSourceReader ? example.expectedParsed() : example.expectedParsedBlockLoader(); if (List.of().equals(expected)) { assertThat(inBlock, nullValue()); diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/BlockReaderFactories.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/BlockReaderFactories.java index a730931208663..967111a09f564 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/BlockReaderFactories.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/BlockReaderFactories.java @@ -62,6 +62,11 @@ public SearchLookup lookup() { public Set sourcePaths(String name) { return ctx.sourcePath(name); } + + @Override + public String parentField(String field) { + return ctx.parentPath(field); + } }); if (loader == null) { HeaderWarning.addWarning("Field [{}] cannot be retrieved, it is unsupported or not indexed; returning null", fieldName);