Skip to content

Commit

Permalink
ESQL: Load text field from parent keyword field
Browse files Browse the repository at this point in the history
This adds support for loading a text field from a parent keyword field.
The mapping for that looks like:
```
"properties": {
  "foo": {
    "type": "keyword",
    "fields": {
      "text": { "type": "text" }
    }
  }
}
```

In this case it's safe to load the `text` subfield from the doc values
for the `keyword` field above.
  • Loading branch information
nik9000 committed Nov 22, 2023
1 parent 5f4fb50 commit bb8edcf
Show file tree
Hide file tree
Showing 13 changed files with 152 additions and 27 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -541,6 +541,11 @@ public boolean supportsOrdinals() {
/**
 * Returns the {@link SortedSetDocValues} for {@code fieldName} in the given leaf,
 * letting callers read ordinals directly rather than materializing each value.
 */
public SortedSetDocValues ordinals(LeafReaderContext context) throws IOException {
return DocValues.getSortedSet(context.reader(), fieldName);
}

@Override
public String toString() {
// Include the field name so debug output identifies which field this reader loads.
return "BytesRefsFromOrds[" + fieldName + "]";
}
}

private static class SingletonOrdinals extends BlockDocValuesReader {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -204,4 +204,11 @@ Set<String> sourcePaths(String field) {

return fieldToCopiedFields.containsKey(resolvedField) ? fieldToCopiedFields.get(resolvedField) : Set.of(resolvedField);
}

/**
 * If {@code field} is a leaf multi-field, returns the path to its parent field.
 * Otherwise returns {@code null}.
 */
public String parentField(String field) {
return fullSubfieldNameToParentPath.get(field);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -822,6 +822,10 @@ public void validateMatchedRoutingPath(final String routingPath) {
);
}
}

/**
 * Returns {@code true} when this keyword field is configured with a normalizer
 * other than the default {@code Lucene.KEYWORD_ANALYZER}. Callers use this to
 * decide whether stored/doc values reflect the original input verbatim.
 */
public boolean hasNormalizer() {
return normalizer != Lucene.KEYWORD_ANALYZER;
}
}

private final boolean indexed;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -658,6 +658,11 @@ public interface BlockLoaderContext {
* Find the paths in {@code _source} that contain values for the field named {@code name}.
*/
Set<String> sourcePaths(String name);

/**
 * If {@code field} is a leaf multi-field, returns the path to its parent field.
 * Otherwise returns {@code null}.
 */
String parentField(String field);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,13 @@ public Set<String> sourcePaths(String field) {
return fieldTypesLookup().sourcePaths(field);
}

/**
 * If {@code field} is a leaf multi-field, returns the path to its parent field.
 * Otherwise returns {@code null}. Delegates to the field types lookup.
 */
public String parentField(String field) {
return fieldTypesLookup().parentField(field);
}

/**
* Returns true if the index has mappings. An index does not have mappings only if it was created
* without providing mappings explicitly, and no documents have yet been indexed in it.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -678,7 +678,8 @@ public TextFieldType(
super(name, indexed, stored, false, tsi, meta);
fielddata = false;
this.isSyntheticSource = isSyntheticSource;
this.syntheticSourceDelegate = syntheticSourceDelegate; // TODO rename to "exactDelegate" or something
// TODO block loader could use a "fast loading" delegate which isn't always the same - but frequently is.
this.syntheticSourceDelegate = syntheticSourceDelegate;
this.eagerGlobalOrdinals = eagerGlobalOrdinals;
this.indexPhrases = indexPhrases;
}
Expand Down Expand Up @@ -946,21 +947,37 @@ protected String delegatingTo() {
}
};
}
if (isSyntheticSource) {
if (isStored()) {
return new BlockStoredFieldsReader.BytesFromStringsBlockLoader(name());
/*
* If this is a sub-text field try and return the parent's loader. Text
* fields will always be slow to load and if the parent is exact then we
* should use that instead.
*/
String parentField = blContext.parentField(name());
if (parentField != null) {
MappedFieldType parent = blContext.lookup().fieldType(parentField);
if (parent.typeName().equals(KeywordFieldMapper.CONTENT_TYPE)) {
KeywordFieldMapper.KeywordFieldType kwd = (KeywordFieldMapper.KeywordFieldType) parent;
if (kwd.hasNormalizer() == false && (kwd.hasDocValues() || kwd.isStored())) {
return new BlockLoader.Delegating(kwd.blockLoader(blContext)) {
@Override
protected String delegatingTo() {
return kwd.name();
}
};
}
}
}
if (isStored()) {
return new BlockStoredFieldsReader.BytesFromStringsBlockLoader(name());
}
if (isSyntheticSource) {
/*
* We *shouldn't fall to this exception. The mapping should be
* rejected because we've enabled synthetic source but not configured
* the index properly. But we give it a nice message anyway just in
* case.
* When we're in synthetic source mode we don't currently
* support text fields that are not stored and are not children
* of perfect keyword fields. We'd have to load from the parent
* field and then convert the result to a string.
*/
throw new IllegalArgumentException(
"fetching values from a text field ["
+ name()
+ "] is not supported because synthetic _source is enabled and we don't have a way to load the fields"
);
return null;
}
return new BlockSourceReader.BytesRefsBlockLoader(SourceValueFetcher.toString(blContext.sourcePaths(name())));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,13 @@ public Set<String> sourcePath(String fullName) {
return mappingLookup.sourcePaths(fullName);
}

/**
 * If {@code field} is a leaf multi-field, returns the path to its parent field.
 * Otherwise returns {@code null}. Delegates to the mapping lookup.
 */
public String parentPath(String field) {
return mappingLookup.parentField(field);
}

/**
* Will there be {@code _source}.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ public class SearchLookup implements SourceProvider {
* The chain of fields for which this lookup was created, used for detecting
* loops caused by runtime fields referring to other runtime fields. The chain is empty
* for the "top level" lookup created for the entire search. When a lookup is used to load
* fielddata for a field, we fork it and make sure the field name name isn't in the chain,
* then add it to the end. So the lookup for the a field named {@code a} will be {@code ["a"]}. If
* fielddata for a field, we fork it and make sure the field name isn't in the chain,
* then add it to the end. So the lookup for a field named {@code a} will be {@code ["a"]}. If
* that field looks up the values of a field named {@code b} then
* {@code b}'s chain will contain {@code ["a", "b"]}.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -657,18 +657,25 @@ protected Function<Object, Object> loadBlockExpected() {
@Override
protected SyntheticSourceSupport syntheticSourceSupport(boolean ignoreMalformed) {
assertFalse("keyword doesn't support ignore_malformed", ignoreMalformed);
return new KeywordSyntheticSourceSupport(randomBoolean(), usually() ? null : randomAlphaOfLength(2), true);
return new KeywordSyntheticSourceSupport(
randomBoolean() ? null : between(10, 100),
randomBoolean(),
usually() ? null : randomAlphaOfLength(2),
true
);
}

static class KeywordSyntheticSourceSupport implements SyntheticSourceSupport {
private final Integer ignoreAbove = randomBoolean() ? null : between(10, 100);
private final boolean allIgnored = ignoreAbove != null && rarely();
private final Integer ignoreAbove;
private final boolean allIgnored;
private final boolean store;
private final boolean docValues;
private final String nullValue;
private final boolean exampleSortsUsingIgnoreAbove;

KeywordSyntheticSourceSupport(boolean store, String nullValue, boolean exampleSortsUsingIgnoreAbove) {
KeywordSyntheticSourceSupport(Integer ignoreAbove, boolean store, String nullValue, boolean exampleSortsUsingIgnoreAbove) {
this.ignoreAbove = ignoreAbove;
this.allIgnored = ignoreAbove != null && rarely();
this.store = store;
this.nullValue = nullValue;
this.exampleSortsUsingIgnoreAbove = exampleSortsUsingIgnoreAbove;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery;
import org.elasticsearch.core.CheckedConsumer;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.IndexVersion;
import org.elasticsearch.index.analysis.AnalyzerScope;
Expand Down Expand Up @@ -1121,6 +1122,7 @@ protected SyntheticSourceSupport syntheticSourceSupport(boolean ignoreMalformed)
boolean storedKeywordField = storeTextField || randomBoolean();
String nullValue = storeTextField || usually() ? null : randomAlphaOfLength(2);
KeywordFieldMapperTests.KeywordSyntheticSourceSupport keywordSupport = new KeywordFieldMapperTests.KeywordSyntheticSourceSupport(
randomBoolean() ? null : between(10, 100),
storedKeywordField,
nullValue,
false == storeTextField
Expand Down Expand Up @@ -1326,8 +1328,50 @@ public void testEmpty() throws Exception {
}

@Override
protected boolean supportsColumnAtATimeReader(MappedFieldType ft) {
TextFieldMapper.TextFieldType text = (TextFieldType) ft;
return text.syntheticSourceDelegate() != null && text.syntheticSourceDelegate().hasDocValues();
protected boolean supportsColumnAtATimeReader(MapperService mapper, MappedFieldType ft) {
String parentName = mapper.mappingLookup().parentField(ft.name());
if (parentName == null) {
TextFieldMapper.TextFieldType text = (TextFieldType) ft;
return text.syntheticSourceDelegate() != null && text.syntheticSourceDelegate().hasDocValues();
}
MappedFieldType parent = mapper.fieldType(parentName);
if (false == parent.typeName().equals(KeywordFieldMapper.CONTENT_TYPE)) {
throw new UnsupportedOperationException();
}
KeywordFieldMapper.KeywordFieldType kwd = (KeywordFieldMapper.KeywordFieldType) parent;
return kwd.hasDocValues();
}

// Loads the text sub-field through its parent keyword field with the column-at-a-time reader.
public void testBlockLoaderFromParentColumnReader() throws IOException {
testBlockLoaderFromParent(true, randomBoolean());
}

// Loads the text sub-field through its parent keyword field with the row-stride reader.
public void testBlockLoaderParentFromRowStrideReader() throws IOException {
testBlockLoaderFromParent(false, randomBoolean());
}

/**
 * Maps a keyword {@code field} with a {@code text} multi-field {@code field.sub}
 * and exercises the block loader for {@code field.sub}, which may delegate to
 * the parent keyword field's values.
 *
 * @param columnReader whether to read column-at-a-time instead of row-stride
 * @param syntheticSource whether the index mapping enables synthetic {@code _source}
 */
private void testBlockLoaderFromParent(boolean columnReader, boolean syntheticSource) throws IOException {
boolean storeParent = randomBoolean();
// Parent keyword field: no ignore_above, no null_value; the last argument is
// exampleSortsUsingIgnoreAbove, enabled only when the parent isn't stored.
KeywordFieldMapperTests.KeywordSyntheticSourceSupport kwdSupport = new KeywordFieldMapperTests.KeywordSyntheticSourceSupport(
null,
storeParent,
null,
false == storeParent
);
SyntheticSourceExample example = kwdSupport.example(5);
CheckedConsumer<XContentBuilder, IOException> buildFields = b -> {
b.startObject("field");
{
example.mapping().accept(b);
// Declare the text multi-field under the keyword parent.
b.startObject("fields").startObject("sub");
{
b.field("type", "text");
}
b.endObject().endObject();
}
b.endObject();
};
MapperService mapper = createMapperService(syntheticSource ? syntheticSourceMapping(buildFields) : mapping(buildFields));
// Load "field.sub" but expect the values produced for the parent keyword field.
testBlockLoader(columnReader, example, mapper, "field.sub");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -430,6 +430,11 @@ public SearchLookup lookup() {
public Set<String> sourcePaths(String name) {
throw new UnsupportedOperationException();
}

@Override
public String parentField(String field) {
// This test context deliberately doesn't support parent-field resolution.
throw new UnsupportedOperationException();
}
};
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1247,32 +1247,43 @@ public final void testBlockLoaderFromRowStrideReader() throws IOException {
testBlockLoader(false);
}

protected boolean supportsColumnAtATimeReader(MappedFieldType ft) {
protected boolean supportsColumnAtATimeReader(MapperService mapper, MappedFieldType ft) {
return ft.hasDocValues();
}

private void testBlockLoader(boolean columnReader) throws IOException {
SyntheticSourceExample example = syntheticSourceSupport(false).example(5);
MapperService mapper = createMapperService(syntheticSourceMapping(b -> {
MapperService mapper = createMapperService(syntheticSourceMapping(b -> { // TODO randomly use syntheticSourceMapping or normal
b.startObject("field");
example.mapping().accept(b);
b.endObject();
}));
BlockLoader loader = mapper.fieldType("field").blockLoader(new MappedFieldType.BlockLoaderContext() {
testBlockLoader(columnReader, example, mapper, "field");
}

protected final void testBlockLoader(boolean columnReader, SyntheticSourceExample example, MapperService mapper, String loaderFieldName)
throws IOException {
SearchLookup searchLookup = new SearchLookup(mapper.mappingLookup().fieldTypesLookup()::get, null, null);
BlockLoader loader = mapper.fieldType(loaderFieldName).blockLoader(new MappedFieldType.BlockLoaderContext() {
@Override
public String indexName() {
throw new UnsupportedOperationException();
}

@Override
public SearchLookup lookup() {
throw new UnsupportedOperationException();
return searchLookup;
}

@Override
public Set<String> sourcePaths(String name) {
return mapper.mappingLookup().sourcePaths(name);
}

@Override
public String parentField(String field) {
return mapper.mappingLookup().parentField(field);
}
});
Function<Object, Object> valuesConvert = loadBlockExpected();
if (valuesConvert == null) {
Expand All @@ -1291,7 +1302,7 @@ public Set<String> sourcePaths(String name) {
LeafReaderContext ctx = reader.leaves().get(0);
TestBlock block;
if (columnReader) {
if (supportsColumnAtATimeReader(mapper.fieldType("field"))) {
if (supportsColumnAtATimeReader(mapper, mapper.fieldType(loaderFieldName))) {
block = (TestBlock) loader.columnAtATimeReader(ctx).read(TestBlock.FACTORY, TestBlock.docs(0));
} else {
assertNull(loader.columnAtATimeReader(ctx));
Expand All @@ -1315,6 +1326,7 @@ public Set<String> sourcePaths(String name) {
inBlock = valuesConvert.apply(inBlock);
}
}
// If we're reading from _source we expect the order to be preserved, otherwise it's jumbled.
Object expected = loader instanceof BlockSourceReader ? example.expectedParsed() : example.expectedParsedBlockLoader();
if (List.of().equals(expected)) {
assertThat(inBlock, nullValue());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,11 @@ public SearchLookup lookup() {
public Set<String> sourcePaths(String name) {
return ctx.sourcePath(name);
}

@Override
public String parentField(String field) {
// Resolve the multi-field's parent path via the surrounding context.
return ctx.parentPath(field);
}
});
if (loader == null) {
HeaderWarning.addWarning("Field [{}] cannot be retrieved, it is unsupported or not indexed; returning null", fieldName);
Expand Down

0 comments on commit bb8edcf

Please sign in to comment.