From a10ddcee752dbe2173c34cc7dc485a75d77e3191 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Thu, 16 Nov 2023 13:57:58 -0500 Subject: [PATCH] ESQL: Fix single value query This fixes the lucene query that we use to push "is this a single valued field?" into lucene. In particular, it was erroniously turning itself into a noop any time it saw a field that could have either 0 or 1 value per document. It's right and good for the query to noop itself only when a field can only have 1 value per document. This adds logic to check for that. Closes #102298 --- .../esql/querydsl/query/SingleValueQuery.java | 42 ++++++++++++---- .../querydsl/query/SingleValueQueryTests.java | 48 +++++++++++-------- 2 files changed, 60 insertions(+), 30 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/SingleValueQuery.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/SingleValueQuery.java index b554ccb2920aa..e419be2b7e1fc 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/SingleValueQuery.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/querydsl/query/SingleValueQuery.java @@ -10,8 +10,10 @@ import org.apache.lucene.index.DocValues; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.PointValues; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.index.Terms; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.Explanation; import org.apache.lucene.search.IndexSearcher; @@ -321,20 +323,30 @@ public Scorer scorer(LeafReaderContext context) throws IOException { * can't do that because we need the check the number of fields. */ if (lfd instanceof LeafNumericFieldData n) { - return scorer(nextScorer, n); + return scorer(context, nextScorer, n); } if (lfd instanceof LeafOrdinalsFieldData o) { - return scorer(nextScorer, o); + return scorer(context, nextScorer, o); } return scorer(nextScorer, lfd); } - private Scorer scorer(Scorer nextScorer, LeafNumericFieldData lfd) { + private Scorer scorer(LeafReaderContext context, Scorer nextScorer, LeafNumericFieldData lfd) throws IOException { SortedNumericDocValues sortedNumerics = lfd.getLongValues(); if (DocValues.unwrapSingleton(sortedNumerics) != null) { - // Segment contains only single valued fields. - stats.numericSingle++; - return nextScorer; + /* + * Segment contains only single valued fields. But it's possible + * that some fields have 0 values. The most surefire way to check + * is to look at the index for the data. If there isn't an index + * this isn't going to work - but if there is we can compare the + * number of documents in the index to the number of values in it - + * if they are the same we've got a dense singleton. + */ + PointValues points = context.reader().getPointValues(fieldData.getFieldName()); + if (points != null && points.getDocCount() == context.reader().maxDoc()) { + stats.numericSingle++; + return nextScorer; + } } TwoPhaseIterator nextIterator = nextScorer.twoPhaseIterator(); if (nextIterator == null) { @@ -353,12 +365,22 @@ private Scorer scorer(Scorer nextScorer, LeafNumericFieldData lfd) { ); } - private Scorer scorer(Scorer nextScorer, LeafOrdinalsFieldData lfd) { + private Scorer scorer(LeafReaderContext context, Scorer nextScorer, LeafOrdinalsFieldData lfd) throws IOException { SortedSetDocValues sortedSet = lfd.getOrdinalsValues(); if (DocValues.unwrapSingleton(sortedSet) != null) { - // Segment contains only single valued fields. - stats.ordinalsSingle++; - return nextScorer; + /* + * Segment contains only single valued fields. But it's possible + * that some fields have 0 values. The most surefire way to check + * is to look at the index for the data. If there isn't an index + * this isn't going to work - but if there is we can compare the + * number of documents in the index to the number of values in it - + * if they are the same we've got a dense singleton. + */ + Terms terms = context.reader().terms(fieldData.getFieldName()); + if (terms != null && terms.getDocCount() == context.reader().maxDoc()) { + stats.ordinalsSingle++; + return nextScorer; + } } TwoPhaseIterator nextIterator = nextScorer.twoPhaseIterator(); if (nextIterator == null) { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/querydsl/query/SingleValueQueryTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/querydsl/query/SingleValueQueryTests.java index cc5b05537c4c6..a6eacae2857e7 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/querydsl/query/SingleValueQueryTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/querydsl/query/SingleValueQueryTests.java @@ -57,8 +57,11 @@ interface Setup { public static List params() { List params = new ArrayList<>(); for (String fieldType : new String[] { "long", "integer", "short", "byte", "double", "float", "keyword" }) { - params.add(new Object[] { new StandardSetup(fieldType, false) }); - params.add(new Object[] { new StandardSetup(fieldType, true) }); + for (boolean multivaluedField : new boolean[] { true, false }) { + for (boolean allowEmpty : new boolean[] { true, false }) { + params.add(new Object[] { new StandardSetup(fieldType, multivaluedField, allowEmpty, 100) }); + } + } } params.add(new Object[] { new FieldMissingSetup() }); return params; @@ -196,7 +199,7 @@ private void testCase(SingleValueQuery.Builder builder, boolean rewritesToMatchN } } - private record StandardSetup(String fieldType, boolean multivaluedField) implements Setup { + private record StandardSetup(String fieldType, boolean multivaluedField, boolean empty, int count) implements Setup { @Override public XContentBuilder mapping(XContentBuilder builder) throws IOException { builder.startObject("i").field("type", "long").endObject(); @@ -207,27 +210,32 @@ public XContentBuilder mapping(XContentBuilder builder) throws IOException { @Override public List> build(RandomIndexWriter iw) throws IOException { List> fieldValues = new ArrayList<>(100); - for (int i = 0; i < 100; i++) { - // i == 10 forces at least one multivalued field when we're configured for multivalued fields - boolean makeMultivalued = multivaluedField && (i == 10 || randomBoolean()); - List values; - if (makeMultivalued) { - int count = between(2, 10); - Set set = new HashSet<>(count); - while (set.size() < count) { - set.add(randomValue()); - } - values = List.copyOf(set); - } else { - values = List.of(randomValue()); - } + for (int i = 0; i < count; i++) { + List values = values(i); fieldValues.add(values); iw.addDocument(docFor(i, values)); } - return fieldValues; } + private List values(int i) { + // i == 10 forces at least one multivalued field when we're configured for multivalued fields + boolean makeMultivalued = multivaluedField && (i == 10 || randomBoolean()); + if (makeMultivalued) { + int count = between(2, 10); + Set set = new HashSet<>(count); + while (set.size() < count) { + set.add(randomValue()); + } + return List.copyOf(set); + } + // i == 0 forces at least one empty field when we're configured for empty fields + if (empty && (i == 0 || randomBoolean())) { + return List.of(); + } + return List.of(randomValue()); + } + private Object randomValue() { return switch (fieldType) { case "long" -> randomLong(); @@ -279,7 +287,7 @@ public void assertStats(SingleValueQuery.Builder builder, boolean subHasTwoPhase assertThat(builder.stats().bytesApprox(), equalTo(0)); assertThat(builder.stats().bytesNoApprox(), equalTo(0)); - if (multivaluedField) { + if (multivaluedField || empty) { assertThat(builder.stats().numericSingle(), greaterThanOrEqualTo(0)); if (subHasTwoPhase) { assertThat(builder.stats().numericMultiNoApprox(), equalTo(0)); @@ -300,7 +308,7 @@ public void assertStats(SingleValueQuery.Builder builder, boolean subHasTwoPhase assertThat(builder.stats().numericMultiApprox(), equalTo(0)); assertThat(builder.stats().bytesApprox(), equalTo(0)); assertThat(builder.stats().bytesNoApprox(), equalTo(0)); - if (multivaluedField) { + if (multivaluedField || empty) { assertThat(builder.stats().ordinalsSingle(), greaterThanOrEqualTo(0)); if (subHasTwoPhase) { assertThat(builder.stats().ordinalsMultiNoApprox(), equalTo(0));