Skip to content

Commit

Permalink
ESQL: Fix single value query
Browse files Browse the repository at this point in the history
This fixes the lucene query that we use to push "is this a single valued
field?" into lucene. In particular, it was erroniously turning itself
into a noop any time it saw a field that could have either 0 or 1 value
per document. It's right and good for the query to noop itself only when
a field can only have 1 value per document. This adds logic to check for
that.

Closes elastic#102298
  • Loading branch information
nik9000 committed Nov 16, 2023
1 parent 43cc8db commit a10ddce
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 30 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,10 @@
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Terms;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
Expand Down Expand Up @@ -321,20 +323,30 @@ public Scorer scorer(LeafReaderContext context) throws IOException {
* can't do that because we need the check the number of fields.
*/
if (lfd instanceof LeafNumericFieldData n) {
return scorer(nextScorer, n);
return scorer(context, nextScorer, n);
}
if (lfd instanceof LeafOrdinalsFieldData o) {
return scorer(nextScorer, o);
return scorer(context, nextScorer, o);
}
return scorer(nextScorer, lfd);
}

private Scorer scorer(Scorer nextScorer, LeafNumericFieldData lfd) {
private Scorer scorer(LeafReaderContext context, Scorer nextScorer, LeafNumericFieldData lfd) throws IOException {
SortedNumericDocValues sortedNumerics = lfd.getLongValues();
if (DocValues.unwrapSingleton(sortedNumerics) != null) {
// Segment contains only single valued fields.
stats.numericSingle++;
return nextScorer;
/*
* Segment contains only single valued fields. But it's possible
* that some fields have 0 values. The most surefire way to check
* is to look at the index for the data. If there isn't an index
* this isn't going to work - but if there is we can compare the
* number of documents in the index to the number of values in it -
* if they are the same we've got a dense singleton.
*/
PointValues points = context.reader().getPointValues(fieldData.getFieldName());
if (points != null && points.getDocCount() == context.reader().maxDoc()) {
stats.numericSingle++;
return nextScorer;
}
}
TwoPhaseIterator nextIterator = nextScorer.twoPhaseIterator();
if (nextIterator == null) {
Expand All @@ -353,12 +365,22 @@ private Scorer scorer(Scorer nextScorer, LeafNumericFieldData lfd) {
);
}

private Scorer scorer(Scorer nextScorer, LeafOrdinalsFieldData lfd) {
private Scorer scorer(LeafReaderContext context, Scorer nextScorer, LeafOrdinalsFieldData lfd) throws IOException {
SortedSetDocValues sortedSet = lfd.getOrdinalsValues();
if (DocValues.unwrapSingleton(sortedSet) != null) {
// Segment contains only single valued fields.
stats.ordinalsSingle++;
return nextScorer;
/*
* Segment contains only single valued fields. But it's possible
* that some fields have 0 values. The most surefire way to check
* is to look at the index for the data. If there isn't an index
* this isn't going to work - but if there is we can compare the
* number of documents in the index to the number of values in it -
* if they are the same we've got a dense singleton.
*/
Terms terms = context.reader().terms(fieldData.getFieldName());
if (terms != null && terms.getDocCount() == context.reader().maxDoc()) {
stats.ordinalsSingle++;
return nextScorer;
}
}
TwoPhaseIterator nextIterator = nextScorer.twoPhaseIterator();
if (nextIterator == null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,11 @@ interface Setup {
public static List<Object[]> params() {
List<Object[]> params = new ArrayList<>();
for (String fieldType : new String[] { "long", "integer", "short", "byte", "double", "float", "keyword" }) {
params.add(new Object[] { new StandardSetup(fieldType, false) });
params.add(new Object[] { new StandardSetup(fieldType, true) });
for (boolean multivaluedField : new boolean[] { true, false }) {
for (boolean allowEmpty : new boolean[] { true, false }) {
params.add(new Object[] { new StandardSetup(fieldType, multivaluedField, allowEmpty, 100) });
}
}
}
params.add(new Object[] { new FieldMissingSetup() });
return params;
Expand Down Expand Up @@ -196,7 +199,7 @@ private void testCase(SingleValueQuery.Builder builder, boolean rewritesToMatchN
}
}

private record StandardSetup(String fieldType, boolean multivaluedField) implements Setup {
private record StandardSetup(String fieldType, boolean multivaluedField, boolean empty, int count) implements Setup {
@Override
public XContentBuilder mapping(XContentBuilder builder) throws IOException {
builder.startObject("i").field("type", "long").endObject();
Expand All @@ -207,27 +210,32 @@ public XContentBuilder mapping(XContentBuilder builder) throws IOException {
@Override
public List<List<Object>> build(RandomIndexWriter iw) throws IOException {
List<List<Object>> fieldValues = new ArrayList<>(100);
for (int i = 0; i < 100; i++) {
// i == 10 forces at least one multivalued field when we're configured for multivalued fields
boolean makeMultivalued = multivaluedField && (i == 10 || randomBoolean());
List<Object> values;
if (makeMultivalued) {
int count = between(2, 10);
Set<Object> set = new HashSet<>(count);
while (set.size() < count) {
set.add(randomValue());
}
values = List.copyOf(set);
} else {
values = List.of(randomValue());
}
for (int i = 0; i < count; i++) {
List<Object> values = values(i);
fieldValues.add(values);
iw.addDocument(docFor(i, values));
}

return fieldValues;
}

private List<Object> values(int i) {
// i == 10 forces at least one multivalued field when we're configured for multivalued fields
boolean makeMultivalued = multivaluedField && (i == 10 || randomBoolean());
if (makeMultivalued) {
int count = between(2, 10);
Set<Object> set = new HashSet<>(count);
while (set.size() < count) {
set.add(randomValue());
}
return List.copyOf(set);
}
// i == 0 forces at least one empty field when we're configured for empty fields
if (empty && (i == 0 || randomBoolean())) {
return List.of();
}
return List.of(randomValue());
}

private Object randomValue() {
return switch (fieldType) {
case "long" -> randomLong();
Expand Down Expand Up @@ -279,7 +287,7 @@ public void assertStats(SingleValueQuery.Builder builder, boolean subHasTwoPhase
assertThat(builder.stats().bytesApprox(), equalTo(0));
assertThat(builder.stats().bytesNoApprox(), equalTo(0));

if (multivaluedField) {
if (multivaluedField || empty) {
assertThat(builder.stats().numericSingle(), greaterThanOrEqualTo(0));
if (subHasTwoPhase) {
assertThat(builder.stats().numericMultiNoApprox(), equalTo(0));
Expand All @@ -300,7 +308,7 @@ public void assertStats(SingleValueQuery.Builder builder, boolean subHasTwoPhase
assertThat(builder.stats().numericMultiApprox(), equalTo(0));
assertThat(builder.stats().bytesApprox(), equalTo(0));
assertThat(builder.stats().bytesNoApprox(), equalTo(0));
if (multivaluedField) {
if (multivaluedField || empty) {
assertThat(builder.stats().ordinalsSingle(), greaterThanOrEqualTo(0));
if (subHasTwoPhase) {
assertThat(builder.stats().ordinalsMultiNoApprox(), equalTo(0));
Expand Down

0 comments on commit a10ddce

Please sign in to comment.