diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/compute/operator/ValuesSourceReaderBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/compute/operator/ValuesSourceReaderBenchmark.java index f4b93ebf88b49..679a895b1a5e5 100644 --- a/benchmarks/src/main/java/org/elasticsearch/benchmark/compute/operator/ValuesSourceReaderBenchmark.java +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/compute/operator/ValuesSourceReaderBenchmark.java @@ -108,9 +108,9 @@ public class ValuesSourceReaderBenchmark { private static List fields(String name) { return switch (name) { case "3_stored_keywords" -> List.of( - new ValuesSourceReaderOperator.FieldInfo(name, List.of(blockLoader("stored_keyword_1"))), - new ValuesSourceReaderOperator.FieldInfo(name, List.of(blockLoader("stored_keyword_2"))), - new ValuesSourceReaderOperator.FieldInfo(name, List.of(blockLoader("stored_keyword_3"))) + new ValuesSourceReaderOperator.FieldInfo("keyword_1", List.of(blockLoader("stored_keyword_1"))), + new ValuesSourceReaderOperator.FieldInfo("keyword_2", List.of(blockLoader("stored_keyword_2"))), + new ValuesSourceReaderOperator.FieldInfo("keyword_3", List.of(blockLoader("stored_keyword_3"))) ); default -> List.of(new ValuesSourceReaderOperator.FieldInfo(name, List.of(blockLoader(name)))); }; @@ -314,6 +314,22 @@ public void benchmark() { if (expected != sum) { throw new AssertionError("[" + layout + "][" + name + "] expected [" + expected + "] but was [" + sum + "]"); } + boolean foundStoredFieldLoader = false; + ValuesSourceReaderOperator.Status status = (ValuesSourceReaderOperator.Status) op.status(); + for (Map.Entry e : status.readersBuilt().entrySet()) { + if (e.getKey().indexOf("stored_fields") >= 0) { + foundStoredFieldLoader = true; + } + } + if (name.indexOf("stored") >= 0) { + if (foundStoredFieldLoader == false) { + throw new AssertionError("expected to use a stored field loader but only had: " + status.readersBuilt()); + } + } else { + if (foundStoredFieldLoader) { + throw new AssertionError("expected not to use a stored field loader but only had: " + status.readersBuilt()); + } + } } @Setup diff --git a/server/src/main/java/org/elasticsearch/index/mapper/BlockLoader.java b/server/src/main/java/org/elasticsearch/index/mapper/BlockLoader.java index 055d4a4e7e862..4576be174b27b 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/BlockLoader.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/BlockLoader.java @@ -148,6 +148,11 @@ public void read(int docId, StoredFields storedFields, Builder builder) throws I public boolean canReuse(int startingDocID) { return true; } + + @Override + public String toString() { + return "constant_nulls"; + } } /** @@ -172,6 +177,11 @@ public Block read(BlockFactory factory, Docs docs) { public boolean canReuse(int startingDocID) { return true; } + + @Override + public String toString() { + return "constant[" + value + "]"; + } }; } @@ -187,6 +197,11 @@ public void read(int docId, StoredFields storedFields, Builder builder) { public boolean canReuse(int startingDocID) { return true; } + + @Override + public String toString() { + return "constant[" + value + "]"; + } }; } @@ -212,6 +227,89 @@ public String toString() { }; } + abstract class Delegating implements BlockLoader { + protected final BlockLoader delegate; + + protected Delegating(BlockLoader delegate) { + this.delegate = delegate; + } + + @Override + public Builder builder(BlockFactory factory, int expectedCount) { + return delegate.builder(factory, expectedCount); + } + + @Override + public ColumnAtATimeReader columnAtATimeReader(LeafReaderContext context) throws IOException { + ColumnAtATimeReader reader = delegate.columnAtATimeReader(context); + if (reader == null) { + return null; + } + return new ColumnAtATimeReader() { + @Override + public Block read(BlockFactory factory, Docs docs) throws IOException { + return reader.read(factory, docs); + } + + @Override + public boolean canReuse(int startingDocID) { + return reader.canReuse(startingDocID); + } + + @Override + public String toString() { + return "Delegating[to=" + delegatingTo() + ", impl=" + reader + "]"; + } + }; + } + + @Override + public RowStrideReader rowStrideReader(LeafReaderContext context) throws IOException { + RowStrideReader reader = delegate.rowStrideReader(context); + if (reader == null) { + return null; + } + return new RowStrideReader() { + @Override + public void read(int docId, StoredFields storedFields, Builder builder) throws IOException { + reader.read(docId, storedFields, builder); + } + + @Override + public boolean canReuse(int startingDocID) { + return reader.canReuse(startingDocID); + } + + @Override + public String toString() { + return "Delegating[to=" + delegatingTo() + ", impl=" + reader + "]"; + } + }; + } + + @Override + public StoredFieldsSpec rowStrideStoredFieldSpec() { + return delegate.rowStrideStoredFieldSpec(); + } + + @Override + public boolean supportsOrdinals() { + return delegate.supportsOrdinals(); + } + + @Override + public SortedSetDocValues ordinals(LeafReaderContext context) throws IOException { + return delegate.ordinals(context); + } + + protected abstract String delegatingTo(); + + @Override + public final String toString() { + return "Delegating[to=" + delegatingTo() + ", impl=" + delegate + "]"; + } + } + /** * A list of documents to load. Documents are always in non-decreasing order. */ diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index 54851976eccd8..54f2b43a3e448 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -21,6 +21,8 @@ import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.index.Term; import org.apache.lucene.queries.intervals.Intervals; import org.apache.lucene.queries.intervals.IntervalsSource; @@ -70,6 +72,7 @@ import org.elasticsearch.script.field.DelegateDocValuesField; import org.elasticsearch.script.field.TextDocValuesField; import org.elasticsearch.search.aggregations.support.CoreValuesSourceType; +import org.elasticsearch.search.fetch.StoredFieldsSpec; import org.elasticsearch.xcontent.ToXContent; import org.elasticsearch.xcontent.XContentBuilder; @@ -939,7 +942,12 @@ public boolean isAggregatable() { @Override public BlockLoader blockLoader(BlockLoaderContext blContext) { if (syntheticSourceDelegate != null) { - return syntheticSourceDelegate.blockLoader(blContext); + return new BlockLoader.Delegating(syntheticSourceDelegate.blockLoader(blContext)) { + @Override + protected String delegatingTo() { + return syntheticSourceDelegate.name(); + } + }; } if (isSyntheticSource) { if (isStored()) { diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/ValuesSourceReaderOperator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/ValuesSourceReaderOperator.java index 424efd19a5cf6..6bdac6bb284c7 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/ValuesSourceReaderOperator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/ValuesSourceReaderOperator.java @@ -163,6 +163,7 @@ public int get(int i) { StoredFieldLoader.fromSpec(storedFieldsSpec).getLoader(ctx(shard, segment), null), storedFieldsSpec.requiresSource() ); + trackStoredFields(storedFieldsSpec); // TODO when optimization is enabled add it to tracking } for (int p = 0; p < docs.getPositionCount(); p++) { int doc = docs.getInt(p); @@ -205,13 +206,13 @@ private void loadFromManyLeaves(Block[] blocks, DocVector docVector) throws IOEx if (shard != lastShard || segment != lastSegment) { lastShard = shard; lastSegment = segment; - if (false == storedFieldsSpecForShard(shard).equals(StoredFieldsSpec.NO_REQUIREMENTS)) { - StoredFieldsSpec storedFieldsSpec = storedFieldsSpecForShard(shard); + StoredFieldsSpec storedFieldsSpec = storedFieldsSpecForShard(shard); + if (false == storedFieldsSpec.equals(StoredFieldsSpec.NO_REQUIREMENTS)) { storedFields = new BlockLoaderStoredFieldsFromLeafLoader( StoredFieldLoader.fromSpec(storedFieldsSpec).getLoader(ctx(shard, segment), null), storedFieldsSpec.requiresSource() ); - // TODO track the number of times we build the stored fields reader + trackStoredFields(storedFieldsSpec); } } if (storedFields != null) { @@ -231,6 +232,14 @@ private void loadFromManyLeaves(Block[] blocks, DocVector docVector) throws IOEx } } + private void trackStoredFields(StoredFieldsSpec spec) { + readersBuilt.merge( + "stored_fields[" + "requires_source:" + spec.requiresSource() + ", fields:" + spec.requiredStoredFields().size() + "]", + 1, + (prev, one) -> prev + one + ); + } + /** * Returns a builder from the first non - {@link BlockLoader#CONSTANT_NULLS} loader * in the list. If they are all the null loader then returns a null builder. @@ -261,6 +270,11 @@ private class FieldWork { BlockLoader.ColumnAtATimeReader build(BlockLoader loader, LeafReaderContext ctx) throws IOException { return loader.columnAtATimeReader(ctx); } + + @Override + String type() { + return "column_at_a_time"; + } }; final GuardedReader rowStride = new GuardedReader<>() { @@ -268,6 +282,11 @@ BlockLoader.ColumnAtATimeReader build(BlockLoader loader, LeafReaderContext ctx) BlockLoader.RowStrideReader build(BlockLoader loader, LeafReaderContext ctx) throws IOException { return loader.rowStrideReader(ctx); } + + @Override + String type() { + return "row_stride"; + } }; FieldWork(FieldInfo info) { @@ -280,17 +299,24 @@ private abstract class GuardedReader { V lastReader; V reader(int shard, int segment, int startingDocId) throws IOException { - if (lastShard == shard && lastSegment == segment && lastReader != null && lastReader.canReuse(startingDocId)) { - return lastReader; + if (lastShard == shard && lastSegment == segment) { + if (lastReader == null) { + return null; + } + if (lastReader.canReuse(startingDocId)) { + return lastReader; + } } lastShard = shard; lastSegment = segment; lastReader = build(info.blockLoaders.get(shard), ctx(shard, segment)); - readersBuilt.merge(info.name + ":" + lastReader, 1, (prev, one) -> prev + one); + readersBuilt.merge(info.name + ":" + type() + ":" + lastReader, 1, (prev, one) -> prev + one); return lastReader; } abstract V build(BlockLoader loader, LeafReaderContext ctx) throws IOException; + + abstract String type(); } } diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/ValuesSourceReaderOperatorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/ValuesSourceReaderOperatorTests.java index 6694eb4f56465..57904a7a25c7c 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/ValuesSourceReaderOperatorTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/lucene/ValuesSourceReaderOperatorTests.java @@ -67,6 +67,7 @@ import org.elasticsearch.search.lookup.SearchLookup; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.json.JsonXContent; +import org.hamcrest.Matcher; import org.junit.After; import java.io.IOException; @@ -78,8 +79,13 @@ import java.util.stream.IntStream; import static org.elasticsearch.compute.lucene.LuceneSourceOperatorTests.mockSearchContext; +import static org.elasticsearch.test.MapMatcher.assertMap; +import static org.elasticsearch.test.MapMatcher.matchesMap; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.hasSize; +import static org.hamcrest.Matchers.lessThanOrEqualTo; +import static org.hamcrest.Matchers.not; +import static org.hamcrest.Matchers.nullValue; /** * Tests for {@link ValuesSourceReaderOperator}. Turns off {@link HandleLimitFS} @@ -114,7 +120,7 @@ protected Operator.OperatorFactory simple(BigArrays bigArrays) { throw new RuntimeException(e); } } - return factory(reader, new NumberFieldMapper.NumberFieldType("long", NumberFieldMapper.NumberType.LONG)); + return factory(reader, docValuesNumberField("long", NumberFieldMapper.NumberType.LONG)); } static Operator.OperatorFactory factory(IndexReader reader, MappedFieldType ft) { @@ -319,74 +325,166 @@ public Set sourcePaths(String name) { } private void loadSimpleAndAssert(DriverContext driverContext, List input, Block.MvOrdering docValuesMvOrdering) { - interface Check { - void check(Block block, int position, int key); + List cases = infoAndChecksForEachType(docValuesMvOrdering); + + List operators = new ArrayList<>(); + operators.add( + new ValuesSourceReaderOperator.Factory( + List.of(fieldInfo(docValuesNumberField("key", NumberFieldMapper.NumberType.INTEGER))), + List.of(reader), + 0 + ).get(driverContext) + ); + List tests = new ArrayList<>(); + while (cases.isEmpty() == false) { + List b = randomNonEmptySubsetOf(cases); + cases.removeAll(b); + tests.addAll(b); + operators.add( + new ValuesSourceReaderOperator.Factory(b.stream().map(i -> i.info).toList(), List.of(reader), 0).get(driverContext) + ); } - record Info(ValuesSourceReaderOperator.FieldInfo info, Check check) { - Info(MappedFieldType ft, Check check) { - this(fieldInfo(ft), check); + List results = drive(operators, input.iterator(), driverContext); + assertThat(results, hasSize(input.size())); + for (Page page : results) { + assertThat(page.getBlockCount(), equalTo(tests.size() + 2 /* one for doc and one for keys */)); + IntVector keys = page.getBlock(1).asVector(); + for (int p = 0; p < page.getPositionCount(); p++) { + int key = keys.getInt(p); + for (int i = 0; i < tests.size(); i++) { + try { + tests.get(i).checkResults.check(page.getBlock(2 + i), p, key); + } catch (AssertionError e) { + throw new AssertionError("error checking " + tests.get(i).info.name() + "[" + p + "]: " + e.getMessage(), e); + } + } } } + for (Operator op : operators) { + assertThat(((ValuesSourceReaderOperator) op).status().pagesProcessed(), equalTo(input.size())); + } + assertDriverContext(driverContext); + } + + interface CheckResults { + void check(Block block, int position, int key); + } + + interface CheckReaders { + void check(boolean forcedRowByRow, int pageCount, int segmentCount, Map readersBuilt); + } + + record FieldCase(ValuesSourceReaderOperator.FieldInfo info, CheckResults checkResults, CheckReaders checkReaders) { + FieldCase(MappedFieldType ft, CheckResults checkResults, CheckReaders checkReaders) { + this(fieldInfo(ft), checkResults, checkReaders); + } + + FieldCase(MappedFieldType ft, CheckResults checkResults) { + // NOCOMMIT remove me + this(fieldInfo(ft), checkResults, (forcedRowByRow, pageCount, segmentCount, readers) -> {}); + } + } + + /** + * Asserts that {@link ValuesSourceReaderOperator#status} claims that only + * the expected readers are built after loading singleton pages. + */ + // @Repeat(iterations = 100) + public void testLoadAllStatus() { + DriverContext driverContext = driverContext(); + testLoadAllStatus(false); + } + + /** + * Asserts that {@link ValuesSourceReaderOperator#status} claims that only + * the expected readers are built after loading non-singleton pages. + */ + // @Repeat(iterations = 100) + public void testLoadAllStatusAllInOnePage() { + testLoadAllStatus(true); + } + + private void testLoadAllStatus(boolean allInOnePage) { + DriverContext driverContext = driverContext(); + List input = CannedSourceOperator.collectPages(simpleInput(driverContext.blockFactory(), between(100, 5000))); + List cases = infoAndChecksForEachType(Block.MvOrdering.DEDUPLICATED_AND_SORTED_ASCENDING); + // Build one operator for each field, so we get a unique map to assert on + List operators = cases.stream() + .map(i -> new ValuesSourceReaderOperator.Factory(List.of(i.info), List.of(reader), 0).get(driverContext)) + .toList(); + if (allInOnePage) { + input = List.of(CannedSourceOperator.mergePages(input)); + } + drive(operators, input.iterator(), driverContext); + for (int i = 0; i < cases.size(); i++) { + ValuesSourceReaderOperator.Status status = (ValuesSourceReaderOperator.Status) operators.get(i).status(); + assertThat(status.pagesProcessed(), equalTo(input.size())); + FieldCase fc = cases.get(i); + fc.checkReaders.check(allInOnePage, input.size(), reader.leaves().size(), status.readersBuilt()); + } + } + + private List infoAndChecksForEachType(Block.MvOrdering docValuesMvOrdering) { class Checks { - void checkLongs(Block block, int position, int key) { + void longs(Block block, int position, int key) { LongVector longs = ((LongBlock) block).asVector(); assertThat(longs.getLong(position), equalTo((long) key)); } - void checkInts(Block block, int position, int key) { + void ints(Block block, int position, int key) { IntVector ints = ((IntBlock) block).asVector(); assertThat(ints.getInt(position), equalTo(key)); } - void checkShorts(Block block, int position, int key) { + void shorts(Block block, int position, int key) { IntVector ints = ((IntBlock) block).asVector(); assertThat(ints.getInt(position), equalTo((int) (short) key)); } - void checkBytes(Block block, int position, int key) { + void bytes(Block block, int position, int key) { IntVector ints = ((IntBlock) block).asVector(); assertThat(ints.getInt(position), equalTo((int) (byte) key)); } - void checkDoubles(Block block, int position, int key) { + void doubles(Block block, int position, int key) { DoubleVector doubles = ((DoubleBlock) block).asVector(); assertThat(doubles.getDouble(position), equalTo(key / 123_456d)); } - void checkStrings(Block block, int position, int key) { + void strings(Block block, int position, int key) { BytesRefVector keywords = ((BytesRefBlock) block).asVector(); assertThat(keywords.getBytesRef(position, new BytesRef()).utf8ToString(), equalTo(Integer.toString(key))); } - void checkBools(Block block, int position, int key) { + void bools(Block block, int position, int key) { BooleanVector bools = ((BooleanBlock) block).asVector(); assertThat(bools.getBoolean(position), equalTo(key % 2 == 0)); } - void checkIds(Block block, int position, int key) { + void ids(Block block, int position, int key) { BytesRefVector ids = ((BytesRefBlock) block).asVector(); assertThat(ids.getBytesRef(position, new BytesRef()).utf8ToString(), equalTo("id")); } - void checkConstantBytes(Block block, int position, int key) { + void constantBytes(Block block, int position, int key) { BytesRefVector keywords = ((BytesRefBlock) block).asVector(); assertThat(keywords.getBytesRef(position, new BytesRef()).utf8ToString(), equalTo("foo")); } - void checkConstantNulls(Block block, int position, int key) { + void constantNulls(Block block, int position, int key) { assertTrue(block.areAllValuesNull()); assertTrue(block.isNull(position)); } - void checkMvLongsFromDocValues(Block block, int position, int key) { - checkMvLongs(block, position, key, docValuesMvOrdering); + void mvLongsFromDocValues(Block block, int position, int key) { + mvLongs(block, position, key, docValuesMvOrdering); } - void checkMvLongsUnordered(Block block, int position, int key) { - checkMvLongs(block, position, key, Block.MvOrdering.UNORDERED); + void mvLongsUnordered(Block block, int position, int key) { + mvLongs(block, position, key, Block.MvOrdering.UNORDERED); } - private void checkMvLongs(Block block, int position, int key, Block.MvOrdering expectedMv) { + private void mvLongs(Block block, int position, int key, Block.MvOrdering expectedMv) { LongBlock longs = (LongBlock) block; assertThat(longs.getValueCount(position), equalTo(key % 3 + 1)); int offset = longs.getFirstValueIndex(position); @@ -398,15 +496,15 @@ private void checkMvLongs(Block block, int position, int key, Block.MvOrdering e } } - void checkMvIntsFromDocValues(Block block, int position, int key) { - checkMvInts(block, position, key, docValuesMvOrdering); + void mvIntsFromDocValues(Block block, int position, int key) { + mvInts(block, position, key, docValuesMvOrdering); } - void checkMvIntsUnordered(Block block, int position, int key) { - checkMvInts(block, position, key, Block.MvOrdering.UNORDERED); + void mvIntsUnordered(Block block, int position, int key) { + mvInts(block, position, key, Block.MvOrdering.UNORDERED); } - private void checkMvInts(Block block, int position, int key, Block.MvOrdering expectedMv) { + private void mvInts(Block block, int position, int key, Block.MvOrdering expectedMv) { IntBlock ints = (IntBlock) block; assertThat(ints.getValueCount(position), equalTo(key % 3 + 1)); int offset = ints.getFirstValueIndex(position); @@ -418,7 +516,7 @@ private void checkMvInts(Block block, int position, int key, Block.MvOrdering ex } } - void checkMvShorts(Block block, int position, int key) { + void mvShorts(Block block, int position, int key) { IntBlock ints = (IntBlock) block; assertThat(ints.getValueCount(position), equalTo(key % 3 + 1)); int offset = ints.getFirstValueIndex(position); @@ -430,7 +528,7 @@ void checkMvShorts(Block block, int position, int key) { } } - void checkMvBytes(Block block, int position, int key) { + void mvBytes(Block block, int position, int key) { IntBlock ints = (IntBlock) block; assertThat(ints.getValueCount(position), equalTo(key % 3 + 1)); int offset = ints.getFirstValueIndex(position); @@ -442,7 +540,7 @@ void checkMvBytes(Block block, int position, int key) { } } - void checkMvDoubles(Block block, int position, int key) { + void mvDoubles(Block block, int position, int key) { DoubleBlock doubles = (DoubleBlock) block; int offset = doubles.getFirstValueIndex(position); for (int v = 0; v <= key % 3; v++) { @@ -453,15 +551,15 @@ void checkMvDoubles(Block block, int position, int key) { } } - void checkMvStringsFromDocValues(Block block, int position, int key) { - checkMvStrings(block, position, key, docValuesMvOrdering); + void mvStringsFromDocValues(Block block, int position, int key) { + mvStrings(block, position, key, docValuesMvOrdering); } - void checkMvStringsUnordered(Block block, int position, int key) { - checkMvStrings(block, position, key, Block.MvOrdering.UNORDERED); + void mvStringsUnordered(Block block, int position, int key) { + mvStrings(block, position, key, Block.MvOrdering.UNORDERED); } - void checkMvStrings(Block block, int position, int key, Block.MvOrdering expectedMv) { + void mvStrings(Block block, int position, int key, Block.MvOrdering expectedMv) { BytesRefBlock text = (BytesRefBlock) block; assertThat(text.getValueCount(position), equalTo(key % 3 + 1)); int offset = text.getFirstValueIndex(position); @@ -473,7 +571,7 @@ void checkMvStrings(Block block, int position, int key, Block.MvOrdering expecte } } - void checkMvBools(Block block, int position, int key) { + void mvBools(Block block, int position, int key) { BooleanBlock bools = (BooleanBlock) block; assertThat(bools.getValueCount(position), equalTo(key % 3 + 1)); int offset = bools.getFirstValueIndex(position); @@ -486,106 +584,195 @@ void checkMvBools(Block block, int position, int key) { } } Checks checks = new Checks(); - List infos = new ArrayList<>(); - infos.add(new Info(new NumberFieldMapper.NumberFieldType("long", NumberFieldMapper.NumberType.LONG), checks::checkLongs)); - infos.add( - new Info(new NumberFieldMapper.NumberFieldType("mv_long", NumberFieldMapper.NumberType.LONG), checks::checkMvLongsFromDocValues) - ); - infos.add(new Info(sourceNumberField("source_long", NumberFieldMapper.NumberType.LONG), checks::checkLongs)); - infos.add(new Info(sourceNumberField("mv_source_long", NumberFieldMapper.NumberType.LONG), checks::checkMvLongsUnordered)); - infos.add(new Info(new NumberFieldMapper.NumberFieldType("int", NumberFieldMapper.NumberType.INTEGER), checks::checkInts)); - infos.add( - new Info( - new NumberFieldMapper.NumberFieldType("mv_int", NumberFieldMapper.NumberType.INTEGER), - checks::checkMvIntsFromDocValues + class StatusChecks { + static void longs(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + if (forcedRowByRow) { + assertMap(readers, matchesMap().entry("long:row_stride:SingletonLongs", lessThanOrEqualTo(segmentCount))); + } else { + assertMap(readers, matchesMap().entry("long:column_at_a_time:SingletonLongs", lessThanOrEqualTo(pageCount))); + } + } + + static void textWithDelegate(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + if (forcedRowByRow) { + assertMap( + readers, + matchesMap().entry("text_with_delegate:row_stride:Delegating[to=kwd, impl=SingletonOrdinals]", segmentCount) + ); + } else { + assertMap( + readers, + matchesMap().entry( + "text_with_delegate:column_at_a_time:Delegating[to=kwd, impl=SingletonOrdinals]", + lessThanOrEqualTo(pageCount) + ) + ); + } + } + + static void bool(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + if (forcedRowByRow) { + assertMap(readers, matchesMap().entry("bool:row_stride:SingletonBooleans", segmentCount)); + } else { + assertMap(readers, matchesMap().entry("bool:column_at_a_time:SingletonBooleans", lessThanOrEqualTo(pageCount))); + } + } + + static void mvLongs(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + mvDocValues("mv_long", "Longs", forcedRowByRow, pageCount, segmentCount, readers); + } + + static void mvBool(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + mvDocValues("mv_bool", "Booleans", forcedRowByRow, pageCount, segmentCount, readers); + } + + static void mvTextWithDelegate(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + if (forcedRowByRow) { + assertMap( + readers, + matchesMap().entry( + "mv_text_with_delegate:row_stride:Delegating[to=mv_kwd, impl=SingletonOrdinals]", + lessThanOrEqualTo(segmentCount) + ).entry("mv_text_with_delegate:row_stride:Delegating[to=mv_kwd, impl=Ordinals]", lessThanOrEqualTo(segmentCount)) + ); + } else { + assertMap( + readers, + matchesMap().entry( + "mv_text_with_delegate:column_at_a_time:Delegating[to=mv_kwd, impl=SingletonOrdinals]", + lessThanOrEqualTo(pageCount) + ).entry("mv_text_with_delegate:column_at_a_time:Delegating[to=mv_kwd, impl=Ordinals]", lessThanOrEqualTo(pageCount)) + ); + } + } + + private static void mvDocValues( + String name, + String type, + boolean forcedRowByRow, + int pageCount, + int segmentCount, + Map readers + ) { + if (forcedRowByRow) { + Integer singletons = (Integer) readers.remove(name + ":row_stride:Singleton" + type); + if (singletons != null) { + segmentCount -= singletons; + } + assertMap(readers, matchesMap().entry(name + ":row_stride:" + type, segmentCount)); + } else { + Integer singletons = (Integer) readers.remove(name + ":column_at_a_time:Singleton" + type); + if (singletons != null) { + pageCount -= singletons; + } + assertMap(readers, matchesMap().entry(name + ":column_at_a_time:" + type, lessThanOrEqualTo(pageCount))); + } + } + + static void id(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + Matcher count; + if (forcedRowByRow) { + count = equalTo(segmentCount); + } else { + count = lessThanOrEqualTo(pageCount); + Integer columnAttempts = (Integer) readers.remove("_id:column_at_a_time:null"); + assertThat(columnAttempts, not(nullValue())); + } + assertMap( + readers, + matchesMap().entry("_id:row_stride:BlockStoredFieldsReader.Id", count) + .entry("stored_fields[requires_source:false, fields:1]", count) + ); + } + + static void constantBytes(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + if (forcedRowByRow) { + assertMap(readers, matchesMap().entry("constant_bytes:row_stride:constant[[66 6f 6f]]", segmentCount)); + } else { + assertMap( + readers, + matchesMap().entry("constant_bytes:column_at_a_time:constant[[66 6f 6f]]", lessThanOrEqualTo(pageCount)) + ); + } + } + + static void constantNulls(boolean forcedRowByRow, int pageCount, int segmentCount, Map readers) { + if (forcedRowByRow) { + assertMap(readers, matchesMap().entry("null:row_stride:constant_nulls", segmentCount)); + } else { + assertMap(readers, matchesMap().entry("null:column_at_a_time:constant_nulls", lessThanOrEqualTo(pageCount))); + } + } + } + List r = new ArrayList<>(); + r.add(new FieldCase(docValuesNumberField("long", NumberFieldMapper.NumberType.LONG), checks::longs, StatusChecks::longs)); + r.add( + new FieldCase( + docValuesNumberField("mv_long", NumberFieldMapper.NumberType.LONG), + checks::mvLongsFromDocValues, + StatusChecks::mvLongs ) ); - infos.add(new Info(sourceNumberField("source_int", NumberFieldMapper.NumberType.INTEGER), checks::checkInts)); - infos.add(new Info(sourceNumberField("mv_source_int", NumberFieldMapper.NumberType.INTEGER), checks::checkMvIntsUnordered)); - infos.add(new Info(new NumberFieldMapper.NumberFieldType("short", NumberFieldMapper.NumberType.SHORT), checks::checkShorts)); - infos.add(new Info(new NumberFieldMapper.NumberFieldType("mv_short", NumberFieldMapper.NumberType.SHORT), checks::checkMvShorts)); - infos.add(new Info(new NumberFieldMapper.NumberFieldType("byte", NumberFieldMapper.NumberType.BYTE), checks::checkBytes)); - infos.add(new Info(new NumberFieldMapper.NumberFieldType("mv_byte", NumberFieldMapper.NumberType.BYTE), checks::checkMvBytes)); - infos.add(new Info(new NumberFieldMapper.NumberFieldType("double", NumberFieldMapper.NumberType.DOUBLE), checks::checkDoubles)); - infos.add( - new Info(new NumberFieldMapper.NumberFieldType("mv_double", NumberFieldMapper.NumberType.DOUBLE), checks::checkMvDoubles) - ); - infos.add(new Info(new KeywordFieldMapper.KeywordFieldType("kwd"), checks::checkStrings)); - infos.add(new Info(new KeywordFieldMapper.KeywordFieldType("mv_kwd"), checks::checkMvStringsFromDocValues)); - infos.add(new Info(storedKeywordField("stored_kwd"), checks::checkStrings)); - infos.add(new Info(storedKeywordField("mv_stored_kwd"), checks::checkMvStringsUnordered)); - infos.add(new Info(new TextFieldMapper.TextFieldType("source_text", false), checks::checkStrings)); - infos.add(new Info(new TextFieldMapper.TextFieldType("mv_source_text", false), checks::checkMvStringsUnordered)); - infos.add(new Info(storedTextField("stored_text"), checks::checkStrings)); - infos.add(new Info(storedTextField("mv_stored_text"), checks::checkMvStringsUnordered)); - infos.add( - new Info(textFieldWithDelegate("text_with_delegate", new KeywordFieldMapper.KeywordFieldType("kwd")), checks::checkStrings) + r.add(new FieldCase(sourceNumberField("source_long", NumberFieldMapper.NumberType.LONG), checks::longs)); + r.add(new FieldCase(sourceNumberField("mv_source_long", NumberFieldMapper.NumberType.LONG), checks::mvLongsUnordered)); + r.add(new FieldCase(docValuesNumberField("int", NumberFieldMapper.NumberType.INTEGER), checks::ints)); + r.add(new FieldCase(docValuesNumberField("mv_int", NumberFieldMapper.NumberType.INTEGER), checks::mvIntsFromDocValues)); + r.add(new FieldCase(sourceNumberField("source_int", NumberFieldMapper.NumberType.INTEGER), checks::ints)); + r.add(new FieldCase(sourceNumberField("mv_source_int", NumberFieldMapper.NumberType.INTEGER), checks::mvIntsUnordered)); + r.add(new FieldCase(docValuesNumberField("short", NumberFieldMapper.NumberType.SHORT), checks::shorts)); + r.add(new FieldCase(docValuesNumberField("mv_short", NumberFieldMapper.NumberType.SHORT), checks::mvShorts)); + r.add(new FieldCase(docValuesNumberField("byte", NumberFieldMapper.NumberType.BYTE), checks::bytes)); + r.add(new FieldCase(docValuesNumberField("mv_byte", NumberFieldMapper.NumberType.BYTE), checks::mvBytes)); + r.add(new FieldCase(docValuesNumberField("double", NumberFieldMapper.NumberType.DOUBLE), checks::doubles)); + r.add(new FieldCase(docValuesNumberField("mv_double", NumberFieldMapper.NumberType.DOUBLE), checks::mvDoubles)); + r.add(new FieldCase(new KeywordFieldMapper.KeywordFieldType("kwd"), checks::strings)); + r.add(new FieldCase(new KeywordFieldMapper.KeywordFieldType("mv_kwd"), checks::mvStringsFromDocValues)); + r.add(new FieldCase(storedKeywordField("stored_kwd"), checks::strings)); + r.add(new FieldCase(storedKeywordField("mv_stored_kwd"), checks::mvStringsUnordered)); + r.add(new FieldCase(new TextFieldMapper.TextFieldType("source_text", false), checks::strings)); + r.add(new FieldCase(new TextFieldMapper.TextFieldType("mv_source_text", false), checks::mvStringsUnordered)); + r.add(new FieldCase(storedTextField("stored_text"), checks::strings)); + r.add(new FieldCase(storedTextField("mv_stored_text"), checks::mvStringsUnordered)); + r.add( + new FieldCase( + textFieldWithDelegate("text_with_delegate", new KeywordFieldMapper.KeywordFieldType("kwd")), + checks::strings, + StatusChecks::textWithDelegate + ) ); - infos.add( - new Info( + r.add( + new FieldCase( textFieldWithDelegate("mv_text_with_delegate", new KeywordFieldMapper.KeywordFieldType("mv_kwd")), - checks::checkMvStringsFromDocValues + checks::mvStringsFromDocValues, + StatusChecks::mvTextWithDelegate ) ); - infos.add(new Info(new BooleanFieldMapper.BooleanFieldType("bool"), checks::checkBools)); - infos.add(new Info(new BooleanFieldMapper.BooleanFieldType("mv_bool"), checks::checkMvBools)); - infos.add(new Info(new ProvidedIdFieldMapper(() -> false).fieldType(), checks::checkIds)); - infos.add(new Info(TsidExtractingIdFieldMapper.INSTANCE.fieldType(), checks::checkIds)); - infos.add( - new Info( + r.add(new FieldCase(new BooleanFieldMapper.BooleanFieldType("bool"), checks::bools, StatusChecks::bool)); + r.add(new FieldCase(new BooleanFieldMapper.BooleanFieldType("mv_bool"), checks::mvBools, StatusChecks::mvBool)); + r.add(new FieldCase(new ProvidedIdFieldMapper(() -> false).fieldType(), checks::ids, StatusChecks::id)); + r.add(new FieldCase(TsidExtractingIdFieldMapper.INSTANCE.fieldType(), checks::ids, StatusChecks::id)); + r.add( + new FieldCase( new ValuesSourceReaderOperator.FieldInfo("constant_bytes", List.of(BlockLoader.constantBytes(new BytesRef("foo")))), - checks::checkConstantBytes + checks::constantBytes, + StatusChecks::constantBytes ) ); - infos.add( - new Info(new ValuesSourceReaderOperator.FieldInfo("null", List.of(BlockLoader.CONSTANT_NULLS)), checks::checkConstantNulls) - ); - - List operators = new ArrayList<>(); - Collections.shuffle(infos, random()); - - operators.add( - new ValuesSourceReaderOperator.Factory( - List.of(fieldInfo(new NumberFieldMapper.NumberFieldType("key", NumberFieldMapper.NumberType.INTEGER))), - List.of(reader), - 0 - ).get(driverContext) + r.add( + new FieldCase( + new ValuesSourceReaderOperator.FieldInfo("null", List.of(BlockLoader.CONSTANT_NULLS)), + checks::constantNulls, + StatusChecks::constantNulls + ) ); - List tests = new ArrayList<>(); - while (infos.isEmpty() == false) { - List b = randomNonEmptySubsetOf(infos); - infos.removeAll(b); - tests.addAll(b); - operators.add( - new ValuesSourceReaderOperator.Factory(b.stream().map(i -> i.info).toList(), List.of(reader), 0).get(driverContext) - ); - } - List results = drive(operators, input.iterator(), driverContext); - assertThat(results, hasSize(input.size())); - for (Page page : results) { - assertThat(page.getBlockCount(), equalTo(tests.size() + 2 /* one for doc and one for keys */)); - IntVector keys = page.getBlock(1).asVector(); - for (int p = 0; p < page.getPositionCount(); p++) { - int key = keys.getInt(p); - for (int i = 0; i < tests.size(); i++) { - try { - tests.get(i).check.check(page.getBlock(2 + i), p, key); - } catch (AssertionError e) { - throw new AssertionError("error checking " + tests.get(i).info.name() + "[" + p + "]: " + e.getMessage(), e); - } - } - } - } - for (Operator op : operators) { - assertThat(((ValuesSourceReaderOperator) op).status().pagesProcessed(), equalTo(input.size())); - } - assertDriverContext(driverContext); + Collections.shuffle(r, random()); + return r; } public void testWithNulls() throws IOException { - MappedFieldType intFt = new NumberFieldMapper.NumberFieldType("i", NumberFieldMapper.NumberType.INTEGER); - MappedFieldType longFt = new NumberFieldMapper.NumberFieldType("j", NumberFieldMapper.NumberType.LONG); - MappedFieldType doubleFt = new NumberFieldMapper.NumberFieldType("d", NumberFieldMapper.NumberType.DOUBLE); + MappedFieldType intFt = docValuesNumberField("i", NumberFieldMapper.NumberType.INTEGER); + MappedFieldType longFt = docValuesNumberField("j", NumberFieldMapper.NumberType.LONG); + MappedFieldType doubleFt = docValuesNumberField("d", NumberFieldMapper.NumberType.DOUBLE); MappedFieldType kwFt = new KeywordFieldMapper.KeywordFieldType("kw"); NumericDocValuesField intField = new NumericDocValuesField(intFt.name(), 0); @@ -659,6 +846,10 @@ public void testWithNulls() throws IOException { assertDriverContext(driverContext); } + private NumberFieldMapper.NumberFieldType docValuesNumberField(String name, NumberFieldMapper.NumberType type) { + return new NumberFieldMapper.NumberFieldType(name, type); + } + private NumberFieldMapper.NumberFieldType sourceNumberField(String name, NumberFieldMapper.NumberType type) { return new NumberFieldMapper.NumberFieldType( name,