Skip to content

Commit

Permalink
ESQL: Simplify CombineProjections (elastic#117882)
Browse files Browse the repository at this point in the history
Make combineUpperGroupingsAndLowerProjections a bit simpler.
Also slightly improve a test and add comments to provide more context.
  • Loading branch information
alex-spies authored Dec 3, 2024
1 parent cab6dc5 commit cca7051
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 20 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import org.elasticsearch.xpack.esql.plan.logical.UnaryPlan;

import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List;

public final class CombineProjections extends OptimizerRules.OptimizerRule<UnaryPlan> {
Expand Down Expand Up @@ -144,30 +145,31 @@ private static List<Expression> combineUpperGroupingsAndLowerProjections(
List<? extends NamedExpression> upperGroupings,
List<? extends NamedExpression> lowerProjections
) {
assert upperGroupings.size() <= 1
|| upperGroupings.stream().anyMatch(group -> group.anyMatch(expr -> expr instanceof Categorize)) == false
: "CombineProjections only tested with a single CATEGORIZE with no additional groups";
// Collect the alias map for resolving the source (f1 = 1, f2 = f1, etc..)
AttributeMap<Expression> aliases = new AttributeMap<>();
AttributeMap<Attribute> aliases = new AttributeMap<>();
for (NamedExpression ne : lowerProjections) {
// record the alias
aliases.put(ne.toAttribute(), Alias.unwrap(ne));
// Record the aliases.
// Projections are just aliases for attributes, so casting is safe.
aliases.put(ne.toAttribute(), (Attribute) Alias.unwrap(ne));
}
// Replace any matching attribute directly with the aliased attribute from the projection.
AttributeSet seen = new AttributeSet();
List<Expression> replaced = new ArrayList<>();

// Propagate any renames from the lower projection into the upper groupings.
// This can lead to duplicates: e.g.
// | EVAL x = y | STATS ... BY x, y
// All substitutions happen before; groupings must be attributes at this point except for CATEGORIZE which will be an alias like
// `c = CATEGORIZE(attribute)`.
// Therefore, it is correct to deduplicate based on simple equality (based on names) instead of name ids (Set vs. AttributeSet).
// TODO: The deduplication based on simple equality will be insufficient in case of multiple CATEGORIZEs, e.g. for
// `| EVAL x = y | STATS ... BY CATEGORIZE(x), CATEGORIZE(y)`. That will require semantic equality instead.
LinkedHashSet<NamedExpression> resolvedGroupings = new LinkedHashSet<>();
for (NamedExpression ne : upperGroupings) {
// Duplicated attributes are ignored.
if (ne instanceof Attribute attribute) {
var newExpression = aliases.resolve(attribute, attribute);
if (newExpression instanceof Attribute newAttribute && seen.add(newAttribute) == false) {
// Already seen, skip
continue;
}
replaced.add(newExpression);
} else {
// For grouping functions, this will replace nested properties too
replaced.add(ne.transformUp(Attribute.class, a -> aliases.resolve(a, a)));
}
NamedExpression transformed = (NamedExpression) ne.transformUp(Attribute.class, a -> aliases.resolve(a, a));
resolvedGroupings.add(transformed);
}
return replaced;
return new ArrayList<>(resolvedGroupings);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1217,7 +1217,7 @@ public void testCombineProjectionWithCategorizeGrouping() {
var plan = plan("""
from test
| eval k = first_name, k1 = k
| stats s = sum(salary) by cat = CATEGORIZE(k)
| stats s = sum(salary) by cat = CATEGORIZE(k1)
| keep s, cat
""");

Expand Down

0 comments on commit cca7051

Please sign in to comment.