Skip to content

Commit

Permalink
[SPARK-48356][SQL] Support for FOR statement
Browse files Browse the repository at this point in the history
### What changes were proposed in this pull request?
In this PR, support for FOR statement in SQL scripting is introduced. Examples:

```
FOR row AS SELECT * FROM t DO
   SELECT row.intCol;
 END FOR;
```

```
FOR SELECT * FROM t DO
   SELECT intCol;
 END FOR;
```

Implementation notes:
As local variables for SQL scripting are currently a work in progress, session variables are used to simulate them.
When FOR begins executing, session variables are declared for each column in the result set, and optionally for the for variable if it is present ("row" in the example above).
On each iteration, these variables are overwritten with the values from the row currently being iterated.
The variables are dropped upon loop completion.

This means that if a session variable with the same name as a column in the result set already exists, the FOR statement will drop that variable upon completion. If that variable is referenced after the FOR statement, the script will fail because the variable no longer exists. This limitation is already present in the current iteration of SQL scripting and will be fixed once local variables are introduced. With local variables, the implementation of the FOR statement will also be much simpler.

Grammar/parser changes:
`forStatement` grammar rule
`visitForStatement` rule visitor
`ForStatement` logical operator

### Why are the changes needed?
The FOR statement is a part of SQL scripting control flow logic.

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
New tests are added to all three scripting test suites: `SqlScriptingParserSuite`, `SqlScriptingExecutionNodeSuite` and `SqlScriptingInterpreterSuite`.

### Was this patch authored or co-authored using generative AI tooling?
No

Closes #48794 from dusantism-db/scripting-for-loop.

Authored-by: Dušan Tišma <dusan.tisma@databricks.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
  • Loading branch information
dusantism-db authored and cloud-fan committed Nov 28, 2024
1 parent 4eed184 commit 2c2c0e0
Show file tree
Hide file tree
Showing 8 changed files with 2,006 additions and 26 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ compoundStatement
| leaveStatement
| iterateStatement
| loopStatement
| forStatement
;

setStatementWithOptionalVarKeyword
Expand Down Expand Up @@ -111,6 +112,10 @@ loopStatement
: beginLabel? LOOP compoundBody END LOOP endLabel?
;

// FOR statement: an optional begin label, the FOR keyword, an optional
// `<multipartIdentifier> AS` prefix, the query, and then a compound body
// enclosed between DO and END FOR, followed by an optional end label.
forStatement
: beginLabel? FOR (multipartIdentifier AS)? query DO compoundBody END FOR endLabel?
;

// A single statement (either `statement` or `setResetStatement`), followed by
// any number of semicolons and then end of input.
singleStatement
: (statement|setResetStatement) SEMICOLON* EOF
;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,8 @@ class AstBuilder extends DataTypeAstBuilder
visitSearchedCaseStatementImpl(searchedCaseContext, labelCtx)
case simpleCaseContext: SimpleCaseStatementContext =>
visitSimpleCaseStatementImpl(simpleCaseContext, labelCtx)
case forStatementContext: ForStatementContext =>
visitForStatementImpl(forStatementContext, labelCtx)
case stmt => visit(stmt).asInstanceOf[CompoundPlanStatement]
}
} else {
Expand Down Expand Up @@ -347,28 +349,48 @@ class AstBuilder extends DataTypeAstBuilder
RepeatStatement(condition, body, Some(labelText))
}

/**
 * Builds a [[ForStatement]] from a parsed FOR statement.
 *
 * The labeled scope is entered before the query and the body are visited and is exited once
 * the body has been built, so nested statements are visited while the scope is active.
 *
 * @param ctx Parser context of the FOR statement.
 * @param labelCtx Label context used to enter and exit the labeled scope of this statement;
 *                 it is also handed to the visitor of the compound body.
 * @return A [[ForStatement]] holding the query, the optional variable name (the raw text of
 *         the identifier preceding AS, if present), the body and the label.
 */
private def visitForStatementImpl(
    ctx: ForStatementContext,
    labelCtx: SqlScriptingLabelContext): ForStatement = {
  val beginLabelOpt = Option(ctx.beginLabel())
  val scopeLabel = labelCtx.enterLabeledScope(beginLabelOpt, Option(ctx.endLabel()))

  // The query is wrapped in a SingleStatement built under the origin of the query context.
  val iteratedQuery = withOrigin(ctx.query()) {
    SingleStatement(visitQuery(ctx.query()))
  }
  // multipartIdentifier() is null when the optional `<name> AS` prefix is absent.
  val rowVariable = Option(ctx.multipartIdentifier()).map(id => id.getText)
  val loopBody = visitCompoundBodyImpl(
    ctx.compoundBody(),
    None,
    allowVarDeclare = false,
    labelCtx)
  labelCtx.exitLabeledScope(beginLabelOpt)

  ForStatement(
    query = iteratedQuery,
    variableName = rowVariable,
    body = loopBody,
    label = Some(scopeLabel))
}

/**
 * Checks whether `ctx` is a labeled compound block or loop whose begin label matches `label`.
 *
 * The begin label text is lower-cased with `Locale.ROOT` before the comparison, while `label`
 * is compared as given; presumably callers pass an already lower-cased label (TODO confirm).
 * A missing begin label (null from the parser context) never matches.
 *
 * @param ctx Parser rule context to inspect.
 * @param label Label to look for.
 * @param isIterate When true, a match on a BEGIN ... END compound block is rejected by throwing
 *                  the error built by `SqlScriptingErrors.invalidIterateLabelUsageForCompound`.
 * @return true if `ctx` is a BEGIN ... END compound block or a WHILE, REPEAT, LOOP or FOR
 *         statement with a matching begin label; false otherwise.
 */
private def leaveOrIterateContextHasLabel(
    ctx: RuleContext, label: String, isIterate: Boolean): Boolean = {
  ctx match {
    case c: BeginEndCompoundBlockContext
      if Option(c.beginLabel()).exists { b =>
        b.multipartIdentifier().getText.toLowerCase(Locale.ROOT).equals(label)
      } =>
      // A matching label on a compound block is only acceptable when not iterating.
      if (isIterate) {
        throw SqlScriptingErrors.invalidIterateLabelUsageForCompound(CurrentOrigin.get, label)
      }
      true
    case c: WhileStatementContext
      if Option(c.beginLabel()).exists { b =>
        b.multipartIdentifier().getText.toLowerCase(Locale.ROOT).equals(label)
      } => true
    case c: RepeatStatementContext
      if Option(c.beginLabel()).exists { b =>
        b.multipartIdentifier().getText.toLowerCase(Locale.ROOT).equals(label)
      } => true
    case c: LoopStatementContext
      if Option(c.beginLabel()).exists { b =>
        b.multipartIdentifier().getText.toLowerCase(Locale.ROOT).equals(label)
      } => true
    case c: ForStatementContext
      if Option(c.beginLabel()).exists { b =>
        b.multipartIdentifier().getText.toLowerCase(Locale.ROOT).equals(label)
      } => true
    case _ => false
  }
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -267,3 +267,31 @@ case class LoopStatement(
LoopStatement(newChildren(0).asInstanceOf[CompoundBody], label)
}
}

/**
 * Logical operator for FOR statement.
 * @param query Query which is executed once; its result set is then iterated on, row by row.
 * @param variableName Optional name of the variable used to access the current row during
 *                     iteration.
 * @param body Compound body, i.e. the collection of statements that are executed for each row
 *             in the result set of the query.
 * @param label An optional label for the loop which is unique amongst all labels for statements
 *              within which the FOR statement is contained.
 *              If an end label is specified it must match the beginning label.
 *              The label can be used to LEAVE or ITERATE the loop.
 */
case class ForStatement(
    query: SingleStatement,
    variableName: Option[String],
    body: CompoundBody,
    label: Option[String]) extends CompoundPlanStatement {

  override def output: Seq[Attribute] = Seq.empty

  // The query comes first and the body second; withNewChildrenInternal relies on this order.
  override def children: Seq[LogicalPlan] = Seq(query, body)

  override protected def withNewChildrenInternal(
      newChildren: IndexedSeq[LogicalPlan]): LogicalPlan = {
    newChildren match {
      // Only the two children are replaced; variableName and label are carried over.
      case IndexedSeq(newQuery: SingleStatement, newBody: CompoundBody) =>
        copy(query = newQuery, body = newBody)
    }
  }
}
Loading

0 comments on commit 2c2c0e0

Please sign in to comment.