From da92293f9ce0be1ac283c4a5d769af550abf7031 Mon Sep 17 00:00:00 2001 From: Richard Chen Date: Tue, 30 Apr 2024 09:07:24 +0800 Subject: [PATCH] [SPARK-48033][SQL] Fix `RuntimeReplaceable` expressions being used in default columns ### What changes were proposed in this pull request? Currently, default columns that have a default of a `RuntimeReplaceable` expression fail. This is because the `AlterTableCommand` constant folds before replacing expressions with the actual implementation. For example: ``` sql(s"CREATE TABLE t(v VARIANT DEFAULT parse_json('1')) USING PARQUET") sql("INSERT INTO t VALUES(DEFAULT)") ``` fails because `parse_json` is `RuntimeReplaceable` and is evaluated before the analyzer inserts the correct expression into the plan. To fix this, we run the `ReplaceExpressions` rule before `ConstantFolding`. ### Why are the changes needed? This allows default columns to use expressions that are `RuntimeReplaceable`. This is especially important for Variant types because literal variants are difficult to create - `parse_json` will likely be used the majority of the time. ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? added UT ### Was this patch authored or co-authored using generative AI tooling? no Closes #46269 from richardc-db/fix_default_cols_runtime_replaceable. 
Authored-by: Richard Chen Signed-off-by: Wenchen Fan --- .../sql/catalyst/util/ResolveDefaultColumnsUtil.scala | 4 ++-- .../org/apache/spark/sql/ResolveDefaultColumnsSuite.scala | 8 ++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala index 7b00349a4f27a..d73e2ca6bd9d4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.analysis._ import org.apache.spark.sql.catalyst.catalog.{CatalogDatabase, InMemoryCatalog, SessionCatalog} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.expressions.{Literal => ExprLiteral} -import org.apache.spark.sql.catalyst.optimizer.ConstantFolding +import org.apache.spark.sql.catalyst.optimizer.{ConstantFolding, ReplaceExpressions} import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParseException} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.trees.TreePattern.PLAN_EXPRESSION @@ -289,7 +289,7 @@ object ResolveDefaultColumns extends QueryErrorsBase val analyzer: Analyzer = DefaultColumnAnalyzer val analyzed = analyzer.execute(Project(Seq(Alias(parsed, colName)()), OneRowRelation())) analyzer.checkAnalysis(analyzed) - ConstantFolding(analyzed) + ConstantFolding(ReplaceExpressions(analyzed)) } catch { case ex: AnalysisException => throw QueryCompilationErrors.defaultValuesUnresolvedExprError( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ResolveDefaultColumnsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ResolveDefaultColumnsSuite.scala index 48a9564ab8f95..bca1472799939 100644 --- 
a/sql/core/src/test/scala/org/apache/spark/sql/ResolveDefaultColumnsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ResolveDefaultColumnsSuite.scala @@ -279,4 +279,12 @@ class ResolveDefaultColumnsSuite extends QueryTest with SharedSparkSession { checkAnswer(sql("select CAST(c as STRING) from t"), Row("2018-11-17 13:33:33")) } } + + test("SPARK-48033: default columns using runtime replaceable expression works") { + withTable("t") { + sql("CREATE TABLE t(v VARIANT DEFAULT parse_json('1')) USING PARQUET") + sql("INSERT INTO t VALUES(DEFAULT)") + checkAnswer(sql("select v from t"), sql("select parse_json('1')").collect()) + } + } }