[SPARK-50313][SQL][TESTS] Enable ANSI in SQL *SQLQueryTestSuite by default

### What changes were proposed in this pull request?

This PR enables ANSI mode by default in the SQL `*SQLQueryTestSuite` suites for regular tests and switches the original `ansi/` golden files to `nonansi/`, following SPARK-44444, which turned ANSI mode on in production.
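
For context, a minimal sketch of what the default flip means for query behavior (assuming a Spark SQL session; the queries are illustrative and not taken from this PR's test files):

```sql
-- Old test default: ANSI off, permissive semantics
SET spark.sql.ansi.enabled = false;
SELECT 1 / 0;                          -- NULL
SELECT element_at(array(1, 2, 3), 5);  -- NULL

-- New test default: ANSI on, matching the production default from SPARK-44444
SET spark.sql.ansi.enabled = true;
SELECT 1 / 0;                          -- fails with a DIVIDE_BY_ZERO error
SELECT element_at(array(1, 2, 3), 5);  -- fails with an invalid array index error
```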

### Why are the changes needed?

test improvements

### Does this PR introduce _any_ user-facing change?
no

### How was this patch tested?
new golden files

### Was this patch authored or co-authored using generative AI tooling?
no

Closes #48842 from yaooqinn/SPARK-50313.

Authored-by: Kent Yao <yao@apache.org>
Signed-off-by: Kent Yao <yao@apache.org>
yaooqinn committed Nov 18, 2024
1 parent 281a8e1 commit b626528
Showing 172 changed files with 13,906 additions and 10,703 deletions.
@@ -212,7 +212,7 @@ select
size(timestamp_array)
from primitive_arrays
-- !query analysis
-Project [size(boolean_array#x, true) AS size(boolean_array)#x, size(tinyint_array#x, true) AS size(tinyint_array)#x, size(smallint_array#x, true) AS size(smallint_array)#x, size(int_array#x, true) AS size(int_array)#x, size(bigint_array#x, true) AS size(bigint_array)#x, size(decimal_array#x, true) AS size(decimal_array)#x, size(double_array#x, true) AS size(double_array)#x, size(float_array#x, true) AS size(float_array)#x, size(date_array#x, true) AS size(date_array)#x, size(timestamp_array#x, true) AS size(timestamp_array)#x]
+Project [size(boolean_array#x, false) AS size(boolean_array)#x, size(tinyint_array#x, false) AS size(tinyint_array)#x, size(smallint_array#x, false) AS size(smallint_array)#x, size(int_array#x, false) AS size(int_array)#x, size(bigint_array#x, false) AS size(bigint_array)#x, size(decimal_array#x, false) AS size(decimal_array)#x, size(double_array#x, false) AS size(double_array)#x, size(float_array#x, false) AS size(float_array)#x, size(date_array#x, false) AS size(date_array)#x, size(timestamp_array#x, false) AS size(timestamp_array)#x]
+- SubqueryAlias primitive_arrays
+- View (`primitive_arrays`, [boolean_array#x, tinyint_array#x, smallint_array#x, int_array#x, bigint_array#x, decimal_array#x, double_array#x, float_array#x, date_array#x, timestamp_array#x])
+- Project [cast(boolean_array#x as array<boolean>) AS boolean_array#x, cast(tinyint_array#x as array<tinyint>) AS tinyint_array#x, cast(smallint_array#x as array<smallint>) AS smallint_array#x, cast(int_array#x as array<int>) AS int_array#x, cast(bigint_array#x as array<bigint>) AS bigint_array#x, cast(decimal_array#x as array<decimal(19,0)>) AS decimal_array#x, cast(double_array#x as array<double>) AS double_array#x, cast(float_array#x as array<float>) AS float_array#x, cast(date_array#x as array<date>) AS date_array#x, cast(timestamp_array#x as array<timestamp>) AS timestamp_array#x]
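
The third argument in the `size(...)` calls above is the `legacySizeOfNull` flag, which effectively tracks `!spark.sql.ansi.enabled`, so the `true` → `false` flip is the new default surfacing in the analyzed plan. A minimal sketch of the user-visible difference, assuming a Spark SQL session:

```sql
SET spark.sql.ansi.enabled = false;
SELECT size(CAST(NULL AS ARRAY<INT>));  -- -1, the legacy sentinel value
SET spark.sql.ansi.enabled = true;
SELECT size(CAST(NULL AS ARRAY<INT>));  -- NULL under ANSI semantics
```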
@@ -224,70 +224,70 @@ Project [size(boolean_array#x, true) AS size(boolean_array)#x, size(tinyint_arra
-- !query
select element_at(array(1, 2, 3), 5)
-- !query analysis
-Project [element_at(array(1, 2, 3), 5, None, false) AS element_at(array(1, 2, 3), 5)#x]
+Project [element_at(array(1, 2, 3), 5, None, true) AS element_at(array(1, 2, 3), 5)#x]
+- OneRowRelation


-- !query
select element_at(array(1, 2, 3), -5)
-- !query analysis
-Project [element_at(array(1, 2, 3), -5, None, false) AS element_at(array(1, 2, 3), -5)#x]
+Project [element_at(array(1, 2, 3), -5, None, true) AS element_at(array(1, 2, 3), -5)#x]
+- OneRowRelation


-- !query
select element_at(array(1, 2, 3), 0)
-- !query analysis
-Project [element_at(array(1, 2, 3), 0, None, false) AS element_at(array(1, 2, 3), 0)#x]
+Project [element_at(array(1, 2, 3), 0, None, true) AS element_at(array(1, 2, 3), 0)#x]
+- OneRowRelation
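
In these plans the trailing `false`/`true` argument to `element_at` is its `failOnError` flag, so these golden-file flips encode the ANSI runtime change. A sketch, assuming a Spark SQL session:

```sql
SET spark.sql.ansi.enabled = true;
SELECT element_at(array(1, 2, 3), 5);
-- now raises an out-of-bounds array index error; with ANSI off it returned NULL
```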


-- !query
select elt(4, '123', '456')
-- !query analysis
-Project [elt(4, 123, 456, false) AS elt(4, 123, 456)#x]
+Project [elt(4, 123, 456, true) AS elt(4, 123, 456)#x]
+- OneRowRelation


-- !query
select elt(0, '123', '456')
-- !query analysis
-Project [elt(0, 123, 456, false) AS elt(0, 123, 456)#x]
+Project [elt(0, 123, 456, true) AS elt(0, 123, 456)#x]
+- OneRowRelation


-- !query
select elt(-1, '123', '456')
-- !query analysis
-Project [elt(-1, 123, 456, false) AS elt(-1, 123, 456)#x]
+Project [elt(-1, 123, 456, true) AS elt(-1, 123, 456)#x]
+- OneRowRelation


-- !query
select elt(null, '123', '456')
-- !query analysis
-Project [elt(cast(null as int), 123, 456, false) AS elt(NULL, 123, 456)#x]
+Project [elt(cast(null as int), 123, 456, true) AS elt(NULL, 123, 456)#x]
+- OneRowRelation


-- !query
select elt(null, '123', null)
-- !query analysis
-Project [elt(cast(null as int), 123, cast(null as string), false) AS elt(NULL, 123, NULL)#x]
+Project [elt(cast(null as int), 123, cast(null as string), true) AS elt(NULL, 123, NULL)#x]
+- OneRowRelation


-- !query
select elt(1, '123', null)
-- !query analysis
-Project [elt(1, 123, cast(null as string), false) AS elt(1, 123, NULL)#x]
+Project [elt(1, 123, cast(null as string), true) AS elt(1, 123, NULL)#x]
+- OneRowRelation


-- !query
select elt(2, '123', null)
-- !query analysis
-Project [elt(2, 123, cast(null as string), false) AS elt(2, 123, NULL)#x]
+Project [elt(2, 123, cast(null as string), true) AS elt(2, 123, NULL)#x]
+- OneRowRelation
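
Likewise, the trailing boolean on `elt` is its `failOnError` flag. A sketch of the edge these files pin down, assuming a Spark SQL session:

```sql
SET spark.sql.ansi.enabled = false;
SELECT elt(4, '123', '456');  -- NULL: index 4 is out of range for two string arguments
SET spark.sql.ansi.enabled = true;
SELECT elt(4, '123', '456');  -- raises an invalid-index error under the new default
```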


@@ -360,21 +360,21 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
-- !query
select size(arrays_zip(array(1, 2, 3), array(4), array(7, 8, 9, 10)))
-- !query analysis
-Project [size(arrays_zip(array(1, 2, 3), array(4), array(7, 8, 9, 10), 0, 1, 2), true) AS size(arrays_zip(array(1, 2, 3), array(4), array(7, 8, 9, 10)))#x]
+Project [size(arrays_zip(array(1, 2, 3), array(4), array(7, 8, 9, 10), 0, 1, 2), false) AS size(arrays_zip(array(1, 2, 3), array(4), array(7, 8, 9, 10)))#x]
+- OneRowRelation


-- !query
select size(arrays_zip(array(), array(1, 2, 3), array(4), array(7, 8, 9, 10)))
-- !query analysis
-Project [size(arrays_zip(array(), array(1, 2, 3), array(4), array(7, 8, 9, 10), 0, 1, 2, 3), true) AS size(arrays_zip(array(), array(1, 2, 3), array(4), array(7, 8, 9, 10)))#x]
+Project [size(arrays_zip(array(), array(1, 2, 3), array(4), array(7, 8, 9, 10), 0, 1, 2, 3), false) AS size(arrays_zip(array(), array(1, 2, 3), array(4), array(7, 8, 9, 10)))#x]
+- OneRowRelation


-- !query
select size(arrays_zip(array(1, 2, 3), array(4), null, array(7, 8, 9, 10)))
-- !query analysis
-Project [size(arrays_zip(array(1, 2, 3), array(4), null, array(7, 8, 9, 10), 0, 1, 2, 3), true) AS size(arrays_zip(array(1, 2, 3), array(4), NULL, array(7, 8, 9, 10)))#x]
+Project [size(arrays_zip(array(1, 2, 3), array(4), null, array(7, 8, 9, 10), 0, 1, 2, 3), false) AS size(arrays_zip(array(1, 2, 3), array(4), NULL, array(7, 8, 9, 10)))#x]
+- OneRowRelation


189 changes: 171 additions & 18 deletions sql/core/src/test/resources/sql-tests/analyzer-results/cast.sql.out
@@ -205,57 +205,193 @@ Project [hex(cast(abc as binary)) AS hex(CAST(abc AS BINARY))#x]
-- !query
SELECT HEX(CAST(CAST(123 AS byte) AS binary))
-- !query analysis
-Project [hex(cast(cast(123 as tinyint) as binary)) AS hex(CAST(CAST(123 AS TINYINT) AS BINARY))#x]
-+- OneRowRelation
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.CAST_WITH_CONF_SUGGESTION",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "config" : "\"spark.sql.ansi.enabled\"",
+    "configVal" : "'false'",
+    "sqlExpr" : "\"CAST(CAST(123 AS TINYINT) AS BINARY)\"",
+    "srcType" : "\"TINYINT\"",
+    "targetType" : "\"BINARY\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 12,
+    "stopIndex" : 44,
+    "fragment" : "CAST(CAST(123 AS byte) AS binary)"
+  } ]
+}


-- !query
SELECT HEX(CAST(CAST(-123 AS byte) AS binary))
-- !query analysis
-Project [hex(cast(cast(-123 as tinyint) as binary)) AS hex(CAST(CAST(-123 AS TINYINT) AS BINARY))#x]
-+- OneRowRelation
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.CAST_WITH_CONF_SUGGESTION",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "config" : "\"spark.sql.ansi.enabled\"",
+    "configVal" : "'false'",
+    "sqlExpr" : "\"CAST(CAST(-123 AS TINYINT) AS BINARY)\"",
+    "srcType" : "\"TINYINT\"",
+    "targetType" : "\"BINARY\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 12,
+    "stopIndex" : 45,
+    "fragment" : "CAST(CAST(-123 AS byte) AS binary)"
+  } ]
+}


-- !query
SELECT HEX(CAST(123S AS binary))
-- !query analysis
-Project [hex(cast(123 as binary)) AS hex(CAST(123 AS BINARY))#x]
-+- OneRowRelation
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.CAST_WITH_CONF_SUGGESTION",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "config" : "\"spark.sql.ansi.enabled\"",
+    "configVal" : "'false'",
+    "sqlExpr" : "\"CAST(123 AS BINARY)\"",
+    "srcType" : "\"SMALLINT\"",
+    "targetType" : "\"BINARY\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 12,
+    "stopIndex" : 31,
+    "fragment" : "CAST(123S AS binary)"
+  } ]
+}


-- !query
SELECT HEX(CAST(-123S AS binary))
-- !query analysis
-Project [hex(cast(-123 as binary)) AS hex(CAST(-123 AS BINARY))#x]
-+- OneRowRelation
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.CAST_WITH_CONF_SUGGESTION",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "config" : "\"spark.sql.ansi.enabled\"",
+    "configVal" : "'false'",
+    "sqlExpr" : "\"CAST(-123 AS BINARY)\"",
+    "srcType" : "\"SMALLINT\"",
+    "targetType" : "\"BINARY\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 12,
+    "stopIndex" : 32,
+    "fragment" : "CAST(-123S AS binary)"
+  } ]
+}


-- !query
SELECT HEX(CAST(123 AS binary))
-- !query analysis
-Project [hex(cast(123 as binary)) AS hex(CAST(123 AS BINARY))#x]
-+- OneRowRelation
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.CAST_WITH_CONF_SUGGESTION",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "config" : "\"spark.sql.ansi.enabled\"",
+    "configVal" : "'false'",
+    "sqlExpr" : "\"CAST(123 AS BINARY)\"",
+    "srcType" : "\"INT\"",
+    "targetType" : "\"BINARY\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 12,
+    "stopIndex" : 30,
+    "fragment" : "CAST(123 AS binary)"
+  } ]
+}


-- !query
SELECT HEX(CAST(-123 AS binary))
-- !query analysis
-Project [hex(cast(-123 as binary)) AS hex(CAST(-123 AS BINARY))#x]
-+- OneRowRelation
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.CAST_WITH_CONF_SUGGESTION",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "config" : "\"spark.sql.ansi.enabled\"",
+    "configVal" : "'false'",
+    "sqlExpr" : "\"CAST(-123 AS BINARY)\"",
+    "srcType" : "\"INT\"",
+    "targetType" : "\"BINARY\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 12,
+    "stopIndex" : 31,
+    "fragment" : "CAST(-123 AS binary)"
+  } ]
+}


-- !query
SELECT HEX(CAST(123L AS binary))
-- !query analysis
-Project [hex(cast(123 as binary)) AS hex(CAST(123 AS BINARY))#x]
-+- OneRowRelation
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.CAST_WITH_CONF_SUGGESTION",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "config" : "\"spark.sql.ansi.enabled\"",
+    "configVal" : "'false'",
+    "sqlExpr" : "\"CAST(123 AS BINARY)\"",
+    "srcType" : "\"BIGINT\"",
+    "targetType" : "\"BINARY\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 12,
+    "stopIndex" : 31,
+    "fragment" : "CAST(123L AS binary)"
+  } ]
+}


-- !query
SELECT HEX(CAST(-123L AS binary))
-- !query analysis
-Project [hex(cast(-123 as binary)) AS hex(CAST(-123 AS BINARY))#x]
-+- OneRowRelation
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.CAST_WITH_CONF_SUGGESTION",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "config" : "\"spark.sql.ansi.enabled\"",
+    "configVal" : "'false'",
+    "sqlExpr" : "\"CAST(-123 AS BINARY)\"",
+    "srcType" : "\"BIGINT\"",
+    "targetType" : "\"BINARY\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 12,
+    "stopIndex" : 32,
+    "fragment" : "CAST(-123L AS binary)"
+  } ]
+}
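
Under ANSI mode the analyzer rejects numeric-to-BINARY casts outright, which is why the old `Project`/`hex` plans in this file become `DATATYPE_MISMATCH.CAST_WITH_CONF_SUGGESTION` errors pointing at `spark.sql.ansi.enabled`. A sketch, assuming a Spark SQL session; the non-ANSI result value is deliberately omitted rather than guessed:

```sql
SET spark.sql.ansi.enabled = false;
SELECT HEX(CAST(123 AS BINARY));  -- allowed: the integer is cast to its byte representation
SET spark.sql.ansi.enabled = true;
SELECT HEX(CAST(123 AS BINARY));  -- fails analysis with DATATYPE_MISMATCH.CAST_WITH_CONF_SUGGESTION
```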


-- !query
@@ -804,8 +940,25 @@ Project [hex(cast(abc as binary)) AS hex(CAST(abc AS BINARY))#x]
-- !query
SELECT HEX((123 :: byte) :: binary)
-- !query analysis
-Project [hex(cast(cast(123 as tinyint) as binary)) AS hex(CAST(CAST(123 AS TINYINT) AS BINARY))#x]
-+- OneRowRelation
+org.apache.spark.sql.catalyst.ExtendedAnalysisException
+{
+  "errorClass" : "DATATYPE_MISMATCH.CAST_WITH_CONF_SUGGESTION",
+  "sqlState" : "42K09",
+  "messageParameters" : {
+    "config" : "\"spark.sql.ansi.enabled\"",
+    "configVal" : "'false'",
+    "sqlExpr" : "\"CAST(CAST(123 AS TINYINT) AS BINARY)\"",
+    "srcType" : "\"TINYINT\"",
+    "targetType" : "\"BINARY\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 12,
+    "stopIndex" : 34,
+    "fragment" : "(123 :: byte) :: binary"
+  } ]
+}


-- !query
@@ -884,7 +884,7 @@ Project [concat_ws(,, cast(utf8_lcase#x as string), collate(word, utf8_binary))
-- !query
select elt(2, s, utf8_binary) from t5
-- !query analysis
-Project [elt(2, s#x, utf8_binary#x, false) AS elt(2, s, utf8_binary)#x]
+Project [elt(2, s#x, utf8_binary#x, true) AS elt(2, s, utf8_binary)#x]
+- SubqueryAlias spark_catalog.default.t5
+- Relation spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet

@@ -918,31 +918,31 @@ org.apache.spark.sql.AnalysisException
-- !query
select elt(1, utf8_binary collate utf8_binary, utf8_lcase collate utf8_binary) from t5
-- !query analysis
-Project [elt(1, collate(utf8_binary#x, utf8_binary), collate(utf8_lcase#x, utf8_binary), false) AS elt(1, collate(utf8_binary, utf8_binary), collate(utf8_lcase, utf8_binary))#x]
+Project [elt(1, collate(utf8_binary#x, utf8_binary), collate(utf8_lcase#x, utf8_binary), true) AS elt(1, collate(utf8_binary, utf8_binary), collate(utf8_lcase, utf8_binary))#x]
+- SubqueryAlias spark_catalog.default.t5
+- Relation spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet


-- !query
select elt(1, utf8_binary collate utf8_binary, utf8_lcase) from t5
-- !query analysis
-Project [elt(1, collate(utf8_binary#x, utf8_binary), cast(utf8_lcase#x as string), false) AS elt(1, collate(utf8_binary, utf8_binary), utf8_lcase)#x]
+Project [elt(1, collate(utf8_binary#x, utf8_binary), cast(utf8_lcase#x as string), true) AS elt(1, collate(utf8_binary, utf8_binary), utf8_lcase)#x]
+- SubqueryAlias spark_catalog.default.t5
+- Relation spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet


-- !query
select elt(1, utf8_binary, 'word'), elt(1, utf8_lcase, 'word') from t5
-- !query analysis
-Project [elt(1, utf8_binary#x, word, false) AS elt(1, utf8_binary, word)#x, elt(1, utf8_lcase#x, cast(word as string collate UTF8_LCASE), false) AS elt(1, utf8_lcase, word)#x]
+Project [elt(1, utf8_binary#x, word, true) AS elt(1, utf8_binary, word)#x, elt(1, utf8_lcase#x, cast(word as string collate UTF8_LCASE), true) AS elt(1, utf8_lcase, word)#x]
+- SubqueryAlias spark_catalog.default.t5
+- Relation spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet


-- !query
select elt(1, utf8_binary, 'word' collate utf8_lcase), elt(1, utf8_lcase, 'word' collate utf8_binary) from t5
-- !query analysis
-Project [elt(1, cast(utf8_binary#x as string collate UTF8_LCASE), collate(word, utf8_lcase), false) AS elt(1, utf8_binary, collate(word, utf8_lcase))#x, elt(1, cast(utf8_lcase#x as string), collate(word, utf8_binary), false) AS elt(1, utf8_lcase, collate(word, utf8_binary))#x]
+Project [elt(1, cast(utf8_binary#x as string collate UTF8_LCASE), collate(word, utf8_lcase), true) AS elt(1, utf8_binary, collate(word, utf8_lcase))#x, elt(1, cast(utf8_lcase#x as string), collate(word, utf8_binary), true) AS elt(1, utf8_lcase, collate(word, utf8_binary))#x]
+- SubqueryAlias spark_catalog.default.t5
+- Relation spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet
