From ec18b3017ca2c60c60e986ba4a761833daaa4315 Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Tue, 10 Sep 2024 17:20:11 +0800 Subject: [PATCH 1/4] Support non-UTC timezone for casting from date type to timestamp type Signed-off-by: Chong Gao --- integration_tests/src/main/python/date_time_test.py | 13 ++++++++++++- .../scala/com/nvidia/spark/rapids/GpuCast.scala | 6 ++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/integration_tests/src/main/python/date_time_test.py b/integration_tests/src/main/python/date_time_test.py index 0c877f00238..c2a6a604945 100644 --- a/integration_tests/src/main/python/date_time_test.py +++ b/integration_tests/src/main/python/date_time_test.py @@ -13,7 +13,7 @@ # limitations under the License. import pytest -from asserts import assert_gpu_and_cpu_are_equal_collect, assert_gpu_fallback_collect, assert_gpu_and_cpu_error +from asserts import assert_gpu_and_cpu_are_equal_collect, assert_gpu_fallback_collect, assert_gpu_and_cpu_error, assert_gpu_and_cpu_are_equal_sql from conftest import is_utc, is_supported_time_zone, get_test_tz from data_gen import * from datetime import date, datetime, timezone @@ -671,3 +671,14 @@ def test_timestamp_millis_long_overflow(): def test_timestamp_micros(data_gen): assert_gpu_and_cpu_are_equal_collect( lambda spark : unary_op_df(spark, data_gen).selectExpr("timestamp_micros(a)")) + + +@pytest.mark.parametrize('parser_policy', ["LEGACY", "CORRECTED", "EXCEPTION"], ids=idfn) +def test_date_to_timestamp(parser_policy): + parser_policy_dic = {"spark.sql.legacy.timeParserPolicy": "{}".format(parser_policy)} + incompatible_dic = {"spark.rapids.sql.incompatibleDateFormats.enabled": True} + assert_gpu_and_cpu_are_equal_sql( + lambda spark : unary_op_df(spark, date_gen), + "tab", + "SELECT cast(a as timestamp) from tab", + conf=copy_and_update(parser_policy_dic, incompatible_dic)) diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuCast.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuCast.scala index 8ae3450c0af..f101b8a33eb 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuCast.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuCast.scala @@ -90,6 +90,7 @@ abstract class CastExprMetaBase[INPUT <: UnaryExpression with TimeZoneAwareExpre override def isTimeZoneSupported: Boolean = { (fromType, toType) match { case (TimestampType, DateType) => true // this is for to_date(...) + case (DateType, TimestampType) => true case _ => false } } @@ -631,6 +632,11 @@ object GpuCast { zoneId.normalized())) { shifted => shifted.castTo(GpuColumnVector.getNonNestedRapidsType(toDataType)) } + case (DateType, TimestampType) if options.timeZoneId.isDefined => + val zoneId = DateTimeUtils.getZoneId(options.timeZoneId.get) + withResource(input.castTo(GpuColumnVector.getNonNestedRapidsType(toDataType))) { cv => + GpuTimeZoneDB.fromTimestampToUtcTimestamp(cv, zoneId.normalized()) + } case _ => input.castTo(GpuColumnVector.getNonNestedRapidsType(toDataType)) } From a28affe9af6e7507b094a526df5feab7aa6522c9 Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Wed, 11 Sep 2024 14:30:18 +0800 Subject: [PATCH 2/4] Refactor test case --- integration_tests/src/main/python/date_time_test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/integration_tests/src/main/python/date_time_test.py b/integration_tests/src/main/python/date_time_test.py index c2a6a604945..7d5700b7f4c 100644 --- a/integration_tests/src/main/python/date_time_test.py +++ b/integration_tests/src/main/python/date_time_test.py @@ -675,10 +675,10 @@ def test_timestamp_micros(data_gen): @pytest.mark.parametrize('parser_policy', ["LEGACY", "CORRECTED", "EXCEPTION"], ids=idfn) def test_date_to_timestamp(parser_policy): - parser_policy_dic = {"spark.sql.legacy.timeParserPolicy": "{}".format(parser_policy)} - incompatible_dic = {"spark.rapids.sql.incompatibleDateFormats.enabled": True} assert_gpu_and_cpu_are_equal_sql( lambda spark : unary_op_df(spark, date_gen), "tab", "SELECT cast(a as timestamp) from tab", - conf=copy_and_update(parser_policy_dic, incompatible_dic)) + conf = { + "spark.sql.legacy.timeParserPolicy": "{}".format(parser_policy), + "spark.rapids.sql.incompatibleDateFormats.enabled": True}) From 8e305011ce26878b6cc95bf111284d5b4bd4a552 Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Thu, 19 Sep 2024 10:03:12 +0800 Subject: [PATCH 3/4] Update test cases --- integration_tests/src/main/python/cast_test.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/integration_tests/src/main/python/cast_test.py b/integration_tests/src/main/python/cast_test.py index f7784178182..044f1d46322 100644 --- a/integration_tests/src/main/python/cast_test.py +++ b/integration_tests/src/main/python/cast_test.py @@ -726,8 +726,6 @@ def test_cast_int_to_string_not_UTC(): {"spark.sql.session.timeZone": "+08"}) not_utc_fallback_test_params = [(timestamp_gen, 'STRING'), - # python does not like year 0, and with time zones the default start date can become year 0 :( - (DateGen(start=date(1, 1, 1)), 'TIMESTAMP'), (SetValuesGen(StringType(), ['2023-03-20 10:38:50', '2023-03-20 10:39:02']), 'TIMESTAMP')] @allow_non_gpu('ProjectExec') From 734a4c1a84ea9a53bc064e45112eeeaf8e590906 Mon Sep 17 00:00:00 2001 From: Chong Gao Date: Thu, 19 Sep 2024 18:20:59 +0800 Subject: [PATCH 4/4] Update cases --- integration_tests/src/main/python/date_time_test.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/integration_tests/src/main/python/date_time_test.py b/integration_tests/src/main/python/date_time_test.py index 7d5700b7f4c..a0b2fdd9327 100644 --- a/integration_tests/src/main/python/date_time_test.py +++ b/integration_tests/src/main/python/date_time_test.py @@ -673,12 +673,13 @@ def test_timestamp_micros(data_gen): lambda spark : unary_op_df(spark, data_gen).selectExpr("timestamp_micros(a)")) -@pytest.mark.parametrize('parser_policy', ["LEGACY", "CORRECTED", "EXCEPTION"], ids=idfn) +@pytest.mark.skipif(not is_supported_time_zone(), reason="not all time zones are supported now, refer to https://github.com/NVIDIA/spark-rapids/issues/6839, please update after all time zones are supported") +@pytest.mark.parametrize('parser_policy', ['LEGACY', 'CORRECTED', 'EXCEPTION'], ids=idfn) def test_date_to_timestamp(parser_policy): assert_gpu_and_cpu_are_equal_sql( lambda spark : unary_op_df(spark, date_gen), "tab", "SELECT cast(a as timestamp) from tab", conf = { - "spark.sql.legacy.timeParserPolicy": "{}".format(parser_policy), + "spark.sql.legacy.timeParserPolicy": parser_policy, "spark.rapids.sql.incompatibleDateFormats.enabled": True})