diff --git a/python/pyspark/sql/functions/builtin.py b/python/pyspark/sql/functions/builtin.py index d2873a388617e..68b51440278cb 100644 --- a/python/pyspark/sql/functions/builtin.py +++ b/python/pyspark/sql/functions/builtin.py @@ -8864,6 +8864,13 @@ def curdate() -> Column: :class:`~pyspark.sql.Column` current date. + See Also + -------- + :meth:`pyspark.sql.functions.now` + :meth:`pyspark.sql.functions.current_date` + :meth:`pyspark.sql.functions.current_timestamp` + :meth:`pyspark.sql.functions.localtimestamp` + Examples -------- >>> import pyspark.sql.functions as sf @@ -8893,6 +8900,13 @@ def current_date() -> Column: :class:`~pyspark.sql.Column` current date. + See Also + -------- + :meth:`pyspark.sql.functions.now` + :meth:`pyspark.sql.functions.curdate` + :meth:`pyspark.sql.functions.current_timestamp` + :meth:`pyspark.sql.functions.localtimestamp` + Examples -------- >>> from pyspark.sql import functions as sf @@ -8920,14 +8934,26 @@ def current_timezone() -> Column: Examples -------- - >>> from pyspark.sql import functions as sf >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles") + + >>> from pyspark.sql import functions as sf >>> spark.range(1).select(sf.current_timezone()).show() +-------------------+ | current_timezone()| +-------------------+ |America/Los_Angeles| +-------------------+ + + Switch the timezone to Shanghai. + + >>> spark.conf.set("spark.sql.session.timeZone", "Asia/Shanghai") + >>> spark.range(1).select(sf.current_timezone()).show() + +------------------+ + |current_timezone()| + +------------------+ + | Asia/Shanghai| + +------------------+ + >>> spark.conf.unset("spark.sql.session.timeZone") """ return _invoke_function("current_timezone") @@ -8949,6 +8975,13 @@ def current_timestamp() -> Column: :class:`~pyspark.sql.Column` current date and time. + See Also + -------- + :meth:`pyspark.sql.functions.now` + :meth:`pyspark.sql.functions.curdate` + :meth:`pyspark.sql.functions.current_date` + :meth:`pyspark.sql.functions.localtimestamp` + Examples -------- >>> from pyspark.sql import functions as sf @@ -8974,6 +9007,13 @@ def now() -> Column: :class:`~pyspark.sql.Column` current timestamp at the start of query evaluation. + See Also + -------- + :meth:`pyspark.sql.functions.curdate` + :meth:`pyspark.sql.functions.current_date` + :meth:`pyspark.sql.functions.current_timestamp` + :meth:`pyspark.sql.functions.localtimestamp` + Examples -------- >>> from pyspark.sql import functions as sf @@ -9004,6 +9044,13 @@ def localtimestamp() -> Column: :class:`~pyspark.sql.Column` current local date and time. + See Also + -------- + :meth:`pyspark.sql.functions.now` + :meth:`pyspark.sql.functions.curdate` + :meth:`pyspark.sql.functions.current_date` + :meth:`pyspark.sql.functions.current_timestamp` + Examples -------- >>> from pyspark.sql import functions as sf @@ -9044,6 +9091,15 @@ def date_format(date: "ColumnOrName", format: str) -> Column: format: literal string format to use to represent datetime values. + See Also + -------- + :meth:`pyspark.sql.functions.to_date` + :meth:`pyspark.sql.functions.to_timestamp` + :meth:`pyspark.sql.functions.to_timestamp_ltz` + :meth:`pyspark.sql.functions.to_timestamp_ntz` + :meth:`pyspark.sql.functions.to_utc_timestamp` + :meth:`pyspark.sql.functions.try_to_timestamp` + Returns ------- :class:`~pyspark.sql.Column` @@ -9130,6 +9186,18 @@ def year(col: "ColumnOrName") -> Column: :class:`~pyspark.sql.Column` year part of the date/timestamp as integer. 
+ See Also + -------- + :meth:`pyspark.sql.functions.quarter` + :meth:`pyspark.sql.functions.month` + :meth:`pyspark.sql.functions.day` + :meth:`pyspark.sql.functions.hour` + :meth:`pyspark.sql.functions.minute` + :meth:`pyspark.sql.functions.second` + :meth:`pyspark.sql.functions.extract` + :meth:`pyspark.sql.functions.datepart` + :meth:`pyspark.sql.functions.date_part` + Examples -------- Example 1: Extract the year from a string column representing dates @@ -9209,6 +9277,18 @@ def quarter(col: "ColumnOrName") -> Column: :class:`~pyspark.sql.Column` quarter of the date/timestamp as integer. + See Also + -------- + :meth:`pyspark.sql.functions.year` + :meth:`pyspark.sql.functions.month` + :meth:`pyspark.sql.functions.day` + :meth:`pyspark.sql.functions.hour` + :meth:`pyspark.sql.functions.minute` + :meth:`pyspark.sql.functions.second` + :meth:`pyspark.sql.functions.extract` + :meth:`pyspark.sql.functions.datepart` + :meth:`pyspark.sql.functions.date_part` + Examples -------- Example 1: Extract the quarter from a string column representing dates @@ -9288,6 +9368,19 @@ def month(col: "ColumnOrName") -> Column: :class:`~pyspark.sql.Column` month part of the date/timestamp as integer. + See Also + -------- + :meth:`pyspark.sql.functions.year` + :meth:`pyspark.sql.functions.quarter` + :meth:`pyspark.sql.functions.day` + :meth:`pyspark.sql.functions.hour` + :meth:`pyspark.sql.functions.minute` + :meth:`pyspark.sql.functions.second` + :meth:`pyspark.sql.functions.monthname` + :meth:`pyspark.sql.functions.extract` + :meth:`pyspark.sql.functions.datepart` + :meth:`pyspark.sql.functions.date_part` + Examples -------- Example 1: Extract the month from a string column representing dates @@ -9368,6 +9461,12 @@ def dayofweek(col: "ColumnOrName") -> Column: :class:`~pyspark.sql.Column` day of the week for given date/timestamp as integer. + See Also + -------- + :meth:`pyspark.sql.functions.day` + :meth:`pyspark.sql.functions.dayofyear` + :meth:`pyspark.sql.functions.dayofmonth` + Examples -------- Example 1: Extract the day of the week from a string column representing dates @@ -9442,6 +9541,12 @@ def dayofmonth(col: "ColumnOrName") -> Column: col : :class:`~pyspark.sql.Column` or column name target date/timestamp column to work on. + See Also + -------- + :meth:`pyspark.sql.functions.day` + :meth:`pyspark.sql.functions.dayofyear` + :meth:`pyspark.sql.functions.dayofweek` + Returns ------- :class:`~pyspark.sql.Column` @@ -9523,6 +9628,22 @@ def day(col: "ColumnOrName") -> Column: :class:`~pyspark.sql.Column` day of the month for given date/timestamp as integer. + See Also + -------- + :meth:`pyspark.sql.functions.year` + :meth:`pyspark.sql.functions.quarter` + :meth:`pyspark.sql.functions.month` + :meth:`pyspark.sql.functions.hour` + :meth:`pyspark.sql.functions.minute` + :meth:`pyspark.sql.functions.second` + :meth:`pyspark.sql.functions.dayname` + :meth:`pyspark.sql.functions.dayofyear` + :meth:`pyspark.sql.functions.dayofmonth` + :meth:`pyspark.sql.functions.dayofweek` + :meth:`pyspark.sql.functions.extract` + :meth:`pyspark.sql.functions.datepart` + :meth:`pyspark.sql.functions.date_part` + Examples -------- Example 1: Extract the day of the month from a string column representing dates @@ -9602,6 +9723,12 @@ def dayofyear(col: "ColumnOrName") -> Column: :class:`~pyspark.sql.Column` day of the year for given date/timestamp as integer. 
+ See Also + -------- + :meth:`pyspark.sql.functions.day` + :meth:`pyspark.sql.functions.dayofweek` + :meth:`pyspark.sql.functions.dayofmonth` + Examples -------- Example 1: Extract the day of the year from a string column representing dates @@ -9681,6 +9808,18 @@ def hour(col: "ColumnOrName") -> Column: :class:`~pyspark.sql.Column` hour part of the timestamp as integer. + See Also + -------- + :meth:`pyspark.sql.functions.year` + :meth:`pyspark.sql.functions.quarter` + :meth:`pyspark.sql.functions.month` + :meth:`pyspark.sql.functions.day` + :meth:`pyspark.sql.functions.minute` + :meth:`pyspark.sql.functions.second` + :meth:`pyspark.sql.functions.extract` + :meth:`pyspark.sql.functions.datepart` + :meth:`pyspark.sql.functions.date_part` + Examples -------- Example 1: Extract the hours from a string column representing timestamp @@ -9728,6 +9867,18 @@ def minute(col: "ColumnOrName") -> Column: col : :class:`~pyspark.sql.Column` or column name target date/timestamp column to work on. + See Also + -------- + :meth:`pyspark.sql.functions.year` + :meth:`pyspark.sql.functions.quarter` + :meth:`pyspark.sql.functions.month` + :meth:`pyspark.sql.functions.day` + :meth:`pyspark.sql.functions.hour` + :meth:`pyspark.sql.functions.second` + :meth:`pyspark.sql.functions.extract` + :meth:`pyspark.sql.functions.datepart` + :meth:`pyspark.sql.functions.date_part` + Returns ------- :class:`~pyspark.sql.Column` @@ -9785,6 +9936,18 @@ def second(col: "ColumnOrName") -> Column: :class:`~pyspark.sql.Column` `seconds` part of the timestamp as integer. + See Also + -------- + :meth:`pyspark.sql.functions.year` + :meth:`pyspark.sql.functions.quarter` + :meth:`pyspark.sql.functions.month` + :meth:`pyspark.sql.functions.day` + :meth:`pyspark.sql.functions.hour` + :meth:`pyspark.sql.functions.minute` + :meth:`pyspark.sql.functions.extract` + :meth:`pyspark.sql.functions.datepart` + :meth:`pyspark.sql.functions.date_part` + Examples -------- Example 1: Extract the seconds from a string column representing timestamp @@ -9839,6 +10002,10 @@ def weekofyear(col: "ColumnOrName") -> Column: :class:`~pyspark.sql.Column` `week` of the year for given date as integer. + See Also + -------- + :meth:`pyspark.sql.functions.weekday` + Examples -------- Example 1: Extract the week of the year from a string column representing dates @@ -9915,6 +10082,11 @@ def weekday(col: "ColumnOrName") -> Column: :class:`~pyspark.sql.Column` the day of the week for date/timestamp (0 = Monday, 1 = Tuesday, ..., 6 = Sunday). + See Also + -------- + :meth:`pyspark.sql.functions.day` + :meth:`pyspark.sql.functions.weekofyear` + Examples -------- Example 1: Extract the day of the week from a string column representing dates @@ -9991,6 +10163,11 @@ def monthname(col: "ColumnOrName") -> Column: :class:`~pyspark.sql.Column` the three-letter abbreviation of month name for date/timestamp (Jan, Feb, Mar...) + See Also + -------- + :meth:`pyspark.sql.functions.month` + :meth:`pyspark.sql.functions.dayname` + Examples -------- Example 1: Extract the weekday name from a string column representing dates @@ -10067,6 +10244,11 @@ def dayname(col: "ColumnOrName") -> Column: :class:`~pyspark.sql.Column` the three-letter abbreviation of day name for date/timestamp (Mon, Tue, Wed...)
+ See Also + -------- + :meth:`pyspark.sql.functions.day` + :meth:`pyspark.sql.functions.monthname` + Examples -------- Example 1: Extract the weekday name from a string column representing dates @@ -10147,6 +10329,13 @@ def extract(field: Column, source: "ColumnOrName") -> Column: See Also -------- + :meth:`pyspark.sql.functions.year` + :meth:`pyspark.sql.functions.quarter` + :meth:`pyspark.sql.functions.month` + :meth:`pyspark.sql.functions.day` + :meth:`pyspark.sql.functions.hour` + :meth:`pyspark.sql.functions.minute` + :meth:`pyspark.sql.functions.second` :meth:`pyspark.sql.functions.datepart` :meth:`pyspark.sql.functions.date_part` @@ -10195,6 +10384,13 @@ def date_part(field: Column, source: "ColumnOrName") -> Column: See Also -------- + :meth:`pyspark.sql.functions.year` + :meth:`pyspark.sql.functions.quarter` + :meth:`pyspark.sql.functions.month` + :meth:`pyspark.sql.functions.day` + :meth:`pyspark.sql.functions.hour` + :meth:`pyspark.sql.functions.minute` + :meth:`pyspark.sql.functions.second` :meth:`pyspark.sql.functions.datepart` :meth:`pyspark.sql.functions.extract` @@ -10243,6 +10439,13 @@ def datepart(field: Column, source: "ColumnOrName") -> Column: See Also -------- + :meth:`pyspark.sql.functions.year` + :meth:`pyspark.sql.functions.quarter` + :meth:`pyspark.sql.functions.month` + :meth:`pyspark.sql.functions.day` + :meth:`pyspark.sql.functions.hour` + :meth:`pyspark.sql.functions.minute` + :meth:`pyspark.sql.functions.second` :meth:`pyspark.sql.functions.date_part` :meth:`pyspark.sql.functions.extract` @@ -10780,7 +10983,11 @@ def to_date(col: "ColumnOrName", format: Optional[str] = None) -> Column: See Also -------- :meth:`pyspark.sql.functions.to_timestamp` + :meth:`pyspark.sql.functions.to_timestamp_ltz` + :meth:`pyspark.sql.functions.to_timestamp_ntz` + :meth:`pyspark.sql.functions.to_utc_timestamp` :meth:`pyspark.sql.functions.try_to_timestamp` + :meth:`pyspark.sql.functions.date_format` Examples -------- @@ -11018,7 +11225,12 @@ def to_timestamp(col: "ColumnOrName", format: Optional[str] = None) -> Column: See Also -------- :meth:`pyspark.sql.functions.to_date` + :meth:`pyspark.sql.functions.to_timestamp_ltz` + :meth:`pyspark.sql.functions.to_timestamp_ntz` + :meth:`pyspark.sql.functions.to_utc_timestamp` + :meth:`pyspark.sql.functions.to_unix_timestamp` :meth:`pyspark.sql.functions.try_to_timestamp` + :meth:`pyspark.sql.functions.date_format` Examples -------- @@ -11072,6 +11284,8 @@ def try_to_timestamp(col: "ColumnOrName", format: Optional["ColumnOrName"] = Non -------- :meth:`pyspark.sql.functions.to_date` :meth:`pyspark.sql.functions.to_timestamp` + :meth:`pyspark.sql.functions.to_utc_timestamp` + :meth:`pyspark.sql.functions.date_format` Examples -------- @@ -11646,6 +11860,9 @@ def from_utc_timestamp(timestamp: "ColumnOrName", tz: Union[Column, str]) -> Col See Also -------- :meth:`pyspark.sql.functions.to_utc_timestamp` + :meth:`pyspark.sql.functions.to_timestamp` + :meth:`pyspark.sql.functions.to_timestamp_ltz` + :meth:`pyspark.sql.functions.to_timestamp_ntz` Examples -------- @@ -11712,6 +11929,9 @@ def to_utc_timestamp(timestamp: "ColumnOrName", tz: Union[Column, str]) -> Colum See Also -------- :meth:`pyspark.sql.functions.from_utc_timestamp` + :meth:`pyspark.sql.functions.to_timestamp` + :meth:`pyspark.sql.functions.to_timestamp_ltz` + :meth:`pyspark.sql.functions.to_timestamp_ntz` Examples -------- @@ -12034,22 +12254,22 @@ def window( Parameters ---------- - timeColumn : :class:`~pyspark.sql.Column` + timeColumn : :class:`~pyspark.sql.Column` or 
column name The column or the expression to use as the timestamp for windowing by time. The time column must be of TimestampType or TimestampNTZType. - windowDuration : str + windowDuration : literal string A string specifying the width of the window, e.g. `10 minutes`, `1 second`. Check `org.apache.spark.unsafe.types.CalendarInterval` for valid duration identifiers. Note that the duration is a fixed length of time, and does not vary over time according to a calendar. For example, `1 day` always means 86,400,000 milliseconds, not a calendar day. - slideDuration : str, optional + slideDuration : literal string, optional A new window will be generated every `slideDuration`. Must be less than or equal to the `windowDuration`. Check `org.apache.spark.unsafe.types.CalendarInterval` for valid duration identifiers. This duration is likewise absolute, and does not vary according to a calendar. - startTime : str, optional + startTime : literal string, optional The offset with respect to 1970-01-01 00:00:00 UTC with which to start window intervals. For example, in order to have hourly tumbling windows that start 15 minutes past the hour, e.g. 12:15-13:15, 13:15-14:15... provide @@ -12060,24 +12280,30 @@ def window( :class:`~pyspark.sql.Column` the column for computed results. + See Also + -------- + :meth:`pyspark.sql.functions.window_time` + :meth:`pyspark.sql.functions.session_window` + Examples -------- >>> import datetime >>> from pyspark.sql import functions as sf - >>> df = spark.createDataFrame( - ... [(datetime.datetime(2016, 3, 11, 9, 0, 7), 1)], - ... ).toDF("date", "val") - >>> w = df.groupBy(sf.window("date", "5 seconds")).agg(sf.sum("val").alias("sum")) - >>> w.select( - ... w.window.start.cast("string").alias("start"), - ... w.window.end.cast("string").alias("end"), - ... "sum" - ... ).show() - +-------------------+-------------------+---+ - | start| end|sum| - +-------------------+-------------------+---+ - |2016-03-11 09:00:05|2016-03-11 09:00:10| 1| - +-------------------+-------------------+---+ + >>> df = spark.createDataFrame([(datetime.datetime(2016, 3, 11, 9, 0, 7), 1)], ['dt', 'v']) + >>> df2 = df.groupBy(sf.window('dt', '5 seconds')).agg(sf.sum('v')) + >>> df2.show(truncate=False) + +------------------------------------------+------+ + |window |sum(v)| + +------------------------------------------+------+ + |{2016-03-11 09:00:05, 2016-03-11 09:00:10}|1 | + +------------------------------------------+------+ + + >>> df2.printSchema() + root + |-- window: struct (nullable = false) + | |-- start: timestamp (nullable = true) + | |-- end: timestamp (nullable = true) + |-- sum(v): long (nullable = true) """ from pyspark.sql.classic.column import _to_java_column @@ -12123,7 +12349,7 @@ def window_time( Parameters ---------- - windowColumn : :class:`~pyspark.sql.Column` + windowColumn : :class:`~pyspark.sql.Column` or column name The window column of a window aggregate records. Returns @@ -12131,29 +12357,29 @@ def window_time( :class:`~pyspark.sql.Column` the column for computed results. - Notes - ----- - Supports Spark Connect. + See Also + -------- + :meth:`pyspark.sql.functions.window` + :meth:`pyspark.sql.functions.session_window` Examples -------- >>> import datetime - >>> df = spark.createDataFrame( - ... [(datetime.datetime(2016, 3, 11, 9, 0, 7), 1)], - ... 
).toDF("date", "val") + >>> from pyspark.sql import functions as sf + >>> df = spark.createDataFrame([(datetime.datetime(2016, 3, 11, 9, 0, 7), 1)], ['dt', 'v']) Group the data into 5 second time windows and aggregate as sum. - >>> w = df.groupBy(window("date", "5 seconds")).agg(sum("val").alias("sum")) + >>> df2 = df.groupBy(sf.window('dt', '5 seconds')).agg(sf.sum('v')) Extract the window event time using the window_time function. - >>> w.select( - ... w.window.end.cast("string").alias("end"), - ... window_time(w.window).cast("string").alias("window_time"), - ... "sum" - ... ).collect() - [Row(end='2016-03-11 09:00:10', window_time='2016-03-11 09:00:09.999999', sum=1)] + >>> df2.select('*', sf.window_time('window')).show(truncate=False) + +------------------------------------------+------+--------------------------+ + |window |sum(v)|window_time(window) | + +------------------------------------------+------+--------------------------+ + |{2016-03-11 09:00:05, 2016-03-11 09:00:10}|1 |2016-03-11 09:00:09.999999| + +------------------------------------------+------+--------------------------+ """ from pyspark.sql.classic.column import _to_java_column @@ -12187,10 +12413,10 @@ def session_window(timeColumn: "ColumnOrName", gapDuration: Union[Column, str]) Parameters ---------- - timeColumn : :class:`~pyspark.sql.Column` or str + timeColumn : :class:`~pyspark.sql.Column` or column name The column name or column to use as the timestamp for windowing by time. The time column must be of TimestampType or TimestampNTZType. - gapDuration : :class:`~pyspark.sql.Column` or str + gapDuration : :class:`~pyspark.sql.Column` or literal string A Python string literal or column specifying the timeout of the session. It could be static value, e.g. `10 minutes`, `1 second`, or an expression/UDF that specifies gap duration dynamically based on the input row. @@ -12200,17 +12426,29 @@ def session_window(timeColumn: "ColumnOrName", gapDuration: Union[Column, str]) :class:`~pyspark.sql.Column` the column for computed results. + See Also + -------- + :meth:`pyspark.sql.functions.window` + :meth:`pyspark.sql.functions.window_time` + Examples -------- - >>> df = spark.createDataFrame([("2016-03-11 09:00:07", 1)]).toDF("date", "val") - >>> w = df.groupBy(session_window("date", "5 seconds")).agg(sum("val").alias("sum")) - >>> w.select(w.session_window.start.cast("string").alias("start"), - ... w.session_window.end.cast("string").alias("end"), "sum").collect() - [Row(start='2016-03-11 09:00:07', end='2016-03-11 09:00:12', sum=1)] - >>> w = df.groupBy(session_window("date", lit("5 seconds"))).agg(sum("val").alias("sum")) - >>> w.select(w.session_window.start.cast("string").alias("start"), - ... 
w.session_window.end.cast("string").alias("end"), "sum").collect() - [Row(start='2016-03-11 09:00:07', end='2016-03-11 09:00:12', sum=1)] + >>> from pyspark.sql import functions as sf + >>> df = spark.createDataFrame([('2016-03-11 09:00:07', 1)], ['dt', 'v']) + >>> df2 = df.groupBy(sf.session_window('dt', '5 seconds')).agg(sf.sum('v')) + >>> df2.show(truncate=False) + +------------------------------------------+------+ + |session_window |sum(v)| + +------------------------------------------+------+ + |{2016-03-11 09:00:07, 2016-03-11 09:00:12}|1 | + +------------------------------------------+------+ + + >>> df2.printSchema() + root + |-- session_window: struct (nullable = false) + | |-- start: timestamp (nullable = true) + | |-- end: timestamp (nullable = true) + |-- sum(v): long (nullable = true) """ from pyspark.sql.classic.column import _to_java_column @@ -12240,37 +12478,57 @@ def to_unix_timestamp( Parameters ---------- - timestamp : :class:`~pyspark.sql.Column` or str + timestamp : :class:`~pyspark.sql.Column` or column name Input column or strings. - format : :class:`~pyspark.sql.Column` or str, optional + format : :class:`~pyspark.sql.Column` or column name, optional format to use to convert UNIX timestamp values. + See Also + -------- + :meth:`pyspark.sql.functions.to_date` + :meth:`pyspark.sql.functions.to_timestamp` + :meth:`pyspark.sql.functions.to_timestamp_ltz` + :meth:`pyspark.sql.functions.to_timestamp_ntz` + :meth:`pyspark.sql.functions.to_utc_timestamp` + Examples -------- >>> spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles") - Example 1: Using default format 'yyyy-MM-dd HH:mm:ss' parses the timestamp string. + Example 1: Using default format to parse the timestamp string. >>> import pyspark.sql.functions as sf - >>> time_df = spark.createDataFrame([('2015-04-08 12:12:12',)], ['dt']) - >>> time_df.select(sf.to_unix_timestamp('dt').alias('unix_time')).show() - +----------+ - | unix_time| - +----------+ - |1428520332| - +----------+ + >>> df = spark.createDataFrame([('2015-04-08 12:12:12',)], ['ts']) + >>> df.select('*', sf.to_unix_timestamp('ts')).show() + +-------------------+------------------------------------------+ + | ts|to_unix_timestamp(ts, yyyy-MM-dd HH:mm:ss)| + +-------------------+------------------------------------------+ + |2015-04-08 12:12:12| 1428520332| + +-------------------+------------------------------------------+ - Example 2: Using user-specified format 'yyyy-MM-dd' parses the timestamp string. + Example 2: Using user-specified format 'yyyy-MM-dd' to parse the date string. >>> import pyspark.sql.functions as sf - >>> time_df = spark.createDataFrame([('2015-04-08',)], ['dt']) - >>> time_df.select( - ... sf.to_unix_timestamp('dt', sf.lit('yyyy-MM-dd')).alias('unix_time')).show() - +----------+ - | unix_time| - +----------+ - |1428476400| - +----------+ + >>> df = spark.createDataFrame([('2015-04-08',)], ['dt']) + >>> df.select('*', sf.to_unix_timestamp(df.dt, sf.lit('yyyy-MM-dd'))).show() + +----------+---------------------------------+ + | dt|to_unix_timestamp(dt, yyyy-MM-dd)| + +----------+---------------------------------+ + |2015-04-08| 1428476400| + +----------+---------------------------------+ + + Example 3: Using a format column to represent different formats. + + >>> import pyspark.sql.functions as sf + >>> df = spark.createDataFrame( + ... 
[('2015-04-08', 'yyyy-MM-dd'), ('2025+01+09', 'yyyy+MM+dd')], ['dt', 'fmt']) + >>> df.select('*', sf.to_unix_timestamp('dt', 'fmt')).show() + +----------+----------+--------------------------+ + | dt| fmt|to_unix_timestamp(dt, fmt)| + +----------+----------+--------------------------+ + |2015-04-08|yyyy-MM-dd| 1428476400| + |2025+01+09|yyyy+MM+dd| 1736409600| + +----------+----------+--------------------------+ >>> spark.conf.unset("spark.sql.session.timeZone") """ @@ -12286,29 +12544,63 @@ def to_timestamp_ltz( format: Optional["ColumnOrName"] = None, ) -> Column: """ - Parses the `timestamp` with the `format` to a timestamp without time zone. + Parses the `timestamp` with the `format` to a timestamp with time zone. Returns null with invalid input. .. versionadded:: 3.5.0 Parameters ---------- - timestamp : :class:`~pyspark.sql.Column` or str + timestamp : :class:`~pyspark.sql.Column` or column name Input column or strings. - format : :class:`~pyspark.sql.Column` or str, optional + format : :class:`~pyspark.sql.Column` or column name, optional format to use to convert type `TimestampType` timestamp values. + See Also + -------- + :meth:`pyspark.sql.functions.to_date` + :meth:`pyspark.sql.functions.to_timestamp` + :meth:`pyspark.sql.functions.to_timestamp_ntz` + :meth:`pyspark.sql.functions.to_utc_timestamp` + :meth:`pyspark.sql.functions.to_unix_timestamp` + :meth:`pyspark.sql.functions.date_format` + Examples -------- - >>> df = spark.createDataFrame([("2016-12-31",)], ["e"]) - >>> df.select(to_timestamp_ltz(df.e, lit("yyyy-MM-dd")).alias('r')).collect() - ... # doctest: +SKIP - [Row(r=datetime.datetime(2016, 12, 31, 0, 0))] + Example 1: Using default format to parse the timestamp string. - >>> df = spark.createDataFrame([("2016-12-31",)], ["e"]) - >>> df.select(to_timestamp_ltz(df.e).alias('r')).collect() - ... # doctest: +SKIP - [Row(r=datetime.datetime(2016, 12, 31, 0, 0))] + >>> import pyspark.sql.functions as sf + >>> df = spark.createDataFrame([('2015-04-08 12:12:12',)], ['ts']) + >>> df.select('*', sf.to_timestamp_ltz('ts')).show() + +-------------------+--------------------+ + | ts|to_timestamp_ltz(ts)| + +-------------------+--------------------+ + |2015-04-08 12:12:12| 2015-04-08 12:12:12| + +-------------------+--------------------+ + + Example 2: Using user-specified format to parse the date string. + + >>> import pyspark.sql.functions as sf + >>> df = spark.createDataFrame([('2016-12-31',)], ['dt']) + >>> df.select('*', sf.to_timestamp_ltz(df.dt, sf.lit('yyyy-MM-dd'))).show() + +----------+--------------------------------+ + | dt|to_timestamp_ltz(dt, yyyy-MM-dd)| + +----------+--------------------------------+ + |2016-12-31| 2016-12-31 00:00:00| + +----------+--------------------------------+ + + Example 3: Using a format column to represent different formats. + + >>> import pyspark.sql.functions as sf + >>> df = spark.createDataFrame( + ... 
[('2015-04-08', 'yyyy-MM-dd'), ('2025+01+09', 'yyyy+MM+dd')], ['dt', 'fmt']) + >>> df.select('*', sf.to_timestamp_ltz('dt', 'fmt')).show() + +----------+----------+-------------------------+ + | dt| fmt|to_timestamp_ltz(dt, fmt)| + +----------+----------+-------------------------+ + |2015-04-08|yyyy-MM-dd| 2015-04-08 00:00:00| + |2025+01+09|yyyy+MM+dd| 2025-01-09 00:00:00| + +----------+----------+-------------------------+ """ if format is not None: return _invoke_function_over_columns("to_timestamp_ltz", timestamp, format) @@ -12329,22 +12621,56 @@ def to_timestamp_ntz( Parameters ---------- - timestamp : :class:`~pyspark.sql.Column` or str + timestamp : :class:`~pyspark.sql.Column` or column name Input column or strings. - format : :class:`~pyspark.sql.Column` or str, optional + format : :class:`~pyspark.sql.Column` or column name, optional format to use to convert type `TimestampNTZType` timestamp values. + See Also + -------- + :meth:`pyspark.sql.functions.to_date` + :meth:`pyspark.sql.functions.to_timestamp` + :meth:`pyspark.sql.functions.to_timestamp_ltz` + :meth:`pyspark.sql.functions.to_utc_timestamp` + :meth:`pyspark.sql.functions.to_unix_timestamp` + :meth:`pyspark.sql.functions.date_format` + Examples -------- - >>> df = spark.createDataFrame([("2016-04-08",)], ["e"]) - >>> df.select(to_timestamp_ntz(df.e, lit("yyyy-MM-dd")).alias('r')).collect() - ... # doctest: +SKIP - [Row(r=datetime.datetime(2016, 4, 8, 0, 0))] + Example 1: Using default format to parse the timestamp string. - >>> df = spark.createDataFrame([("2016-04-08",)], ["e"]) - >>> df.select(to_timestamp_ntz(df.e).alias('r')).collect() - ... # doctest: +SKIP - [Row(r=datetime.datetime(2016, 4, 8, 0, 0))] + >>> import pyspark.sql.functions as sf + >>> df = spark.createDataFrame([('2015-04-08 12:12:12',)], ['ts']) + >>> df.select('*', sf.to_timestamp_ntz('ts')).show() + +-------------------+--------------------+ + | ts|to_timestamp_ntz(ts)| + +-------------------+--------------------+ + |2015-04-08 12:12:12| 2015-04-08 12:12:12| + +-------------------+--------------------+ + + Example 2: Using user-specified format 'yyyy-MM-dd' to parse the date string. + + >>> import pyspark.sql.functions as sf + >>> df = spark.createDataFrame([('2016-12-31',)], ['dt']) + >>> df.select('*', sf.to_timestamp_ntz(df.dt, sf.lit('yyyy-MM-dd'))).show() + +----------+--------------------------------+ + | dt|to_timestamp_ntz(dt, yyyy-MM-dd)| + +----------+--------------------------------+ + |2016-12-31| 2016-12-31 00:00:00| + +----------+--------------------------------+ + + Example 3: Using a format column to represent different formats. + + >>> import pyspark.sql.functions as sf + >>> df = spark.createDataFrame( + ... [('2015-04-08', 'yyyy-MM-dd'), ('2025+01+09', 'yyyy+MM+dd')], ['dt', 'fmt']) + >>> df.select('*', sf.to_timestamp_ntz('dt', 'fmt')).show() + +----------+----------+-------------------------+ + | dt| fmt|to_timestamp_ntz(dt, fmt)| + +----------+----------+-------------------------+ + |2015-04-08|yyyy-MM-dd| 2015-04-08 00:00:00| + |2025+01+09|yyyy+MM+dd| 2025-01-09 00:00:00| + +----------+----------+-------------------------+ """ if format is not None: return _invoke_function_over_columns("to_timestamp_ntz", timestamp, format)
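For reviewers who want to sanity-check the refreshed doctest examples locally, the sketch below (not part of the patch) exercises the cross-referenced parsing functions side by side. It assumes a local PySpark 3.5+ installation; the session time zone and sample timestamp mirror the doctests above, and the DataFrame and column names are illustrative only.

```python
# Illustrative sketch only -- not part of the patch. Assumes PySpark >= 3.5 installed locally.
from pyspark.sql import SparkSession
import pyspark.sql.functions as sf

spark = SparkSession.builder.master("local[1]").getOrCreate()
spark.conf.set("spark.sql.session.timeZone", "America/Los_Angeles")

df = spark.createDataFrame([("2015-04-08 12:12:12",)], ["ts"])
df.select(
    sf.to_timestamp("ts"),       # TimestampType, interpreted in the session time zone
    sf.to_timestamp_ltz("ts"),   # same result as to_timestamp for this input
    sf.to_timestamp_ntz("ts"),   # TimestampNTZType, no time zone attached
    sf.to_unix_timestamp("ts"),  # seconds since the epoch (1428520332 in this time zone)
).show(truncate=False)

spark.conf.unset("spark.sql.session.timeZone")
spark.stop()
```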