Skip to content

Commit

Permalink
[SPARK-31562][SQL] Update ExpressionDescription for substring, curren…
Browse files Browse the repository at this point in the history
…t_date, and current_timestamp

### What changes were proposed in this pull request?

This PR intends to add entries for substring, current_date, and current_timestamp in the SQL built-in function documents. Specifically, the entries are as follows:

 - SELECT current_date;
 - SELECT current_timestamp;
 - SELECT substring('abcd' FROM 1);
 - SELECT substring('abcd' FROM 1 FOR 2);

### Why are the changes needed?

To make the SQL (built-in functions) references complete.

### Does this PR introduce any user-facing change?

<img width="1040" alt="Screen Shot 2020-04-25 at 16 51 07" src="https://user-images.githubusercontent.com/692303/80274851-6ca5ee00-8718-11ea-9a35-9ae82008cb4b.png">

<img width="974" alt="Screen Shot 2020-04-25 at 17 24 24" src="https://user-images.githubusercontent.com/692303/80275032-a88d8300-8719-11ea-92ec-95b80169ae28.png">

<img width="862" alt="Screen Shot 2020-04-25 at 17 27 48" src="https://user-images.githubusercontent.com/692303/80275114-36696e00-871a-11ea-8e39-02e93eabb92f.png">

### How was this patch tested?

Added test examples.

Closes apache#28342 from maropu/SPARK-31562.

Authored-by: Takeshi Yamamuro <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
  • Loading branch information
maropu authored and dongjoon-hyun committed Apr 26, 2020
1 parent fe07b21 commit e01125d
Show file tree
Hide file tree
Showing 7 changed files with 74 additions and 22 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -404,7 +404,7 @@ object FunctionRegistry {
expression[Month]("month"),
expression[MonthsBetween]("months_between"),
expression[NextDay]("next_day"),
expression[CurrentTimestamp]("now", true),
expression[Now]("now"),
expression[Quarter]("quarter"),
expression[Second]("second"),
expression[ParseToTimestamp]("to_timestamp"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,11 @@ package org.apache.spark.sql.catalyst.analysis

import org.apache.spark.internal.Logging
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, AttributeSet, CurrentDate, CurrentTimestamp, MonotonicallyIncreasingID}
import org.apache.spark.sql.catalyst.expressions.{Attribute, CurrentDate, CurrentTimestamp, MonotonicallyIncreasingID, Now}
import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression
import org.apache.spark.sql.catalyst.planning.ExtractEquiJoinKeys
import org.apache.spark.sql.catalyst.plans._
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.streaming.InternalOutputModes
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.streaming.OutputMode

/**
Expand Down Expand Up @@ -412,7 +410,7 @@ object UnsupportedOperationChecker extends Logging {

subPlan.expressions.foreach { e =>
if (e.collectLeaves().exists {
case (_: CurrentTimestamp | _: CurrentDate) => true
case (_: CurrentTimestamp | _: Now | _: CurrentDate) => true
case _ => false
}) {
throwError(s"Continuous processing does not support current time operations.")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@ import org.apache.commons.text.StringEscapeUtils
import org.apache.spark.SparkUpgradeException
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
import org.apache.spark.sql.catalyst.expressions.codegen._
import org.apache.spark.sql.catalyst.expressions.codegen.Block._
import org.apache.spark.sql.catalyst.util.{DateTimeUtils, LegacyDateFormats, TimestampFormatter}
Expand Down Expand Up @@ -62,7 +61,21 @@ trait TimeZoneAwareExpression extends Expression {
* There is no code generation since this expression should get constant folded by the optimizer.
*/
@ExpressionDescription(
usage = "_FUNC_() - Returns the current date at the start of query evaluation.",
usage = """
_FUNC_() - Returns the current date at the start of query evaluation.
_FUNC_ - Returns the current date at the start of query evaluation.
""",
examples = """
Examples:
> SELECT _FUNC_();
2020-04-25
> SELECT _FUNC_;
2020-04-25
""",
note = """
The syntax without parentheses has been supported since 2.0.1.
""",
group = "datetime_funcs",
since = "1.5.0")
case class CurrentDate(timeZoneId: Option[String] = None)
Expand All @@ -83,26 +96,52 @@ case class CurrentDate(timeZoneId: Option[String] = None)
override def prettyName: String = "current_date"
}

/**
 * Common base for leaf expressions that yield the current timestamp.
 *
 * Every subclass produces a non-nullable [[TimestampType]] value and is marked foldable.
 * No dedicated code generation is defined here; the class mixes in [[CodegenFallback]].
 * Subclasses only need to supply their own `prettyName`.
 */
abstract class CurrentTimestampLike() extends LeafExpression with CodegenFallback {
  // The result type and nullability are fixed for all subclasses.
  override def dataType: DataType = TimestampType
  override def nullable: Boolean = false

  // Marked foldable so the expression is eligible for constant folding.
  override def foldable: Boolean = true

  // Delegates to `currentTimestamp()`, which is defined outside this block; `input` is not read.
  override def eval(input: InternalRow): Any = currentTimestamp()
}

/**
* Returns the current timestamp at the start of query evaluation.
* All calls of current_timestamp within the same query return the same value.
*
* There is no code generation since this expression should get constant folded by the optimizer.
*/
@ExpressionDescription(
usage = "_FUNC_() - Returns the current timestamp at the start of query evaluation.",
usage = """
_FUNC_() - Returns the current timestamp at the start of query evaluation.
_FUNC_ - Returns the current timestamp at the start of query evaluation.
""",
examples = """
Examples:
> SELECT _FUNC_();
2020-04-25 15:49:11.914
> SELECT _FUNC_;
2020-04-25 15:49:11.914
""",
note = """
The syntax without parentheses has been supported since 2.0.1.
""",
group = "datetime_funcs",
since = "1.5.0")
case class CurrentTimestamp() extends LeafExpression with CodegenFallback {
override def foldable: Boolean = true
override def nullable: Boolean = false

override def dataType: DataType = TimestampType

override def eval(input: InternalRow): Any = currentTimestamp()
case class CurrentTimestamp() extends CurrentTimestampLike {
override def prettyName: String = "current_timestamp"
}

override def prettyName: String =
getTagValue(FunctionRegistry.FUNC_ALIAS).getOrElse("current_timestamp")
/**
 * Returns the current timestamp at the start of query evaluation.
 *
 * This expression backs the SQL function `now`. It shares its evaluation logic with
 * [[CurrentTimestamp]] through [[CurrentTimestampLike]], but carries its own
 * description and pretty name.
 */
@ExpressionDescription(
usage = "_FUNC_() - Returns the current timestamp at the start of query evaluation.",
examples = """
Examples:
> SELECT _FUNC_();
2020-04-25 15:49:11.914
""",
group = "datetime_funcs",
since = "1.6.0")
case class Now() extends CurrentTimestampLike {
// Display name for this expression; matches the name it is registered under.
override def prettyName: String = "now"
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1619,7 +1619,11 @@ case class StringSpace(child: Expression)
*/
// scalastyle:off line.size.limit
@ExpressionDescription(
usage = "_FUNC_(str, pos[, len]) - Returns the substring of `str` that starts at `pos` and is of length `len`, or the slice of byte array that starts at `pos` and is of length `len`.",
usage = """
_FUNC_(str, pos[, len]) - Returns the substring of `str` that starts at `pos` and is of length `len`, or the slice of byte array that starts at `pos` and is of length `len`.
_FUNC_(str FROM pos[ FOR len]) - Returns the substring of `str` that starts at `pos` and is of length `len`, or the slice of byte array that starts at `pos` and is of length `len`.
""",
examples = """
Examples:
> SELECT _FUNC_('Spark SQL', 5);
Expand All @@ -1628,6 +1632,12 @@ case class StringSpace(child: Expression)
SQL
> SELECT _FUNC_('Spark SQL', 5, 1);
k
> SELECT _FUNC_('Spark SQL' FROM 5);
k SQL
> SELECT _FUNC_('Spark SQL' FROM -3);
SQL
> SELECT _FUNC_('Spark SQL' FROM 5 FOR 1);
k
""",
since = "1.5.0")
// scalastyle:on line.size.limit
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ object ComputeCurrentTime extends Rule[LogicalPlan] {
LocalDate.now(DateTimeUtils.getZoneId(timeZoneId)),
DateType)
})
case CurrentTimestamp() => currentTime
case CurrentTimestamp() | Now() => currentTime
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,9 @@ class ExpressionInfoSuite extends SparkFunSuite with SharedSparkSession {
val ignoreSet = Set(
// One of examples shows getting the current timestamp
"org.apache.spark.sql.catalyst.expressions.UnixTimestamp",
"org.apache.spark.sql.catalyst.expressions.CurrentDate",
"org.apache.spark.sql.catalyst.expressions.CurrentTimestamp",
"org.apache.spark.sql.catalyst.expressions.Now",
// Random output without a seed
"org.apache.spark.sql.catalyst.expressions.Rand",
"org.apache.spark.sql.catalyst.expressions.Randn",
Expand Down
8 changes: 5 additions & 3 deletions sql/gen-sql-functions-docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,9 +104,11 @@ def _make_pretty_usage(infos):
result.append(" <tbody>")

for info in infos:
# Extracts (signature, description) pairs from `info.usage`, e.g.,
# the signature is `func(expr)` and the description is `...` in an usage `func(expr) - ...`.
usages = iter(re.split(r"(%s\(.*\)) - " % info.name, info.usage.strip())[1:])
# Extracts (signature, description) pairs from `info.usage`.
# Expected formats are as follows:
# - `_FUNC_(...) - description`, or
# - `_FUNC_ - description`
usages = iter(re.split(r"(%s.*) - " % info.name, info.usage.strip())[1:])
for (sig, description) in zip(usages, usages):
result.append(" <tr>")
result.append(" <td>%s</td>" % sig)
Expand Down

0 comments on commit e01125d

Please sign in to comment.