Skip to content

Commit

Permalink
Issue duckdb#14834: INTERVAL Collations
Browse files Browse the repository at this point in the history
Use a normalised INTERVAL as a collation binding for sorting INTERVALs.
This takes the regular normalisation (which uses wide integers)
and borrows right until it fits in an equivalent regular INTERVAL.

fixes: duckdb#14834
fixes: duckdblabs/duckdb-internal#3500
  • Loading branch information
hawkfish committed Nov 27, 2024
1 parent 5361637 commit bfa357d
Show file tree
Hide file tree
Showing 7 changed files with 234 additions and 1 deletion.
1 change: 1 addition & 0 deletions extension/core_functions/function_list.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,7 @@ static const StaticFunctionDefinition core_functions[] = {
DUCKDB_SCALAR_FUNCTION_SET(MonthNameFun),
DUCKDB_SCALAR_FUNCTION_SET(NanosecondsFun),
DUCKDB_SCALAR_FUNCTION_SET(NextAfterFun),
DUCKDB_SCALAR_FUNCTION(NormalizedIntervalFun),
DUCKDB_SCALAR_FUNCTION_ALIAS(NowFun),
DUCKDB_SCALAR_FUNCTION_ALIAS(OrdFun),
DUCKDB_SCALAR_FUNCTION_SET(ParseDirnameFun),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,15 @@ struct NanosecondsFun {
static ScalarFunctionSet GetFunctions();
};

// Descriptor for the normalized_interval scalar function: its SQL name, parameter list,
// description and usage example, plus the factory that builds the function.
struct NormalizedIntervalFun {
static constexpr const char *Name = "normalized_interval";
static constexpr const char *Parameters = "interval";
static constexpr const char *Description = "Normalizes an INTERVAL to an equivalent interval";
static constexpr const char *Example = "normalized_interval(INTERVAL '30 days')";

// Builds the ScalarFunction for this entry (takes one INTERVAL, returns an INTERVAL).
static ScalarFunction GetFunction();
};

struct QuarterFun {
static constexpr const char *Name = "quarter";
static constexpr const char *Parameters = "ts";
Expand Down
18 changes: 17 additions & 1 deletion extension/core_functions/scalar/date/epoch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,23 @@ ScalarFunction ToTimestampFun::GetFunction() {
return ScalarFunction({LogicalType::DOUBLE}, LogicalType::TIMESTAMP_TZ, EpochSecFunction);
}

// Unary operator used by the vectorised executor: maps each input value to
// whatever its zero-argument Normalize() member returns.
struct NormalizedIntervalOperator {
	template <typename INPUT_TYPE, typename RESULT_TYPE>
	static RESULT_TYPE Operation(INPUT_TYPE input) {
		// Materialise the normalised value first, then hand it back by value.
		RESULT_TYPE normalized = input.Normalize();
		return normalized;
	}
};

// Scalar implementation of normalized_interval: applies NormalizedIntervalOperator
// to every row of the single input column and writes the results into `result`.
static void NormalizedIntervalFunction(DataChunk &input, ExpressionState &state, Vector &result) {
	// The function is registered with exactly one argument.
	D_ASSERT(input.ColumnCount() == 1);

	auto &intervals = input.data[0];
	const auto row_count = input.size();
	UnaryExecutor::Execute<interval_t, interval_t, NormalizedIntervalOperator>(intervals, result, row_count);
}

// Builds the normalized_interval scalar function: one INTERVAL argument, INTERVAL result,
// evaluated by NormalizedIntervalFunction.
ScalarFunction NormalizedIntervalFun::GetFunction() {
	ScalarFunction function({LogicalType::INTERVAL}, LogicalType::INTERVAL, NormalizedIntervalFunction);
	return function;
}

struct TimeTZSortKeyOperator {
template <typename INPUT_TYPE, typename RESULT_TYPE>
static RESULT_TYPE Operation(INPUT_TYPE input) {
Expand All @@ -44,5 +61,4 @@ static void TimeTZSortKeyFunction(DataChunk &input, ExpressionState &state, Vect
ScalarFunction TimeTZSortKeyFun::GetFunction() {
return ScalarFunction({LogicalType::TIME_TZ}, LogicalType::UBIGINT, TimeTZSortKeyFunction);
}

} // namespace duckdb
8 changes: 8 additions & 0 deletions extension/core_functions/scalar/date/functions.json
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,14 @@
"example": "nanosecond(timestamp_ns '2021-08-03 11:59:44.123456789') => 44123456789",
"type": "scalar_function_set"
},
{
"struct": "NormalizedIntervalFun",
"name": "normalized_interval",
"parameters": "interval",
"description": "Normalizes an INTERVAL to an equivalent interval",
"example": "normalized_interval(INTERVAL '30 days')",
"type": "scalar_function"
},
{
"name": "quarter",
"parameters": "ts",
Expand Down
32 changes: 32 additions & 0 deletions src/include/duckdb/common/types/interval.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@ struct interval_t { // NOLINT
int64_t micros;

inline void Normalize(int64_t &months, int64_t &days, int64_t &micros) const;

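// Normalize into a regular interval_t: a field that overflows int32 is clamped and the excess is borrowed into the next field to its right.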
inline static void Borrow(const int64_t msf, int64_t &lsf, int32_t &f, const int64_t scale);
inline interval_t Normalize() const;

inline bool operator==(const interval_t &right) const {
// Quick equality check
const auto &left = *this;
Expand Down Expand Up @@ -165,6 +170,7 @@ class Interval {
return left > right;
}
};

void interval_t::Normalize(int64_t &months, int64_t &days, int64_t &micros) const {
auto &input = *this;

Expand All @@ -182,4 +188,30 @@ void interval_t::Normalize(int64_t &months, int64_t &days, int64_t &micros) cons
months += carry_months;
}

// Fits the wide value `msf` into the 32-bit field `f`.
//
// If `msf` already fits in an int32_t it is stored in `f` unchanged and `lsf` is not touched.
// Otherwise `f` is clamped to the nearest int32_t bound and the part that did not fit,
// multiplied by `scale`, is added to `lsf`.
//
// NOTE(review): the names presumably stand for "more/less significant field", with `scale`
// being the number of `lsf` units per `msf` unit - confirm against the callers.
void interval_t::Borrow(const int64_t msf, int64_t &lsf, int32_t &f, const int64_t scale) {
	const int64_t upper_bound = NumericLimits<int32_t>::Maximum();
	const int64_t lower_bound = NumericLimits<int32_t>::Minimum();

	// Common case: the value fits, so nothing needs to be borrowed.
	if (msf >= lower_bound && msf <= upper_bound) {
		f = UnsafeNumericCast<int32_t>(msf);
		return;
	}

	// Out of range: saturate at the bound that was crossed and push the remainder down.
	f = msf > upper_bound ? NumericLimits<int32_t>::Maximum() : NumericLimits<int32_t>::Minimum();
	lsf += (msf - f) * scale;
}

// Returns a normalised copy of this interval that still fits in a regular interval_t.
//
// The three-argument Normalize() produces 64-bit month and day counts; these are then
// squeezed back into the 32-bit months/days fields, borrowing any excess into the
// field to the right (months -> days -> micros).
interval_t interval_t::Normalize() const {
	int64_t wide_months;
	int64_t wide_days;
	interval_t normalized;

	// Wide normalisation writes micros straight into the result.
	Normalize(wide_months, wide_days, normalized.micros);

	// Months that do not fit spill into days, then days that do not fit spill into micros.
	Borrow(wide_months, wide_days, normalized.months, Interval::DAYS_PER_MONTH);
	Borrow(wide_days, normalized.micros, normalized.days, Interval::MICROS_PER_DAY);

	return normalized;
}

} // namespace duckdb
22 changes: 22 additions & 0 deletions src/planner/collation_binding.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,32 @@ bool PushTimeTZCollation(ClientContext &context, unique_ptr<Expression> &source,
return true;
}

// Collation callback for INTERVAL expressions.
//
// For any type other than INTERVAL this does nothing and returns false. For INTERVAL,
// `source` is replaced by a call to the system catalog's normalized_interval function
// with the original expression as its only argument, and true is returned.
// The CollationType argument is not used.
//
// Throws InternalException if normalized_interval does not have exactly one overload.
bool PushIntervalCollation(ClientContext &context, unique_ptr<Expression> &source, const LogicalType &sql_type,
                           CollationType) {
	const bool is_interval = sql_type.id() == LogicalTypeId::INTERVAL;
	if (!is_interval) {
		return false;
	}

	// Look up the normalising function in the system catalog.
	auto &system_catalog = Catalog::GetSystemCatalog(context);
	auto &normalize_entry =
	    system_catalog.GetEntry<ScalarFunctionCatalogEntry>(context, DEFAULT_SCHEMA, "normalized_interval");
	auto &overloads = normalize_entry.functions;
	if (overloads.Size() != 1) {
		throw InternalException("normalized_interval should only have a single overload");
	}
	auto &normalize_function = overloads.GetFunctionReferenceByOffset(0);

	// Wrap the original expression: source := normalized_interval(source).
	vector<unique_ptr<Expression>> arguments;
	arguments.push_back(std::move(source));

	FunctionBinder binder(context);
	source = binder.BindScalarFunction(normalize_function, std::move(arguments));
	return true;
}

// Registers the built-in collation callbacks: PushVarcharCollation, PushTimeTZCollation
// and PushIntervalCollation, in that order.
// NOTE(review): whether the registration order affects which callback wins is not visible here.
CollationBinding::CollationBinding() {
RegisterCollation(CollationCallback(PushVarcharCollation));
RegisterCollation(CollationCallback(PushTimeTZCollation));
RegisterCollation(CollationCallback(PushIntervalCollation));
}

void CollationBinding::RegisterCollation(CollationCallback callback) {
Expand Down
145 changes: 145 additions & 0 deletions test/sql/types/interval/test_interval_comparison.test
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,148 @@ query I
select interval '28 days 432000 seconds' = interval '1 month 3 days';
----
True

# Sorting with normalisation
statement ok
CREATE TABLE issue14384(i INTERVAL);

statement ok
INSERT INTO issue14384(i) VALUES
('2 years 3 months'),
('-1734799452 DAYS'),
('2 DAYS'),
('13 days'),
('1 month'),
('3 days'),
;

query I
SELECT i FROM issue14384 ORDER BY ALL;
----
-1734799452 days
2 days
3 days
13 days
1 month
2 years 3 months

# PiecewiseMergeJoin
query II
SELECT *
FROM issue14384
INNER JOIN (
SELECT INTERVAL 1000 DAY AS col0
FROM issue14384) AS sub0
ON (issue14384.i < sub0.col0)
ORDER BY ALL;
----
-1734799452 days 1000 days
-1734799452 days 1000 days
-1734799452 days 1000 days
-1734799452 days 1000 days
-1734799452 days 1000 days
-1734799452 days 1000 days
2 days 1000 days
2 days 1000 days
2 days 1000 days
2 days 1000 days
2 days 1000 days
2 days 1000 days
3 days 1000 days
3 days 1000 days
3 days 1000 days
3 days 1000 days
3 days 1000 days
3 days 1000 days
13 days 1000 days
13 days 1000 days
13 days 1000 days
13 days 1000 days
13 days 1000 days
13 days 1000 days
1 month 1000 days
1 month 1000 days
1 month 1000 days
1 month 1000 days
1 month 1000 days
1 month 1000 days
2 years 3 months 1000 days
2 years 3 months 1000 days
2 years 3 months 1000 days
2 years 3 months 1000 days
2 years 3 months 1000 days
2 years 3 months 1000 days

# NestedLoopJoin
query II
SELECT *
FROM issue14384
INNER JOIN (
SELECT INTERVAL 1000 DAY AS col0 FROM issue14384) AS sub0
ON (issue14384.i < sub0.col0)
WHERE (NOT (issue14384.i != issue14384.i))
ORDER BY ALL;
----
-1734799452 days 1000 days
-1734799452 days 1000 days
-1734799452 days 1000 days
-1734799452 days 1000 days
-1734799452 days 1000 days
-1734799452 days 1000 days
2 days 1000 days
2 days 1000 days
2 days 1000 days
2 days 1000 days
2 days 1000 days
2 days 1000 days
3 days 1000 days
3 days 1000 days
3 days 1000 days
3 days 1000 days
3 days 1000 days
3 days 1000 days
13 days 1000 days
13 days 1000 days
13 days 1000 days
13 days 1000 days
13 days 1000 days
13 days 1000 days
1 month 1000 days
1 month 1000 days
1 month 1000 days
1 month 1000 days
1 month 1000 days
1 month 1000 days
2 years 3 months 1000 days
2 years 3 months 1000 days
2 years 3 months 1000 days
2 years 3 months 1000 days
2 years 3 months 1000 days
2 years 3 months 1000 days


statement ok
INSERT INTO issue14384(i) VALUES
('1000 DAY'),
('28 days 432000 seconds'),
('1 month 3 days'),
('2147483647 months 2147483647 days 9223372036854775807 microseconds'),
('-2147483648 months -2147483648 days -9223372036854775807 microseconds'),
(NULL)
;

query I
SELECT i FROM issue14384 ORDER BY ALL;
----
-178956970 years -8 months -2147483648 days -2562047788:00:54.775807
-1734799452 days
2 days
3 days
13 days
1 month
28 days 120:00:00
1 month 3 days
2 years 3 months
1000 days
178956970 years 7 months 2147483647 days 2562047788:00:54.775807
NULL

0 comments on commit bfa357d

Please sign in to comment.