Skip to content

Commit

Permalink
Support Const TVF returning composite type in ORCA (#13422)
Browse files Browse the repository at this point in the history
Background: A const TVF returning a multi-column composite type returns incorrect results in 6X. A TVF returning a composite type falls back to the planner in 7X.
Implementation:
1. Use invalid funcid to indicate TVF evaluates to const. Remove all valid funcid assertions.
2. Refactored code. Query -> DXL translation cases are divided into three categories: (i) non-TVF and non-const composite (e.g. Var, Const returning simple type), (ii) const TVF returning composite type, (iii) TVF.
3. Add relation storage type and token for composite type, similar to "virtual" storage type in 6X.
4. Remove tests for const TVF fallback. Add tests for const TVF.

Co-authored-by: Jingyu Wang <[email protected]>
  • Loading branch information
2 people authored and my-ship-it committed Nov 1, 2024
1 parent 3e68f23 commit 6f10db2
Show file tree
Hide file tree
Showing 19 changed files with 426 additions and 108 deletions.
111 changes: 73 additions & 38 deletions src/backend/gpopt/translate/CTranslatorDXLToPlStmt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ extern "C" {
#include "gpopt/translate/CPartPruneStepsBuilder.h"
#include "gpopt/translate/CTranslatorDXLToPlStmt.h"
#include "gpopt/translate/CTranslatorUtils.h"
#include "naucrates/dxl/operators/CDXLDatumGeneric.h"
#include "naucrates/dxl/operators/CDXLDirectDispatchInfo.h"
#include "naucrates/dxl/operators/CDXLNode.h"
#include "naucrates/dxl/operators/CDXLPhysicalAgg.h"
Expand Down Expand Up @@ -1579,19 +1580,9 @@ CTranslatorDXLToPlStmt::TranslateDXLTvfToRangeTblEntry(
RangeTblEntry *rte = MakeNode(RangeTblEntry);
rte->rtekind = RTE_FUNCTION;

FuncExpr *func_expr = MakeNode(FuncExpr);

func_expr->funcid = CMDIdGPDB::CastMdid(dxlop->FuncMdId())->Oid();
func_expr->funcretset = gpdb::GetFuncRetset(func_expr->funcid);
// this is a function call, as opposed to a cast
func_expr->funcformat = COERCE_EXPLICIT_CALL;
func_expr->funcresulttype =
CMDIdGPDB::CastMdid(dxlop->ReturnTypeMdId())->Oid();

// get function alias
Alias *alias = MakeNode(Alias);
alias->colnames = NIL;

// get function alias
alias->aliasname = CTranslatorUtils::CreateMultiByteCharStringFromWCString(
dxlop->Pstr()->GetBuffer());

Expand All @@ -1618,47 +1609,91 @@ CTranslatorDXLToPlStmt::TranslateDXLTvfToRangeTblEntry(
ul + 1 /*attno*/);
}

// function arguments
const ULONG num_of_child = tvf_dxlnode->Arity();
for (ULONG ul = 1; ul < num_of_child; ++ul)
RangeTblFunction *rtfunc = MakeNode(RangeTblFunction);
Bitmapset *funcparams = nullptr;

// invalid funcid indicates TVF evaluates to const
if (!dxlop->FuncMdId()->IsValid())
{
CDXLNode *func_arg_dxlnode = (*tvf_dxlnode)[ul];
Const *const_expr = MakeNode(Const);

const_expr->consttype =
CMDIdGPDB::CastMdid(dxlop->ReturnTypeMdId())->Oid();
const_expr->consttypmod = -1;

CDXLNode *constVa = (*tvf_dxlnode)[1];
CDXLScalarConstValue *constValue =
CDXLScalarConstValue::Cast(constVa->GetOperator());
const CDXLDatum *datum_dxl = constValue->GetDatumVal();
CDXLDatumGeneric *datum_generic_dxl =
CDXLDatumGeneric::Cast(const_cast<gpdxl::CDXLDatum *>(datum_dxl));
const IMDType *type =
m_md_accessor->RetrieveType(datum_generic_dxl->MDId());
const_expr->constlen = type->Length();
Datum val = gpdb::DatumFromPointer(datum_generic_dxl->GetByteArray());
ULONG length =
(ULONG) gpdb::DatumSize(val, false, const_expr->constlen);
CHAR *str = (CHAR *) gpdb::GPDBAlloc(length + 1);
memcpy(str, datum_generic_dxl->GetByteArray(), length);
str[length] = '\0';
const_expr->constvalue = gpdb::DatumFromPointer(str);

rtfunc->funcexpr = (Node *) const_expr;
rtfunc->funccolcount = (int) num_of_cols;
}
else
{
FuncExpr *func_expr = MakeNode(FuncExpr);

func_expr->funcid = CMDIdGPDB::CastMdid(dxlop->FuncMdId())->Oid();
func_expr->funcretset = gpdb::GetFuncRetset(func_expr->funcid);
// this is a function call, as opposed to a cast
func_expr->funcformat = COERCE_EXPLICIT_CALL;
func_expr->funcresulttype =
CMDIdGPDB::CastMdid(dxlop->ReturnTypeMdId())->Oid();

// function arguments
const ULONG num_of_child = tvf_dxlnode->Arity();
for (ULONG ul = 1; ul < num_of_child; ++ul)
{
CDXLNode *func_arg_dxlnode = (*tvf_dxlnode)[ul];

CMappingColIdVarPlStmt colid_var_mapping(m_mp, base_table_context,
nullptr, output_context,
m_dxl_to_plstmt_context);
CMappingColIdVarPlStmt colid_var_mapping(m_mp, base_table_context,
nullptr, output_context,
m_dxl_to_plstmt_context);

Expr *pexprFuncArg = m_translator_dxl_to_scalar->TranslateDXLToScalar(
func_arg_dxlnode, &colid_var_mapping);
func_expr->args = gpdb::LAppend(func_expr->args, pexprFuncArg);
}
Expr *pexprFuncArg =
m_translator_dxl_to_scalar->TranslateDXLToScalar(
func_arg_dxlnode, &colid_var_mapping);
func_expr->args = gpdb::LAppend(func_expr->args, pexprFuncArg);
}

// GPDB_91_MERGE_FIXME: collation
func_expr->inputcollid = gpdb::ExprCollation((Node *) func_expr->args);
func_expr->funccollid = gpdb::TypeCollation(func_expr->funcresulttype);
// GPDB_91_MERGE_FIXME: collation
func_expr->inputcollid = gpdb::ExprCollation((Node *) func_expr->args);
func_expr->funccollid = gpdb::TypeCollation(func_expr->funcresulttype);

// Populate RangeTblFunction::funcparams, by walking down the entire
// func_expr to capture ids of all the PARAMs
ListCell *lc = nullptr;
List *param_exprs = gpdb::ExtractNodesExpression(
(Node *) func_expr, T_Param, false /*descend_into_subqueries */);
Bitmapset *funcparams = nullptr;
ForEach(lc, param_exprs)
{
Param *param = (Param *) lfirst(lc);
funcparams = gpdb::BmsAddMember(funcparams, param->paramid);
// Populate RangeTblFunction::funcparams, by walking down the entire
// func_expr to capture ids of all the PARAMs
ListCell *lc = nullptr;
List *param_exprs = gpdb::ExtractNodesExpression(
(Node *) func_expr, T_Param, false /*descend_into_subqueries */);
ForEach(lc, param_exprs)
{
Param *param = (Param *) lfirst(lc);
funcparams = gpdb::BmsAddMember(funcparams, param->paramid);
}

rtfunc->funcexpr = (Node *) func_expr;
}

RangeTblFunction *rtfunc = MakeNode(RangeTblFunction);
rtfunc->funcexpr = (Node *) func_expr;
rtfunc->funcparams = funcparams;
// GPDB_91_MERGE_FIXME: collation
// set rtfunc->funccoltypemods & rtfunc->funccolcollations?
rte->functions = ListMake1(rtfunc);

rte->inFromCl = true;
rte->eref = alias;

rte->eref = alias;
return rte;
}

Expand Down
32 changes: 20 additions & 12 deletions src/backend/gpopt/translate/CTranslatorQueryToDXL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3669,26 +3669,21 @@ CTranslatorQueryToDXL::TranslateTVFToDXL(const RangeTblEntry *rte,
GPOS_WSZ_LIT("Multi-argument UNNEST() or TABLE()"));
}
RangeTblFunction *rtfunc = (RangeTblFunction *) linitial(rte->functions);
FuncExpr *funcexpr = (FuncExpr *) rtfunc->funcexpr;
GPOS_ASSERT(funcexpr);

BOOL is_composite_const =
CTranslatorUtils::IsCompositeConst(m_mp, m_md_accessor, rtfunc);

// if this is a folded function expression, generate a project over a CTG
if (!IsA(funcexpr, FuncExpr))
if (!IsA(rtfunc->funcexpr, FuncExpr) && !is_composite_const)
{
if (gpdb::IsCompositeType(funcexpr->funcid))
{
GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiQuery2DXLUnsupportedFeature,
GPOS_WSZ_LIT("Whole-row variable"));
}

CDXLNode *const_tbl_get_dxlnode = DXLDummyConstTableGet();

CDXLNode *project_list_dxlnode = GPOS_NEW(m_mp)
CDXLNode(m_mp, GPOS_NEW(m_mp) CDXLScalarProjList(m_mp));

CDXLNode *project_elem_dxlnode =
TranslateExprToDXLProject((Expr *) funcexpr, rte->eref->aliasname,
true /* insist_new_colids */);
CDXLNode *project_elem_dxlnode = TranslateExprToDXLProject(
(Expr *) rtfunc->funcexpr, rte->eref->aliasname,
true /* insist_new_colids */);
project_list_dxlnode->AddChild(project_elem_dxlnode);

CDXLNode *project_dxlnode = GPOS_NEW(m_mp)
Expand All @@ -3712,6 +3707,19 @@ CTranslatorQueryToDXL::TranslateTVFToDXL(const RangeTblEntry *rte,

BOOL is_subquery_in_args = false;

// funcexpr evaluates to const and returns composite type
if (IsA(rtfunc->funcexpr, Const))
{
CDXLNode *constValue = m_scalar_translator->TranslateScalarToDXL(
(Expr *) (rtfunc->funcexpr), m_var_to_colid_map);
tvf_dxlnode->AddChild(constValue);
return tvf_dxlnode;
}

GPOS_ASSERT(IsA(rtfunc->funcexpr, FuncExpr));

FuncExpr *funcexpr = (FuncExpr *) rtfunc->funcexpr;

// check if arguments contain SIRV functions
if (NIL != funcexpr->args && HasSirvFunctions((Node *) funcexpr->args))
{
Expand Down
6 changes: 5 additions & 1 deletion src/backend/gpopt/translate/CTranslatorRelcacheToDXL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2454,7 +2454,11 @@ CTranslatorRelcacheToDXL::RetrieveRelStorageType(Relation rel)
break;
case 0:

if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
if (rel->rd_rel->relkind == RELKIND_COMPOSITE_TYPE)
{
rel_storage_type = IMDRelation::ErelstorageCompositeType;
}
else if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
rel_storage_type = RetrieveStorageTypeForPartitionedTable(rel);
else if (gpdb::RelIsExternalTable(rel->rd_id))
rel_storage_type = IMDRelation::ErelstorageExternal;
Expand Down
50 changes: 46 additions & 4 deletions src/backend/gpopt/translate/CTranslatorUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -271,18 +271,40 @@ CTranslatorUtils::ConvertToCDXLLogicalTVF(CMemoryPool *mp,
*/

RangeTblFunction *rtfunc = (RangeTblFunction *) linitial(rte->functions);
FuncExpr *funcexpr = (FuncExpr *) rtfunc->funcexpr;
GPOS_ASSERT(funcexpr);
GPOS_ASSERT(IsA(funcexpr, FuncExpr));


// TVF evaluates to const, return const DXL node
if (IsA(rtfunc->funcexpr, Const))
{
Const *constExpr = (Const *) rtfunc->funcexpr;

CMDIdGPDB *mdid_return_type =
GPOS_NEW(mp) CMDIdGPDB(constExpr->consttype);

const IMDType *type = md_accessor->RetrieveType(mdid_return_type);
CDXLColDescrArray *column_descrs = GetColumnDescriptorsFromComposite(
mp, md_accessor, id_generator, type);

CMDName *func_name =
CDXLUtils::CreateMDNameFromCharArray(mp, rte->eref->aliasname);
mdid_return_type->AddRef();

// if TVF evaluates to const, pass invalid key as funcid
CDXLLogicalTVF *tvf_dxl = GPOS_NEW(mp)
CDXLLogicalTVF(mp, GPOS_NEW(mp) CMDIdGPDB(0), mdid_return_type,
func_name, column_descrs);

return tvf_dxl;
}

FuncExpr *funcexpr = (FuncExpr *) rtfunc->funcexpr;
// In the planner, scalar functions that are volatile (SIRV) or read or modify SQL
// data get patched into an InitPlan. This is not supported in the optimizer
if (IsSirvFunc(mp, md_accessor, funcexpr->funcid))
{
GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiQuery2DXLUnsupportedFeature,
GPOS_WSZ_LIT("SIRV functions"));
}

// get function id
CMDIdGPDB *mdid_func = GPOS_NEW(mp) CMDIdGPDB(funcexpr->funcid);
CMDIdGPDB *mdid_return_type =
Expand Down Expand Up @@ -2481,4 +2503,24 @@ CTranslatorUtils::GetAggKind(EdxlAggrefKind aggkind)
}
}

//---------------------------------------------------------------------------
//	@function:
//		CTranslatorUtils::IsCompositeConst
//
//	@doc:
//		Check whether the expression of a range table function is a Const
//		node whose type is a composite type.
//
//		Returns false immediately when rtfunc->funcexpr is not a Const.
//		Otherwise the const's type (consttype) is looked up through the
//		metadata accessor and the result of IMDType::IsComposite() is
//		returned.
//
//	@param mp           memory pool used for the temporary metadata id
//	@param md_accessor  metadata accessor used to retrieve the type
//	@param rtfunc       range table function whose funcexpr is inspected
//---------------------------------------------------------------------------
BOOL
CTranslatorUtils::IsCompositeConst(CMemoryPool *mp, CMDAccessor *md_accessor,
								   const RangeTblFunction *rtfunc)
{
	if (!IsA(rtfunc->funcexpr, Const))
	{
		return false;
	}

	const Const *constExpr = (const Const *) rtfunc->funcexpr;

	// temporary lookup key for the metadata cache
	CMDIdGPDB *mdid_return_type = GPOS_NEW(mp) CMDIdGPDB(constExpr->consttype);

	const IMDType *type = md_accessor->RetrieveType(mdid_return_type);

	// The mdid is only needed for the lookup above; drop our reference so the
	// ref-counted object is not leaked.
	// NOTE(review): relies on the accessor not taking ownership of the key
	// passed to RetrieveType (the usual ORCA convention) -- confirm.
	mdid_return_type->Release();

	return type->IsComposite();
}

// EOF
16 changes: 12 additions & 4 deletions src/backend/gporca/libgpopt/src/operators/CLogicalTVF.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,6 @@ CLogicalTVF::CLogicalTVF(CMemoryPool *mp, IMDId *mdid_func,
m_pdrgpcoldesc(pdrgpcoldesc),
m_pdrgpcrOutput(nullptr)
{
GPOS_ASSERT(mdid_func->IsValid());
GPOS_ASSERT(mdid_return_type->IsValid());
GPOS_ASSERT(nullptr != str);
GPOS_ASSERT(nullptr != pdrgpcoldesc);
Expand All @@ -72,10 +71,18 @@ CLogicalTVF::CLogicalTVF(CMemoryPool *mp, IMDId *mdid_func,
m_pdrgpcrOutput = PdrgpcrCreateMapping(mp, pdrgpcoldesc, UlOpId());

CMDAccessor *md_accessor = COptCtxt::PoctxtFromTLS()->Pmda();
const IMDFunction *pmdfunc = md_accessor->RetrieveFunc(m_func_mdid);
if (mdid_func->IsValid())
{
const IMDFunction *pmdfunc = md_accessor->RetrieveFunc(m_func_mdid);

m_efs = pmdfunc->GetFuncStability();
m_returns_set = pmdfunc->ReturnsSet();
m_efs = pmdfunc->GetFuncStability();
m_returns_set = pmdfunc->ReturnsSet();
}
else
{
m_efs = gpmd::IMDFunction::EfsImmutable;
m_returns_set = false;
}
}

//---------------------------------------------------------------------------
Expand Down Expand Up @@ -145,6 +152,7 @@ CLogicalTVF::HashValue() const
gpos::CombineHashes(
m_return_type_mdid->HashValue(),
gpos::HashPtr<CColumnDescriptorArray>(m_pdrgpcoldesc))));

ulHash =
gpos::CombineHashes(ulHash, CUtils::UlHashColArray(m_pdrgpcrOutput));
return ulHash;
Expand Down
6 changes: 4 additions & 2 deletions src/backend/gporca/libgpopt/src/operators/CPhysicalTVF.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,16 @@ CPhysicalTVF::CPhysicalTVF(CMemoryPool *mp, IMDId *mdid_func,
m_pdrgpcoldesc(pdrgpcoldesc),
m_pcrsOutput(pcrsOutput)
{
GPOS_ASSERT(m_func_mdid->IsValid());
GPOS_ASSERT(m_return_type_mdid->IsValid());
GPOS_ASSERT(nullptr != m_pstr);
GPOS_ASSERT(nullptr != m_pdrgpcoldesc);
GPOS_ASSERT(nullptr != m_pcrsOutput);

CMDAccessor *md_accessor = COptCtxt::PoctxtFromTLS()->Pmda();
m_pmdfunc = md_accessor->RetrieveFunc(m_func_mdid);
if (m_func_mdid->IsValid())
m_pmdfunc = md_accessor->RetrieveFunc(m_func_mdid);
else
m_pmdfunc = nullptr;
}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -544,6 +544,9 @@ CTranslatorDXLToExpr::PexprLogicalTVF(const CDXLNode *dxlnode)
ConstructDXLColId2ColRefMapping(dxl_op->GetDXLColumnDescrArray(),
popTVF->PdrgpcrOutput());

if (!popTVF->FuncMdId()->IsValid())
return pexpr;

const IMDFunction *pmdfunc = m_pmda->RetrieveFunc(mdid_func);

if (IMDFunction::EfsVolatile == pmdfunc->GetFuncStability())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -469,6 +469,7 @@ enum Edxltoken
EdxltokenRelStorageAppendOnlyRows,
EdxltokenRelStorageMixedPartitioned,
EdxltokenRelStorageExternal,
EdxltokenRelStorageCompositeType,

EdxltokenPartKeys,
EdxltokenPartTypes,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ class IMDRelation : public IMDCacheObject
ErelstorageAppendOnlyRows,
ErelstorageExternal,
ErelstorageMixedPartitioned,
ErelstorageCompositeType,
ErelstorageSentinel
};

Expand Down
2 changes: 2 additions & 0 deletions src/backend/gporca/libnaucrates/src/md/IMDRelation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@ IMDRelation::GetStorageTypeStr(IMDRelation::Erelstoragetype rel_storage_type)
case ErelstorageMixedPartitioned:
return CDXLTokens::GetDXLTokenStr(
EdxltokenRelStorageMixedPartitioned);
case ErelstorageCompositeType:
return CDXLTokens::GetDXLTokenStr(EdxltokenRelStorageCompositeType);
default:
return nullptr;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ CDXLLogicalTVF::CDXLLogicalTVF(CMemoryPool *mp, IMDId *mdid_func,
m_mdname(mdname),
m_dxl_col_descr_array(pdrgdxlcd)
{
GPOS_ASSERT(m_func_mdid->IsValid());
GPOS_ASSERT(m_return_type_mdid->IsValid());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ CDXLPhysicalTVF::CDXLPhysicalTVF(CMemoryPool *mp, IMDId *mdid_func,
func_name(str)
{
GPOS_ASSERT(nullptr != m_func_mdid);
GPOS_ASSERT(m_func_mdid->IsValid());
GPOS_ASSERT(nullptr != m_return_type_mdid);
GPOS_ASSERT(m_return_type_mdid->IsValid());
GPOS_ASSERT(nullptr != func_name);
Expand Down
Loading

0 comments on commit 6f10db2

Please sign in to comment.