diff --git a/contrib/extfmtcsv/extfmtcsv.c b/contrib/extfmtcsv/extfmtcsv.c index bfd45419d..d4e407fdb 100644 --- a/contrib/extfmtcsv/extfmtcsv.c +++ b/contrib/extfmtcsv/extfmtcsv.c @@ -484,115 +484,8 @@ void buildFormatterOptionsInJson(PG_FUNCTION_ARGS, char **jsonStr) } /* add default settings for this formatter */ - if (json_object_object_get(optJsonObject, "delimiter") == NULL) - { - json_object_object_add(optJsonObject, "delimiter", - json_object_new_string( - (externalFmtType == TextFormatTypeTXT) ? "\t" : ",")); - } - - if (json_object_object_get(optJsonObject, "null") == NULL) - { - json_object_object_add(optJsonObject, "null", - json_object_new_string( - (externalFmtType == TextFormatTypeTXT) ? "\\N" : "")); - } - - if (json_object_object_get(optJsonObject, "fill_missing_fields") == NULL) - { - json_object_object_add(optJsonObject, "fill_missing_fields", - json_object_new_boolean(0)); - } - else - { - json_object_object_del(optJsonObject, "fill_missing_fields"); - json_object_object_add(optJsonObject, "fill_missing_fields", - json_object_new_boolean(1)); - } - - if (json_object_object_get(optJsonObject, "header") == NULL) - { - json_object_object_add(optJsonObject, "header", - json_object_new_boolean(0)); - } - else - { - json_object_object_del(optJsonObject, "header"); - json_object_object_add(optJsonObject, "header", - json_object_new_boolean(1)); - } - - if (json_object_object_get(optJsonObject, "reject_limit") == NULL) - { - json_object_object_add(optJsonObject, "reject_limit", - json_object_new_int(0)); - } - - if (json_object_object_get(optJsonObject, "err_table") == NULL) - { - json_object_object_add(optJsonObject, "err_table", - json_object_new_string("")); - } - - if (json_object_object_get(optJsonObject, "newline") == NULL) - { - json_object_object_add(optJsonObject, "newline", - json_object_new_string("lf")); - } - - if (json_object_object_get(optJsonObject, "encoding") == NULL) - { - const char *encodingStr = pg_encoding_to_char( - 
((FormatterData*) fcinfo->context)->fmt_external_encoding); - char lowerCaseEncodingStr[64]; - strcpy(lowerCaseEncodingStr, encodingStr); - for (char *p = lowerCaseEncodingStr; *p != '\0'; ++p) - { - *p = tolower(*p); - } - - json_object_object_add(optJsonObject, "encoding", - json_object_new_string(lowerCaseEncodingStr)); - } - - if (externalFmtType == TextFormatTypeCSV - && json_object_object_get(optJsonObject, "quote") == NULL) - { - json_object_object_add(optJsonObject, "quote", - json_object_new_string("\"")); - } - - if (json_object_object_get(optJsonObject, "escape") == NULL) - { - if (externalFmtType == TextFormatTypeCSV) - { - /* Let escape follow quote's setting */ - struct json_object *val = json_object_object_get(optJsonObject, - "quote"); - json_object_object_add(optJsonObject, "escape", - json_object_new_string(json_object_get_string(val))); - } - else - { - json_object_object_add(optJsonObject, "escape", - json_object_new_string("\\")); - } - } - - if (json_object_object_get(optJsonObject, "force_quote") == NULL) - { - json_object_object_add(optJsonObject, "force_quote", - json_object_new_string("")); - } - - /* This is for csv formatter only */ - if (externalFmtType == TextFormatTypeCSV - && json_object_object_get(optJsonObject, "force_notnull") == NULL) - { - json_object_object_add(optJsonObject, "force_notnull", - json_object_new_string("")); - } - + int encoding = ((FormatterData *)fcinfo->context)->fmt_external_encoding; + buildDefaultFormatterOptionsInJson(encoding, externalFmtType, optJsonObject); *jsonStr = NULL; if (optJsonObject != NULL) { diff --git a/contrib/exthdfs/exthdfs.c b/contrib/exthdfs/exthdfs.c index 719c02181..1d19e89fc 100644 --- a/contrib/exthdfs/exthdfs.c +++ b/contrib/exthdfs/exthdfs.c @@ -404,6 +404,16 @@ Datum hdfsprotocol_validate(PG_FUNCTION_ARGS) (errcode(ERRCODE_SYNTAX_ERROR), errmsg("hdfsprotocol_validate : " "'force_quote' option is only available in 'csv' formatter"))); } } + + if (strcasecmp(de->defname, "header") 
== 0) + { + /* this is allowed only for readable table */ + if (pvalidator_data->direction != EXT_VALIDATE_READ) + { + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), errmsg("hdfsprotocol_validate : " "'header' option is only available in readable external table"))); + } + } } /* All urls should diff --git a/contrib/hornet/hornet.c b/contrib/hornet/hornet.c index f14990473..37774598f 100644 --- a/contrib/hornet/hornet.c +++ b/contrib/hornet/hornet.c @@ -25,6 +25,7 @@ #include "storage/fd.h" #include "storage/filesystem.h" #include "utils/builtins.h" +#include "utils/hawq_funcoid_mapping.h" Datum ls_hdfs_dir(PG_FUNCTION_ARGS); @@ -131,3 +132,10 @@ Datum ls_hdfs_dir(PG_FUNCTION_ARGS) { SRF_RETURN_DONE(funcctx); } } + +PG_FUNCTION_INFO_V1(is_supported_proc_in_NewQE); +Datum is_supported_proc_in_NewQE(PG_FUNCTION_ARGS) { + Oid a = PG_GETARG_OID(0); + int32_t mappingFuncId = HAWQ_FUNCOID_MAPPING(a); + PG_RETURN_BOOL(!(IS_HAWQ_MAPPING_FUNCID_INVALID(mappingFuncId))); +} diff --git a/contrib/hornet/load_hornet_helper_function.sql b/contrib/hornet/load_hornet_helper_function.sql index 5a157d53e..06c0dfdc8 100644 --- a/contrib/hornet/load_hornet_helper_function.sql +++ b/contrib/hornet/load_hornet_helper_function.sql @@ -189,3 +189,8 @@ BEGIN return contents_command; END; $$ LANGUAGE PLPGSQL; + + +drop function if exists is_supported_proc_in_NewQE(oid); + +create function is_supported_proc_in_NewQE(oid) returns boolean as '$libdir/hornet','is_supported_proc_in_NewQE'language c immutable; \ No newline at end of file diff --git a/contrib/orc/orc.c b/contrib/orc/orc.c index b58943d20..3dfda328b 100644 --- a/contrib/orc/orc.c +++ b/contrib/orc/orc.c @@ -327,7 +327,7 @@ Datum orc_validate_encodings(PG_FUNCTION_ARGS) if (strncasecmp(encoding_name, "utf8", strlen("utf8"))) { ereport(ERROR, - (errcode(ERRCODE_SYNTAX_ERROR), errmsg("\"%s\" is not a valid encoding for ORC external table. 
", encoding_name), errOmitLocation(true))); + (errcode(ERRCODE_SYNTAX_ERROR), errmsg("\"%s\" is not a valid encoding for ORC external table. Encoding for ORC external table must be UTF8.", encoding_name), errOmitLocation(true))); } PG_RETURN_VOID() ; @@ -358,6 +358,9 @@ Datum orc_validate_datatypes(PG_FUNCTION_ARGS) { int4 tmp_typmod = typmod - VARHDRSZ; int precision = (tmp_typmod >> 16) & 0xffff; int scale = tmp_typmod & 0xffff; + + if (typmod == -1 && strcasecmp(orc_enable_no_limit_numeric, "ON") == 0) continue; // for numeric without precision and scale. + if (precision < 1 || 38 < precision) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c index c0f92361f..6211fdd66 100644 --- a/src/backend/access/common/reloptions.c +++ b/src/backend/access/common/reloptions.c @@ -16,6 +16,7 @@ #include "postgres.h" #include "access/reloptions.h" +#include "access/orcam.h" #include "catalog/pg_type.h" #include "cdb/cdbappendonlyam.h" #include "cdb/cdbparquetstoragewrite.h" @@ -321,6 +322,7 @@ default_reloptions(Datum reloptions, bool validate, char relkind, "bucketnum", "dicthreshold", "bloomfilter", + "stripesize", }; char *values[ARRAY_SIZE(default_keywords)]; @@ -328,6 +330,7 @@ default_reloptions(Datum reloptions, bool validate, char relkind, int32 blocksize = DEFAULT_APPENDONLY_BLOCK_SIZE; int32 pagesize = DEFAULT_PARQUET_PAGE_SIZE; int32 rowgroupsize = DEFAULT_PARQUET_ROWGROUP_SIZE; + int32 stripesize = DEFAULT_ORC_STRIPE_SIZE; bool appendonly = false; bool checksum = false; char* compresstype = NULL; @@ -808,6 +811,30 @@ default_reloptions(Datum reloptions, bool validate, char relkind, errOmitLocation(true))); } + /* stripesize */ + if (values[13] != NULL) + { + if(!(columnstore == RELSTORAGE_ORC)){ + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid option \'stripesize\' for non-orc table"), + errOmitLocation(true))); + } + + stripesize = 
pg_atoi(values[13], sizeof(int32), 0); + + if ((stripesize < MIN_ORC_STRIPE_SIZE) || (stripesize > MAX_ORC_STRIPE_SIZE)) + { + if (validate) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("stripe size for orc table should between 1MB and 1GB and should be specified in MBytes. " + "Got %d MB", stripesize), errOmitLocation(true))); + + stripesize = DEFAULT_ORC_STRIPE_SIZE; + } + } + // dicthreshold if (values[11] != NULL) { if(!(columnstore == RELSTORAGE_ORC)){ @@ -861,6 +888,7 @@ default_reloptions(Datum reloptions, bool validate, char relkind, result->blocksize = blocksize; result->pagesize = pagesize; result->rowgroupsize = rowgroupsize; + result->stripesize = stripesize; result->compresslevel = compresslevel; if (compresstype != NULL) for (j = 0;j < strlen(compresstype); j++) diff --git a/src/backend/access/external/url_curl.c b/src/backend/access/external/url_curl.c index eb5796019..12065fbf9 100644 --- a/src/backend/access/external/url_curl.c +++ b/src/backend/access/external/url_curl.c @@ -643,17 +643,26 @@ static int check_response(URL_FILE *file, int *rc, char **response_string) { snprintf(connmsg, sizeof connmsg, "error code = %d (%s)", (int)oserrno, strerror((int)oserrno)); } - - ereport( - ERROR, - (errcode(ERRCODE_CONNECTION_FAILURE), - errmsg("connection with gpfdist failed for \"%s\", effective url: " - "\"%s\". %s", - file->url, effective_url, (oserrno != 0 ? connmsg : "")))); + // When still_running == 0 and os level err = "time out", we will not + // report error. + if (!(file->u.curl.still_running == 0 && oserrno == 110)) { + ereport( + ERROR, + (errcode(ERRCODE_CONNECTION_FAILURE), + errmsg("connection with gpfdist failed for \"%s\", effective url: " + "\"%s\". %s", + file->url, effective_url, (oserrno != 0 ? 
connmsg : "")))); + } } else if (response_code == FDIST_TIMEOUT) // gpfdist server return timeout code { - return FDIST_TIMEOUT; + // When still_running == 0 and gpfdist return err = "time out", we will + // not report error. + if (file->u.curl.still_running == 0) { + return 0; + } else { + return FDIST_TIMEOUT; + } } else { /* we need to sleep 1 sec to avoid this condition: 1- seg X gets an error message from gpfdist @@ -1228,7 +1237,6 @@ URL_FILE *url_curl_fopen(char *url, bool forwrite, extvar_t *ev, elog(ERROR, "internal error: curl_multi_add_handle failed (%d - %s)", e, curl_easy_strerror(e)); } - while (CURLM_CALL_MULTI_PERFORM == (e = curl_multi_perform(multi_handle, &file->u.curl.still_running))) ; @@ -1241,15 +1249,16 @@ URL_FILE *url_curl_fopen(char *url, bool forwrite, extvar_t *ev, fill_buffer(file, 1); /* check the connection for GET request */ - // if connection is established, http_response should not be null - if (file->u.curl.still_running > 0 || file->u.curl.http_response == 0) { - if (check_response(file, &response_code, &response_string)) - ereport(ERROR, - (errcode(ERRCODE_CONNECTION_FAILURE), - errmsg("could not open \"%s\" for reading", file->url), - errdetail("Unexpected response from gpfdist server: %d - %s", - response_code, response_string))); - } + // When other vseg has read all data and this vseg attend to read 1 byte to + // check connection, it may get error "timed out". + // If error is not "timed out", we will still report error. 
+ if (check_response(file, &response_code, &response_string)) + ereport(ERROR, + (errcode(ERRCODE_CONNECTION_FAILURE), + errmsg("could not open \"%s\" for reading", file->url), + errdetail("Unexpected response from gpfdist server: %d - %s", + response_code, response_string))); + if (file->u.curl.still_running == 0) { elog(LOG, "session closed when checking the connection in url_curl_fopen, " diff --git a/src/backend/access/orc/orcam.c b/src/backend/access/orc/orcam.c index 243970430..cc4893bfb 100644 --- a/src/backend/access/orc/orcam.c +++ b/src/backend/access/orc/orcam.c @@ -176,6 +176,9 @@ OrcInsertDescData *orcBeginInsert(Relation rel, appendStringInfo(&option, "\"logicEof\": %" PRId64, segfileinfo->eof[0]); appendStringInfo(&option, ", \"uncompressedEof\": %" PRId64, segfileinfo->uncompressed_eof[0]); + appendStringInfo( + &option, ", \"stripeSize\": %" PRId64, + ((StdRdOptions *)(rel->rd_options))->stripesize * 1024 * 1024); if (aoentry->compresstype) appendStringInfo(&option, ", %s", aoentry->compresstype); appendStringInfoChar(&option, '}'); @@ -911,6 +914,49 @@ uint64 orcEndUpdate(OrcUpdateDescData *updateDesc) { return callback.processedTupleCount; } +int64_t *orcCreateIndex(Relation rel, int idxId, List *segno, int64 *eof, + List *columnsToRead, int sortIdx) { + checkOushuDbExtensiveFeatureSupport("ORC INDEX"); + OrcScanDescData *scanDesc = palloc0(sizeof(OrcScanDescData)); + OrcFormatData *orcFormatData = scanDesc->orcFormatData = + palloc0(sizeof(OrcFormatData)); + + RelationIncrementReferenceCount(rel); + + TupleDesc desc = RelationGetDescr(rel); + + scanDesc->rel = rel; + orcFormatData->fmt = ORCFormatNewORCFormatC("{}", 0); + initOrcFormatUserData(desc, orcFormatData); + + int32 splitCount = list_length(segno); + int *columnsToReadList = + palloc0(sizeof(int) * orcFormatData->numberOfColumns); + for (int i = 0; i < list_length(columnsToRead); i++) { + columnsToReadList[list_nth_int(columnsToRead, i) - 1] = 1; + } + int *sortIdxList = 
palloc0(sizeof(int) * orcFormatData->numberOfColumns); + for (int i = 0; i < sortIdx; i++) { + sortIdxList[i] = list_nth_int(columnsToRead, i) - 1; + } + + ORCFormatFileSplit *splits = palloc0(sizeof(ORCFormatFileSplit) * splitCount); + int32 filePathMaxLen = AOSegmentFilePathNameLen(rel) + 1; + for (int32 i = 0; i < splitCount; ++i) { + splits[i].fileName = palloc0(filePathMaxLen); + MakeAOSegmentFileName(rel, list_nth_int(segno, i), -1, dummyPlaceholder, + splits[i].fileName); + } + + if (splitCount > 0) addFilesystemCredential(splits[0].fileName); + RelationDecrementReferenceCount(rel); + return ORCFormatCreateIndex( + idxId, splits, splitCount, eof, columnsToReadList, sortIdxList, sortIdx, + orcFormatData->colNames, orcFormatData->colDatatypes, + orcFormatData->colDatatypeMods, orcFormatData->numberOfColumns, + gp_session_id, rm_seg_tmp_dirs); +} + bool isDirectDispatch(Plan *plan) { return plan->directDispatch.isDirectDispatch; } diff --git a/src/backend/access/orc/orcsegfiles.c b/src/backend/access/orc/orcsegfiles.c index f11e1c880..8fea0a853 100644 --- a/src/backend/access/orc/orcsegfiles.c +++ b/src/backend/access/orc/orcsegfiles.c @@ -55,6 +55,32 @@ void insertInitialOrcSegnoEntry(AppendOnlyEntry *aoEntry, int segNo) { heap_close(segRel, RowExclusiveLock); } +void insertInitialOrcIndexEntry(AppendOnlyEntry *aoEntry, int idxOid, int segNo) +{ + if (idxOid == 0 || segNo == 0) return; + + Relation segRel = heap_open(aoEntry->blkdirrelid, RowExclusiveLock); + TupleDesc desc = RelationGetDescr(segRel); + int natts = desc->natts; + bool *nulls = palloc(sizeof(bool) * natts); + Datum *values = palloc0(sizeof(Datum) * natts); + MemSet(nulls, 0, sizeof(char) * natts); + + values[Anum_pg_orcseg_idx_idxoid - 1] = Int32GetDatum(idxOid); + values[Anum_pg_orcseg_idx_segno - 1] = Int32GetDatum(segNo); + values[Anum_pg_orcseg_idx_eof - 1] = Float8GetDatum(0); + HeapTuple tuple = heap_form_tuple(desc, values, nulls); + if (!HeapTupleIsValid(tuple)) + elog(ERROR, "failed to 
build orc index file segment tuple"); + + frozen_heap_insert(segRel, tuple); + + if (Gp_role == GP_ROLE_DISPATCH) CatalogUpdateIndexes(segRel, tuple); + + heap_freetuple(tuple); + heap_close(segRel, RowExclusiveLock); +} + void insertOrcSegnoEntry(AppendOnlyEntry *aoEntry, int segNo, float8 tupleCount, float8 eof, float8 uncompressedEof) { Relation segRel = heap_open(aoEntry->segrelid, RowExclusiveLock); @@ -79,6 +105,59 @@ void insertOrcSegnoEntry(AppendOnlyEntry *aoEntry, int segNo, float8 tupleCount, heap_close(segRel, RowExclusiveLock); } +void deleteOrcIndexFileInfo(AppendOnlyEntry *aoEntry, int idxOid) +{ + if (aoEntry->blkdirrelid == 0) return; + Relation segRel = heap_open(aoEntry->blkdirrelid, RowExclusiveLock); + TupleDesc desc = RelationGetDescr(segRel); + ScanKeyData key[1]; + ScanKeyInit(&key[0], (AttrNumber)Anum_pg_orcseg_idx_idxoid, BTEqualStrategyNumber, + F_INT4EQ, Int32GetDatum(idxOid)); + SysScanDesc scan = systable_beginscan(segRel, aoEntry->blkdiridxid, TRUE, + SnapshotNow, 1, &key[0]); + HeapTuple tuple; + while ((tuple = systable_getnext(scan))) + { + simple_heap_delete(segRel, &tuple->t_self); + } + + systable_endscan(scan); + heap_close(segRel, RowExclusiveLock); +} + +void updateOrcIndexFileInfo(AppendOnlyEntry *aoEntry, int idxOid, int segNo, int64 eof) +{ + Relation segRel = heap_open(aoEntry->blkdirrelid, RowExclusiveLock); + TupleDesc desc = RelationGetDescr(segRel); + /* both idxoid and segno needed to scan tuple */ + ScanKeyData key[2]; + ScanKeyInit(&key[0], (AttrNumber)Anum_pg_orcseg_idx_idxoid, BTEqualStrategyNumber, + F_INT4EQ, Int32GetDatum(idxOid)); + ScanKeyInit(&key[1], (AttrNumber)Anum_pg_orcseg_idx_segno, BTEqualStrategyNumber, + F_INT4EQ, Int32GetDatum(segNo)); + SysScanDesc scan = systable_beginscan(segRel, aoEntry->blkdiridxid, TRUE, + SnapshotNow, 2, &key[0]); + HeapTuple tuple = systable_getnext(scan); + + Datum *record = palloc0(sizeof(Datum) * desc->natts); + bool *nulls = palloc0(sizeof(bool) * desc->natts); + bool 
*repl = palloc0(sizeof(bool) * desc->natts); + + record[Anum_pg_orcseg_idx_eof - 1] = Float8GetDatum((float8)eof); + repl[Anum_pg_orcseg_idx_eof - 1] = true; + + HeapTuple newTuple = heap_modify_tuple(tuple, desc, record, nulls, repl); + simple_heap_update(segRel, &tuple->t_self, newTuple); + CatalogUpdateIndexes(segRel, newTuple); + heap_freetuple(newTuple); + + systable_endscan(scan); + heap_close(segRel, RowExclusiveLock); + pfree(record); + pfree(nulls); + pfree(repl); +} + void updateOrcFileSegInfo(Relation rel, AppendOnlyEntry *aoEntry, int segNo, int64 eof, int64 uncompressedEof, int64 tupCountAdded, bool forInsert) { diff --git a/src/backend/catalog/aoseg.c b/src/backend/catalog/aoseg.c index 7e88b91ab..a1633ef0c 100644 --- a/src/backend/catalog/aoseg.c +++ b/src/backend/catalog/aoseg.c @@ -406,9 +406,10 @@ create_aoseg_index_table(Relation rel, Oid aosegOid, Oid aosegIndexOid, Oid * co * Create unique index on index oid. */ indexInfo = makeNode(IndexInfo); - indexInfo->ii_NumIndexAttrs = 1; - indexInfo->ii_NumIndexKeyAttrs = 1; + indexInfo->ii_NumIndexAttrs = 2; + indexInfo->ii_NumIndexKeyAttrs = 2; indexInfo->ii_KeyAttrNumbers[0] = 1; + indexInfo->ii_KeyAttrNumbers[1] = 2; indexInfo->ii_Expressions = NIL; indexInfo->ii_ExpressionsState = NIL; indexInfo->ii_Predicate = NIL; diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c index e13b97b56..24774df04 100644 --- a/src/backend/catalog/heap.c +++ b/src/backend/catalog/heap.c @@ -798,6 +798,7 @@ void CheckAttributeForOrc(TupleDesc desc) { int4 tmp_typmod = typmod - VARHDRSZ; int precision = (tmp_typmod >> 16) & 0xffff; int scale = tmp_typmod & 0xffff; + if(typmod == -1 && strcasecmp(orc_enable_no_limit_numeric, "ON") == 0)continue; if (precision < 1 || 38 < precision) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c index 7747e5f12..29669825c 100644 --- a/src/backend/catalog/index.c +++ 
b/src/backend/catalog/index.c @@ -46,6 +46,7 @@ #include "access/genam.h" #include "access/fileam.h" #include "access/heapam.h" +#include "access/orcsegfiles.h" #include "access/relscan.h" #include "access/sysattr.h" #include "access/transam.h" @@ -58,6 +59,7 @@ #include "catalog/index.h" #include "catalog/indexing.h" #include "catalog/namespace.h" +#include "catalog/pg_appendonly.h" #include "catalog/pg_constraint.h" #include "catalog/pg_exttable.h" #include "catalog/pg_namespace.h" @@ -1241,6 +1243,23 @@ index_drop(Oid indexId) if (hasexprs) RemoveStatistics(indexId, 0); + /* native orc need to delete pg_aoseg.pg_orcseg_idx_xxx rows */ + if (RelationIsOrc(userHeapRelation)) + { + AppendOnlyEntry *aoEntry = GetAppendOnlyEntry(heapId, SnapshotNow); + Assert(aoEntry != NULL); /* checked before blkdirrelid is read below */ + if (0 != caql_getcount( + NULL, + cql("SELECT COUNT(*) FROM pg_class " + " WHERE oid = :1 ", + ObjectIdGetDatum(aoEntry->blkdirrelid)))) + { + deleteOrcIndexFileInfo(aoEntry, indexId); + } + pfree(aoEntry); /* freed on both paths; previously leaked when the pg_class row was absent */ + /* todo: need to dispatch drop index to clean index data */ + } + /* * fix ATTRIBUTE relation */ diff --git a/src/backend/cdb/cdbdatalocality.c b/src/backend/cdb/cdbdatalocality.c index 0e908fcf1..7fd620963 100644 --- a/src/backend/cdb/cdbdatalocality.c +++ b/src/backend/cdb/cdbdatalocality.c @@ -679,6 +679,8 @@ bool collect_scan_rangetable(Node *node, case T_ExternalScan: case T_MagmaIndexScan: case T_MagmaIndexOnlyScan: + case T_OrcIndexScan: + case T_OrcIndexOnlyScan: case T_AppendOnlyScan: case T_ParquetScan: { RangeTblEntry *rte = rt_fetch(((Scan *)node)->scanrelid, diff --git a/src/backend/cdb/cdbdispatchresult.c b/src/backend/cdb/cdbdispatchresult.c index b07313ec7..62ad3dd70 100644 --- a/src/backend/cdb/cdbdispatchresult.c +++ b/src/backend/cdb/cdbdispatchresult.c @@ -1069,6 +1069,32 @@ process_aotupcounts(PartitionNode *parts, HTAB *ht, return ht; } +void +cdbdisp_handleModifiedOrcIndexCatalogOnSegments(List **segnoToVseg, CdbDispatchResults *results, + void 
(*handler)(QueryContextDispatchingSendBack sendback, List **l1)) +{ + int i; + for (i = 0; i < results->resultCount; ++i) + { + CdbDispatchResult *dispatchResult = &results->resultArray[i]; + int nres = cdbdisp_numPGresult(dispatchResult); + int ires; + for (ires = 0; ires < nres; ++ires) + { + /* for each PGresult */ + PGresult *pgresult = cdbdisp_getPGresult(dispatchResult, ires); + if (handler && pgresult && pgresult->sendback) + { + int j; + for (j = 0 ; j < pgresult->numSendback ; ++j) + { + handler(&pgresult->sendback[j], segnoToVseg); + } + } + } + } +} + void cdbdisp_handleModifiedCatalogOnSegments(CdbDispatchResults *results, void (*handler)(QueryContextDispatchingSendBack sendback)) diff --git a/src/backend/cdb/cdbllize.c b/src/backend/cdb/cdbllize.c index dd647b1a9..bae6f2bec 100644 --- a/src/backend/cdb/cdbllize.c +++ b/src/backend/cdb/cdbllize.c @@ -1405,6 +1405,8 @@ motion_sanity_walker(Node *node, sanity_result_t *result) case T_ExternalScan: case T_MagmaIndexScan: case T_MagmaIndexOnlyScan: + case T_OrcIndexScan: + case T_OrcIndexOnlyScan: case T_AppendOnlyScan: case T_ParquetScan: case T_IndexScan: diff --git a/src/backend/cdb/cdbpath.c b/src/backend/cdb/cdbpath.c index dc6489c1c..74772b8c4 100644 --- a/src/backend/cdb/cdbpath.c +++ b/src/backend/cdb/cdbpath.c @@ -1390,6 +1390,8 @@ cdbpath_dedup_fixup_walker(Path *path, void *context) case T_ExternalScan: case T_MagmaIndexScan: case T_MagmaIndexOnlyScan: + case T_OrcIndexScan: + case T_OrcIndexOnlyScan: case T_AppendOnlyScan: case T_ParquetScan: case T_IndexScan: diff --git a/src/backend/cdb/cdbplan.c b/src/backend/cdb/cdbplan.c index debf274eb..3bc21d98a 100644 --- a/src/backend/cdb/cdbplan.c +++ b/src/backend/cdb/cdbplan.c @@ -367,6 +367,8 @@ plan_tree_mutator(Node *node, break; case T_IndexScan: + case T_OrcIndexScan: + case T_OrcIndexOnlyScan: { IndexScan *idxscan = (IndexScan *) node; IndexScan *newidxscan; diff --git a/src/backend/cdb/cdbquerycontextdispatching.c 
b/src/backend/cdb/cdbquerycontextdispatching.c index fcf11d819..0eb130175 100644 --- a/src/backend/cdb/cdbquerycontextdispatching.c +++ b/src/backend/cdb/cdbquerycontextdispatching.c @@ -3058,6 +3058,27 @@ DropQueryContextDispatchingSendBack(QueryContextDispatchingSendBack sendback) } +void UpdateCatalogOrcIndexModifiedOnSegments(QueryContextDispatchingSendBack sendback, List **segnoToVseg) +{ + Assert(NULL != sendback); + + NativeOrcIndexFile *idxs = (NativeOrcIndexFile *)(list_nth(*segnoToVseg, sendback->varblock)); + AppendOnlyEntry *aoEntry = GetAppendOnlyEntry(sendback->relid, SnapshotNow); + Assert(aoEntry != NULL); + + for (int i = 0; i < sendback->numfiles; ++i) + { + updateOrcIndexFileInfo(aoEntry, idxs->indexOid, list_nth_int(idxs->segno, i), sendback->eof[i]); + } + + pfree(aoEntry); + + /* + * make the change available + */ + CommandCounterIncrement(); +} + void UpdateCatalogModifiedOnSegments(QueryContextDispatchingSendBack sendback) { diff --git a/src/backend/cdb/cdbtargeteddispatch.c b/src/backend/cdb/cdbtargeteddispatch.c index 8461e4132..28e7c1ebd 100644 --- a/src/backend/cdb/cdbtargeteddispatch.c +++ b/src/backend/cdb/cdbtargeteddispatch.c @@ -497,6 +497,8 @@ AssignContentIdsToPlanData_Walker(Node *node, void *context) break; case T_IndexScan: + case T_OrcIndexScan: + case T_OrcIndexOnlyScan: { IndexScan *indexScan = (IndexScan*)node; diff --git a/src/backend/cdb/dispatcher_mgr.c b/src/backend/cdb/dispatcher_mgr.c index a8286f57c..4405b6ca5 100644 --- a/src/backend/cdb/dispatcher_mgr.c +++ b/src/backend/cdb/dispatcher_mgr.c @@ -37,6 +37,7 @@ #include "cdb/workermgr.h" #include "libpq/libpq-be.h" #include "miscadmin.h" +#include "utils/faultinjector.h" #include "magma/cwrapper/magma-client-c.h" @@ -91,6 +92,16 @@ void mainDispatchFuncConnect(struct MyQueryExecutorGroup *qeGrp, foreach (lc, qeGrp->qes) { myQe = lfirst(lc); +#ifdef FAULT_INJECTOR + // expect FaultInjectorType: FaultInjectorTypeDispatchError + FaultInjectorType_e ret = 
FaultInjector_InjectFaultIfSet( + MainDispatchConnect, + DDLNotSpecified, + "", // databaseName + ""); // tableName + if(ret == FaultInjectorTypeDispatchError) goto error; +#endif + if (workermgr_should_query_stop(state)) goto error; if (!executormgr_main_doconnect(myQe)) goto error; @@ -107,6 +118,16 @@ void mainDispatchFuncRun(struct MyQueryExecutorGroup *qeGrp, struct MyQueryExecutor *myQe = NULL; bool catchProxyErr = true; +#ifdef FAULT_INJECTOR + // expect FaultInjectorType: FaultInjectorTypeDispatchError + FaultInjectorType_e ret = FaultInjector_InjectFaultIfSet( + MainDispatchSendPlan, + DDLNotSpecified, + "", // databaseName + ""); // tableName + if(ret == FaultInjectorTypeDispatchError) goto error; +#endif + ListCell *lc; foreach (lc, qeGrp->qes) { myQe = lfirst(lc); @@ -226,6 +247,16 @@ void proxyDispatchFuncRun(struct MyQueryExecutorGroup *qeGrp, foreach (lc, qeGrp->qes) { myQe = lfirst(lc); +#ifdef FAULT_INJECTOR + // expect FaultInjectorType: FaultInjectorTypeDispatchError + FaultInjectorType_e ret = FaultInjector_InjectFaultIfSet( + ProxyDispatchSendPlan, + DDLNotSpecified, + "", // databaseName + ""); // tableName + if(ret == FaultInjectorTypeDispatchError) goto error; +#endif + if (workermgr_should_query_stop(state)) { write_log("%s: query is canceled prior to dispatched.", __func__); goto error; diff --git a/src/backend/cdb/dispatcher_new.c b/src/backend/cdb/dispatcher_new.c index ee6cc695d..abf0c5c77 100644 --- a/src/backend/cdb/dispatcher_new.c +++ b/src/backend/cdb/dispatcher_new.c @@ -848,6 +848,13 @@ static void dispatchStmt(MyDispStmt *stmt, QueryResource *resource, bool newPlanner = can_convert_common_plan(data->queryDesc, &ctx); mainDispatchRun(data, &ctx, newPlanner); mainDispatchWait(data, false); + /* index stmt need to update catalog */ + if (stmt->node != NULL && IsA(stmt->node, IndexStmt)) + { + IndexStmt *idxStmt = (IndexStmt *)(stmt->node); + CdbDispatchResults *pr = mainDispatchGetResults((DispatchDataResult *) data); + 
cdbdisp_handleModifiedOrcIndexCatalogOnSegments(&(idxStmt->allidxinfos), pr, UpdateCatalogOrcIndexModifiedOnSegments); + } if (result && !mainDispatchHasError(data)) { int entryDBSegNum = 0; int segNum = list_length(resource->segments) + entryDBSegNum; diff --git a/src/backend/cdb/executormgr_new.c b/src/backend/cdb/executormgr_new.c index 808578304..18e41b083 100644 --- a/src/backend/cdb/executormgr_new.c +++ b/src/backend/cdb/executormgr_new.c @@ -35,6 +35,7 @@ #include "libpq/libpq-be.h" #include "miscadmin.h" #include "utils/lsyscache.h" +#include "utils/faultinjector.h" typedef enum MyQueryExecutorState { MYQES_UNINIT, /* Uninit state */ @@ -309,6 +310,15 @@ bool executormgr_main_consumeData(struct MyQueryExecutor *qe) { bool done = false; struct MyQueryExecutor *myQe = qe; +#ifdef FAULT_INJECTOR + // expect FaultInjectorType: FaultInjectorTypeDispatchError, + FaultInjectorType_e ret = + FaultInjector_InjectFaultIfSet(MainDispatchConsumeData, DDLNotSpecified, + "", // databaseName + ""); // tableName + if (ret == FaultInjectorTypeDispatchError) goto error; +#endif + if ((rc = PQconsumeInput(conn)) == 0) goto error; while (!PQisBusy(conn)) { /* Normal command finished */ @@ -353,6 +363,16 @@ bool executormgr_proxy_consumeData(struct MyQueryExecutor *qe) { bool done = false; CdbDispatchResult *resultSlot = qe->refResult; +#ifdef FAULT_INJECTOR + // expect FaultInjectorType: FaultInjectorTypeDispatchError, + // FaultInjectorQuietExit + FaultInjectorType_e ret = + FaultInjector_InjectFaultIfSet(ProxyDispatchConsumeData, DDLNotSpecified, + "", // databaseName + ""); // tableName + if (ret == FaultInjectorTypeDispatchError) goto error; +#endif + if (!PQconsumeInput(conn)) goto error; while (!PQisBusy(conn)) { @@ -401,6 +421,15 @@ bool executormgr_proxy_consumeData(struct MyQueryExecutor *qe) { } bool executormgr_proxy_doconnect(struct MyQueryExecutor *qe) { +#ifdef FAULT_INJECTOR + // expect FaultInjectorType: FaultInjectorTypeDispatchError + FaultInjectorType_e ret 
= + FaultInjector_InjectFaultIfSet(ProxyDispatcherConnect, DDLNotSpecified, + "", // databaseName + ""); // tableName + if (ret == FaultInjectorTypeDispatchError) goto error; +#endif + if (!cdbconn_proxy_doconnect(qe->desc, getTaskConnMsg(qe->refTask))) goto error; diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 542938716..6d27cc0fe 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -806,11 +806,11 @@ ExplainOnePlan_internal(PlannedStmt *plannedstmt, } - if (newExecutorMode) { - appendStringInfo(buf, "New executor mode: ON\n"); - if (pg_strcasecmp(new_scheduler_mode, new_scheduler_mode_on) == 0) - appendStringInfo(buf, "New scheduler mode: ON\n"); - } + if (newExecutorMode) { + appendStringInfo(buf, "New executor mode: ON\n"); + appendStringInfo(buf, "New interconnect type: %s\n", + show_new_interconnect_type()); + } if (Debug_print_execution_detail) { instr_time endtime; @@ -1162,6 +1162,12 @@ explain_outNode(StringInfo str, case T_IndexScan: pname = "Index Scan"; break; + case T_OrcIndexScan: + pname = "Orc Index Scan"; + break; + case T_OrcIndexOnlyScan: + pname = "Orc Index Only Scan"; + break; case T_DynamicIndexScan: pname = "Dynamic Index Scan"; break; @@ -1340,6 +1346,8 @@ explain_outNode(StringInfo str, switch (nodeTag(plan)) { case T_IndexScan: + case T_OrcIndexScan: + case T_OrcIndexOnlyScan: if (ScanDirectionIsBackward(((IndexScan *) plan)->indexorderdir)) appendStringInfoString(str, " Backward"); appendStringInfo(str, " using %s", @@ -1347,7 +1355,7 @@ explain_outNode(StringInfo str, /* FALL THRU */ case T_MagmaIndexScan: case T_MagmaIndexOnlyScan: - if (nodeTag(plan) != T_IndexScan) + if (nodeTag(plan) != T_IndexScan && nodeTag(plan) != T_OrcIndexScan && nodeTag(plan) != T_OrcIndexOnlyScan) appendStringInfo(str, " using index : %s", quote_identifier(((ExternalScan *) plan)->indexname)); case T_SeqScan: @@ -1520,6 +1528,8 @@ explain_outNode(StringInfo str, switch (nodeTag(plan)) { case 
T_IndexScan: + case T_OrcIndexScan: + case T_OrcIndexOnlyScan: case T_DynamicIndexScan: show_scan_qual(((IndexScan *) plan)->indexqualorig, "Index Cond", diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c index 464a82e17..1c8c3d115 100644 --- a/src/backend/commands/indexcmds.c +++ b/src/backend/commands/indexcmds.c @@ -33,14 +33,18 @@ */ #include "postgres.h" +#include "postmaster/identity.h" #include "access/aosegfiles.h" #include "access/genam.h" #include "access/heapam.h" #include "access/fileam.h" +#include "access/orcam.h" +#include "access/orcsegfiles.h" #include "access/reloptions.h" #include "access/transam.h" #include "access/xact.h" +#include "catalog/pg_appendonly.h" #include "catalog/catalog.h" #include "catalog/catquery.h" #include "catalog/dependency.h" @@ -81,6 +85,7 @@ #include "cdb/cdbsrlz.h" #include "cdb/cdbvars.h" #include "cdb/cdbcat.h" +#include "cdb/cdbquerycontextdispatching.h" #include "cdb/cdbrelsize.h" #include "cdb/cdboidsync.h" #include "cdb/dispatcher.h" @@ -98,8 +103,154 @@ static Oid GetIndexOpClass(List *opclass, Oid attrType, static bool relationHasPrimaryKey(Relation rel); static bool relationHasUniqueIndex(Relation rel); +static bool CDBCreateIndex(IndexStmt *stmt, Oid relationOid, Oid indexOid); + bool gp_hash_index = false; /* hash index phase out. */ +/* dispatch index info to qe for native orc */ +bool CDBCreateIndex(IndexStmt *stmt, Oid relationOid, Oid indexOid) +{ + int target_segment_num = 0; + Relation rel = relation_open(relationOid, AccessShareLock); + + /* + * 1. calculate orc file num + * 2. calculate vseg num by rm_nvseg_perquery_perseg_limit * slaveHostNumber + * 3. take the smaller of the above as the vseg num + * 4. 
bind SegNO to Vseg + */ + if (relationOid > 0 ) + { + /* 1.calculate file segno */ + FileSegTotals *fstotal = getOrcSegFileStats(rel, SnapshotNow); + if (fstotal) + { + target_segment_num = fstotal->totalfilesegs; + if (target_segment_num == 0) + { + elog(LOG, "CDBCreateIndex need not to dispatch create index statement, for not data in orc files.\n"); + relation_close(rel, AccessShareLock); + return false; + } + pfree(fstotal); + } + } + relation_close(rel, AccessShareLock); + + /* 2.judge by guc value */ + int vsegNum = GetQueryVsegNum(); + /* 3. get the smaller */ + if (target_segment_num > vsegNum) + { + target_segment_num = vsegNum; + } + + elog(DEBUG1, "CDBCreateIndex virtual segment number is: %d\n", target_segment_num); + + QueryResource *resource = AllocateResource(QRL_ONCE, 1, 1, target_segment_num, target_segment_num, NULL, 0); + + /* 4.bind SegNO to Vseg */ + int total_segfiles = 0; + AppendOnlyEntry *aoEntry = GetAppendOnlyEntry(relationOid, SnapshotNow); + FileSegInfo **allfsinfo = getAllOrcFileSegInfo(aoEntry, SnapshotNow, &total_segfiles); + + for (int i = 0; i < total_segfiles; ++i) + { + int vseg = (allfsinfo[i]->segno - 1) % target_segment_num; + + insertInitialOrcIndexEntry(aoEntry, indexOid, allfsinfo[i]->segno); + + /* One VSEG may process more than one ORC file, so use list to save segno and eof */ + if (stmt->allidxinfos != NULL && stmt->allidxinfos->length > vseg) + { + NativeOrcIndexFile *idxs = lfirst(list_nth_cell(stmt->allidxinfos, vseg)); + idxs->segno = lappend_int(idxs->segno, allfsinfo[i]->segno); + int len = length(idxs->segno); + idxs->eof = repalloc(idxs->eof, len * sizeof(int64)); + idxs->eof[len - 1] = 0; + } + else + { + NativeOrcIndexFile *idxs = makeNode(NativeOrcIndexFile); + idxs->indexOid = indexOid; + idxs->segno = lappend_int(idxs->segno, allfsinfo[i]->segno); + idxs->eof = palloc0(sizeof(int64)); + idxs->eof[0] = 0; + /* + * save all file infos in indexstmt and dispatch to qes + * qe use list_nth_cell(allidxinfos, 
GetQEIndex()) get the file informations it needs to process + */ + stmt->allidxinfos = lappend(stmt->allidxinfos, idxs); + } + } + + /* qe has no metadata information, get columns info in qd */ + ListCell *cell; + cqContext *attcqCtx; + foreach(cell, stmt->indexParams) + { + IndexElem *idx = (IndexElem *) lfirst(cell); + HeapTuple atttuple; + attcqCtx = caql_getattname_scan(NULL, relationOid, idx->name); + atttuple = caql_get_current(attcqCtx); + if (HeapTupleIsValid(atttuple)) + { + Form_pg_attribute tuple = (Form_pg_attribute) GETSTRUCT(atttuple); + if (!tuple->attnotnull) + { + stmt->columnsToRead = lappend_int(stmt->columnsToRead, tuple->attnum); + } + } + caql_endscan(attcqCtx); + } + + stmt->relationOid = relationOid; + + /* native orc need to dispatch relation info */ + stmt->contextdisp = CreateQueryContextInfo(); + prepareDispatchedCatalogRelation(stmt->contextdisp, relationOid, FALSE, NULL, TRUE); + TupleDesc tupDesc = RelationGetDescr(rel); + Form_pg_attribute *attr = tupDesc->attrs; + for (int attnum = 1; attnum <= tupDesc->natts; ++attnum) + { + if (attr[attnum - 1]->attisdropped) continue; + prepareDispatchedCatalogType(stmt->contextdisp, attr[attnum - 1]->atttypid); + } + FinalizeQueryContextInfo(stmt->contextdisp); + + DispatchDataResult result; + mainDispatchStmtNode(stmt, NULL, resource, &result); + DropQueryContextInfo(stmt->contextdisp); + return true; +} + +void CDBDefineIndex(IndexStmt *stmt) +{ + Relation rel = relation_open(stmt->relationOid, NoLock); + /* 1. get orc file infos belong to this qe */ + NativeOrcIndexFile *idxs = (NativeOrcIndexFile *)(list_nth(stmt->allidxinfos, GetQEIndex())); + /* 2. call native orc index interface to build index data */ + int keyCount = list_length(stmt->indexParams) - list_length(stmt->indexIncludingParams); + int64 *eof = orcCreateIndex(rel, idxs->indexOid, idxs->segno, idxs->eof, stmt->columnsToRead, keyCount); + + /* 3. 
callback to qd to update eof info */ + QueryContextDispatchingSendBack sendback = CreateQueryContextDispatchingSendBack(length(idxs->segno)); + sendback->relid = stmt->relationOid; + sendback->varblock = GetQEIndex(); + sendback->numfiles = length(idxs->segno); + for (int i = 0; i < sendback->numfiles; ++i) + { + sendback->eof[i] = eof[i]; + } + StringInfo buf = PreSendbackChangedCatalog(1); + AddSendbackChangedCatalogContent(buf, sendback); + + DropQueryContextDispatchingSendBack(sendback); + FinishSendbackChangedCatalog(buf); + relation_close(rel, NoLock); + return; +} + /* * DefineIndex * Creates a new index. @@ -402,6 +553,11 @@ DefineIndex(Oid relationId, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("hash indexes are not supported"))); + if (accessMethodId == BITMAP_AM_OID) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("bitmap indexes are not supported"))); + /* MPP-9329: disable creation of GIN indexes */ if (accessMethodId == GIN_AM_OID) ereport(ERROR, @@ -760,7 +916,9 @@ DefineIndex(Oid relationId, { ereport(ERROR, (errcode(ERRCODE_CDB_FEATURE_NOT_YET), errmsg("Cannot support DefineIndex"))); } - // dispatch_statement_node((Node *)stmt, NULL, NULL, NULL); + /* native orc need to dispatch index info to qe */ + if (RelationIsOrc(rel)) + CDBCreateIndex(stmt, relationId, indexRelationId); } } diff --git a/src/backend/executor/execAmi.c b/src/backend/executor/execAmi.c index 818f642e7..894523eed 100644 --- a/src/backend/executor/execAmi.c +++ b/src/backend/executor/execAmi.c @@ -136,6 +136,8 @@ ExecReScan(PlanState *node, ExprContext *exprCtxt) break; case T_IndexScanState: + case T_OrcIndexScanState: + case T_OrcIndexOnlyScanState: ExecIndexReScan((IndexScanState *) node, exprCtxt); break; @@ -633,6 +635,8 @@ ExecEagerFree(PlanState *node) break; case T_IndexScanState: + case T_OrcIndexScanState: + case T_OrcIndexOnlyScanState: ExecEagerFreeIndexScan((IndexScanState *)node); break; diff --git a/src/backend/executor/execProcnode.c 
b/src/backend/executor/execProcnode.c index b01897eeb..ec4dfd016 100644 --- a/src/backend/executor/execProcnode.c +++ b/src/backend/executor/execProcnode.c @@ -374,6 +374,8 @@ ExecInitNode(Plan *node, EState *estate, int eflags) break; case T_IndexScan: + case T_OrcIndexScan: + case T_OrcIndexOnlyScan: curMemoryAccount = CREATE_EXECUTOR_MEMORY_ACCOUNT(isAlienPlanNode, node, IndexScan); START_MEMORY_ACCOUNT(curMemoryAccount); @@ -859,6 +861,8 @@ ExecProcNode(PlanState *node) &&Exec_Jmp_MagmaIndexScan, &&Exec_Jmp_MagmaIndexOnlyScan, &&Exec_Jmp_MagmaBitmapScan, + &&Exec_Jmp_OrcIndexScan, + &&Exec_Jmp_OrcIndexOnlyScan, }; COMPILE_ASSERT((T_Plan_End - T_Plan_Start) == (T_PlanState_End - T_PlanState_Start)); @@ -1047,6 +1051,14 @@ ExecProcNode(PlanState *node) /* Todo: should to create magmabitmapscannode */ goto Exec_Jmp_Done; +Exec_Jmp_OrcIndexScan: + result = ExecIndexScan((IndexScanState *) node); + goto Exec_Jmp_Done; + +Exec_Jmp_OrcIndexOnlyScan: + result = ExecIndexScan((IndexScanState *) node); + goto Exec_Jmp_Done; + Exec_Jmp_Done: if (node->instrument) { InstrStopNode(node->instrument, TupIsNull(result) ? 
0.0 : 1.0); @@ -1360,6 +1372,8 @@ ExecCountSlotsNode(Plan *node) return ExecCountSlotsExternalScan((ExternalScan *) node); case T_IndexScan: + case T_OrcIndexScan: + case T_OrcIndexOnlyScan: return ExecCountSlotsIndexScan((IndexScan *) node); case T_DynamicIndexScan: @@ -1628,6 +1642,8 @@ ExecEndNode(PlanState *node) break; case T_IndexScanState: + case T_OrcIndexScanState: + case T_OrcIndexOnlyScanState: ExecEndIndexScan((IndexScanState *) node); break; diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c index 2b8f0214f..87d8c1710 100644 --- a/src/backend/executor/execUtils.c +++ b/src/backend/executor/execUtils.c @@ -2456,6 +2456,8 @@ sendInitGpmonPkts(Plan *node, EState *estate) case T_ParquetScan: case T_ExternalScan: case T_IndexScan: + case T_OrcIndexScan: + case T_OrcIndexOnlyScan: case T_BitmapIndexScan: case T_TidScan: case T_FunctionScan: diff --git a/src/backend/executor/nodeIndexscan.c b/src/backend/executor/nodeIndexscan.c index d91bcc75c..beca1dcaa 100644 --- a/src/backend/executor/nodeIndexscan.c +++ b/src/backend/executor/nodeIndexscan.c @@ -668,7 +668,8 @@ ExecCountSlotsIndexScan(IndexScan *node) void initGpmonPktForIndexScan(Plan *planNode, gpmon_packet_t *gpmon_pkt, EState *estate) { - Assert(planNode != NULL && gpmon_pkt != NULL && IsA(planNode, IndexScan)); + Assert(planNode != NULL && gpmon_pkt != NULL && (IsA(planNode, IndexScan) || + IsA(planNode, OrcIndexScan) || IsA(planNode, OrcIndexOnlyScan))); { char *relname = get_rel_name(((IndexScan *)planNode)->indexid); diff --git a/src/backend/gp_libpq_fe/fe-connect.c b/src/backend/gp_libpq_fe/fe-connect.c index f75d7da08..2d3ce399a 100644 --- a/src/backend/gp_libpq_fe/fe-connect.c +++ b/src/backend/gp_libpq_fe/fe-connect.c @@ -47,6 +47,7 @@ #include "pg_config_paths.h" #include "cdb/cdbvars.h" +#include "utils/faultinjector.h" #ifdef WIN32 #include "win32.h" @@ -3525,6 +3526,15 @@ bool PQgetQEsDetail(PGconn *conn, char *connMsg, int connMsgLen) { else return false; } 
+#ifdef FAULT_INJECTOR + // expect FaultInjectorType: FaultInjectorTypeDispatchError + FaultInjectorType_e ret = FaultInjector_InjectFaultIfSet( + MainDispatchGetQEsDetail, + DDLNotSpecified, + "", // databaseName + ""); // tableName + if(ret == FaultInjectorTypeDispatchError) return false; +#endif return true; } diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index bf782ac4f..f68767d03 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -558,6 +558,32 @@ _copyIndexScan(IndexScan *from) return newnode; } +/* + * _copyOrcIndexScan + */ +static OrcIndexScan * +_copyOrcIndexScan(OrcIndexScan *from) +{ + OrcIndexScan *newnode = makeNode(OrcIndexScan); + + copyIndexScanFields(from, newnode); + + return newnode; +} + +/* + * _copyOrcIndexOnlyScan + */ +static OrcIndexOnlyScan * +_copyOrcIndexOnlyScan(OrcIndexOnlyScan *from) +{ + OrcIndexOnlyScan *newnode = makeNode(OrcIndexOnlyScan); + + copyIndexScanFields(from, newnode); + + return newnode; +} + static MagmaIndexScan * _copyMagmaIndexScan(MagmaIndexScan *from) { @@ -4517,6 +4543,12 @@ copyObject(void *from) case T_IndexScan: retval = _copyIndexScan(from); break; + case T_OrcIndexScan: + retval = _copyOrcIndexScan(from); + break; + case T_OrcIndexOnlyScan: + retval = _copyOrcIndexOnlyScan(from); + break; case T_DynamicIndexScan: retval = _copyDynamicIndexScan(from); break; diff --git a/src/backend/nodes/outfast.c b/src/backend/nodes/outfast.c index 51ed96a79..1409f38f8 100644 --- a/src/backend/nodes/outfast.c +++ b/src/backend/nodes/outfast.c @@ -716,6 +716,22 @@ _outIndexScan(StringInfo str, IndexScan *node) outIndexScanFields(str, node); } +static void +_outOrcIndexScan(StringInfo str, OrcIndexScan *node) +{ + WRITE_NODE_TYPE("ORCINDEXSCAN"); + + outIndexScanFields(str, node); +} + +static void +_outOrcIndexOnlyScan(StringInfo str, OrcIndexOnlyScan *node) +{ + WRITE_NODE_TYPE("ORCINDEXONLYSCAN"); + + outIndexScanFields(str, node); +} + static void 
_outDynamicIndexScan(StringInfo str, DynamicIndexScan *node) { @@ -2275,6 +2291,18 @@ _outSegFileSplitMapNode(StringInfo str, SegFileSplitMapNode *node) } +static void +_outNativeOrcIndexFile(StringInfo str, NativeOrcIndexFile *node) +{ + WRITE_NODE_TYPE("NATIVEORCINDEXFILE"); + + WRITE_OID_FIELD(indexOid); + WRITE_NODE_FIELD(segno); + int len = length(node->segno); + for(int i = 0; i < len; i++) + WRITE_UINT64_FIELD(eof[i]); +} + static void _outExtTableTypeDesc(StringInfo str, ExtTableTypeDesc *node) { @@ -2346,6 +2374,10 @@ _outIndexStmt(StringInfo str, IndexStmt *node) WRITE_OID_FIELD(constrOid); WRITE_BOOL_FIELD(concurrent); WRITE_NODE_FIELD(idxOids); + WRITE_OID_FIELD(relationOid); + WRITE_NODE_FIELD(allidxinfos); + WRITE_NODE_FIELD(columnsToRead); + WRITE_NODE_FIELD(contextdisp); } static void @@ -4055,6 +4087,12 @@ _outNode(StringInfo str, void *obj) case T_IndexScan: _outIndexScan(str, obj); break; + case T_OrcIndexScan: + _outOrcIndexScan(str, obj); + break; + case T_OrcIndexOnlyScan: + _outOrcIndexOnlyScan(str, obj); + break; case T_MagmaIndexScan: _outMagmaIndexScan(str, obj); break; @@ -4421,6 +4459,10 @@ _outNode(StringInfo str, void *obj) _outSegFileSplitMapNode(str, obj); break; + case T_NativeOrcIndexFile: + _outNativeOrcIndexFile(str, obj); + break; + case T_ExtTableTypeDesc: _outExtTableTypeDesc(str, obj); break; diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index 429e4e1d3..c7ed4da84 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -673,6 +673,22 @@ _outIndexScan(StringInfo str, IndexScan *node) outIndexScanFields(str, node); } +static void +_outOrcIndexScan(StringInfo str, OrcIndexScan *node) +{ + WRITE_NODE_TYPE("ORCINDEXSCAN"); + + outIndexScanFields(str, node); +} + +static void +_outOrcIndexOnlyScan(StringInfo str, OrcIndexOnlyScan *node) +{ + WRITE_NODE_TYPE("ORCINDEXONLYSCAN"); + + outIndexScanFields(str, node); +} + static void _outDynamicIndexScan(StringInfo str, DynamicIndexScan 
*node) { @@ -2276,6 +2292,10 @@ _outIndexStmt(StringInfo str, IndexStmt *node) WRITE_OID_FIELD(constrOid); WRITE_BOOL_FIELD(concurrent); WRITE_NODE_FIELD(idxOids); + WRITE_OID_FIELD(relationOid); + WRITE_NODE_FIELD(allidxinfos); + WRITE_NODE_FIELD(columnsToRead); + WRITE_NODE_FIELD(contextdisp); } static void @@ -2926,6 +2946,18 @@ _outResultRelSegFileInfoMapNode(StringInfo str, ResultRelSegFileInfoMapNode *nod WRITE_NODE_FIELD(segfileinfos); } +static void +_outNativeOrcIndexFile(StringInfo str, NativeOrcIndexFile *node) +{ + WRITE_NODE_TYPE("NATIVEORCINDEXFILE"); + + WRITE_OID_FIELD(indexOid); + WRITE_NODE_FIELD(segno); + int len = length(node->segno); + for(int i = 0; i < len; i++) + WRITE_UINT64_FIELD(eof[i]); +} + static void _outDefineStmt(StringInfo str, DefineStmt *node) { @@ -4240,6 +4272,12 @@ _outNode(StringInfo str, void *obj) case T_IndexScan: _outIndexScan(str, obj); break; + case T_OrcIndexScan: + _outOrcIndexScan(str, obj); + break; + case T_OrcIndexOnlyScan: + _outOrcIndexOnlyScan(str, obj); + break; case T_MagmaIndexScan: _outMagmaIndexScan(str, obj); break; @@ -4610,6 +4648,9 @@ _outNode(StringInfo str, void *obj) case T_ResultRelSegFileInfoMapNode: _outResultRelSegFileInfoMapNode(str, obj); break; + case T_NativeOrcIndexFile: + _outNativeOrcIndexFile(str, obj); + break; case T_ExtTableTypeDesc: _outExtTableTypeDesc(str, obj); break; diff --git a/src/backend/nodes/print.c b/src/backend/nodes/print.c index 3d24ccb6c..6ce47a2cc 100644 --- a/src/backend/nodes/print.c +++ b/src/backend/nodes/print.c @@ -517,6 +517,10 @@ char * plannode_type(Plan *p) return "EXTERNALSCAN"; case T_IndexScan: return "INDEXSCAN"; + case T_OrcIndexScan: + return "ORCINDEXSCAN"; + case T_OrcIndexOnlyScan: + return "ORCINDEXONLYSCAN"; case T_MagmaIndexScan: return "MAGMAINDEXSCAN"; case T_MagmaIndexOnlyScan: diff --git a/src/backend/nodes/readfast.c b/src/backend/nodes/readfast.c index f14ea9840..33714403b 100644 --- a/src/backend/nodes/readfast.c +++ 
b/src/backend/nodes/readfast.c @@ -776,6 +776,10 @@ _readIndexStmt(const char ** str) READ_OID_FIELD(constrOid); READ_BOOL_FIELD(concurrent); READ_NODE_FIELD(idxOids); + READ_OID_FIELD(relationOid); + READ_NODE_FIELD(allidxinfos); + READ_NODE_FIELD(columnsToRead); + READ_NODE_FIELD(contextdisp); READ_DONE(); } @@ -2301,6 +2305,29 @@ _readSegFileSplitMapNode(const char **str) READ_DONE(); } +static NativeOrcIndexFile * +_readNativeOrcIndexFile(const char **str) +{ + READ_LOCALS(NativeOrcIndexFile); + + READ_OID_FIELD(indexOid); + READ_NODE_FIELD(segno); + + int len = length(local_node->segno); + + if (len > 0) + { + local_node->eof = palloc(sizeof(int64) * len); + + for(int i = 0; i < len; i++) + { + READ_UINT64_FIELD(eof[i]); + } + } + + READ_DONE(); +} + static FileSplitNode * _readFileSplitNode(const char **str) { @@ -3196,6 +3223,32 @@ _readIndexScan(const char ** str) READ_DONE(); } +/* + * _readOrcIndexScan + */ +static OrcIndexScan * +_readOrcIndexScan(const char ** str) +{ + READ_LOCALS(OrcIndexScan); + + readIndexScanFields(str, local_node); + + READ_DONE(); +} + +/* + * _readOrcIndexOnlyScan + */ +static OrcIndexOnlyScan * +_readOrcIndexOnlyScan(const char ** str) +{ + READ_LOCALS(OrcIndexOnlyScan); + + readIndexScanFields(str, local_node); + + READ_DONE(); +} + static DynamicIndexScan * _readDynamicIndexScan(const char **str) { @@ -4326,6 +4379,12 @@ readNodeBinary(const char ** str) case T_IndexScan: return_value = _readIndexScan(str); break; + case T_OrcIndexScan: + return_value = _readOrcIndexScan(str); + break; + case T_OrcIndexOnlyScan: + return_value = _readOrcIndexOnlyScan(str); + break; case T_MagmaIndexScan: return_value = _readMagmaIndexScan(str); break; @@ -4603,6 +4662,9 @@ readNodeBinary(const char ** str) case T_SegFileSplitMapNode: return_value = _readSegFileSplitMapNode(str); break; + case T_NativeOrcIndexFile: + return_value = _readNativeOrcIndexFile(str); + break; case T_FileSplitNode: return_value = _readFileSplitNode(str); break; diff 
--git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index df5775ce8..f68e72ca7 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -382,15 +382,11 @@ set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) } /* Consider sequential scan. */ - if ((root->config->enable_seqscan) && (relstorage != RELSTORAGE_EXTERNAL)) - pathlist = lappend(pathlist, seqpath); + if ((root->config->enable_seqscan) && (relstorage != RELSTORAGE_EXTERNAL)) + pathlist = lappend(pathlist, seqpath); /* Consider index and bitmap scans */ - if (!relstorage_is_ao(relstorage)) - { - /* Temporarily disable index for ao table */ - create_index_paths(root, rel, relstorage, &indexpathlist, &bitmappathlist); - } + create_index_paths(root, rel, relstorage, &indexpathlist, &bitmappathlist); /* deal with magma index scan */ if (relstorage == RELSTORAGE_EXTERNAL) @@ -416,6 +412,48 @@ set_plain_rel_pathlist(PlannerInfo *root, RelOptInfo *rel, RangeTblEntry *rte) } if (bitmappathlist && root->config->enable_magma_bitmapscan) pathlist = list_concat(pathlist, bitmappathlist); + /* If no enabled path was found, consider seq path. 
*/ + if (!pathlist) + { + pathlist = lappend(pathlist, seqpath); + } + + foreach(cell, pathlist) + add_path(root, rel, (Path *)lfirst(cell)); + + /* Now find the cheapest of the paths for this rel */ + set_cheapest(root, rel); + return; + } + + /* deal with native orc index scan */ + if (relstorage == RELSTORAGE_ORC) + { + if (indexpathlist && ((root->config->enable_orc_indexscan) + || (root->config->enable_orc_indexonlyscan))) + { + ListCell *l; + foreach(l, indexpathlist) + { + IndexPath *ipath = (IndexPath *) lfirst(l); + if ((root->config->enable_orc_indexscan) && + (!ipath->indexonly)) + { + pathlist = lappend(pathlist, ipath); + } + else if ((root->config->enable_orc_indexonlyscan) && + (ipath->indexonly)) + { + pathlist = lappend(pathlist, ipath); + } + } + } + + /* If no enabled path was found, consider seq path. */ + if (!pathlist) + { + pathlist = lappend(pathlist, seqpath); + } /* Add them, now that we know whether the quals specify a unique key. */ foreach(cell, pathlist) add_path(root, rel, (Path *)lfirst(cell)); diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index 24a22c768..ae88f8138 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -288,7 +288,9 @@ create_index_paths(PlannerInfo *root, RelOptInfo *rel, Path *path = NULL; bitmapqual = choose_bitmap_and(root, rel, bitindexpaths, NULL); - path = create_bitmap_scan_path(relstorage, root, rel, bitmapqual, NULL); + /* so far, native orc can't support bitmap scan */ + if (relstorage != RELSTORAGE_ORC) + path = create_bitmap_scan_path(relstorage, root, rel, bitmapqual, NULL); *pbitmappathlist = lappend(*pbitmappathlist, path); } } @@ -487,7 +489,8 @@ find_usable_indexes(PlannerInfo *root, RelOptInfo *rel, * plain indexscans, this isn't relevant since bitmap scans don't support * index data retrieval anyway. 
*/ - if (rel->ext == RELSTORAGE_EXTERNAL) + /* so far, magma and native orc can support index only scan */ + if (rel->ext == RELSTORAGE_EXTERNAL || rel->ext == RELSTORAGE_ORC) { index_only_scan = check_index_only(rel, index); } @@ -1807,7 +1810,7 @@ best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel, /* disable magma bitmap scan for inner join * because magma index scan cant support dynamic filter */ - && (relstorage != RELSTORAGE_EXTERNAL)) + && (relstorage != RELSTORAGE_EXTERNAL) && (relstorage != RELSTORAGE_ORC)) { Path *bitmapqual; Path *bpath; @@ -2672,7 +2675,9 @@ check_index_only(RelOptInfo *rel, IndexOptInfo *index) int i; /* Index-only scans must be enabled */ - if (!enable_magma_indexonlyscan) + if (rel->ext == RELSTORAGE_EXTERNAL && (!enable_magma_indexonlyscan)) + return false; + if (rel->ext == RELSTORAGE_ORC && (!enable_orc_indexonlyscan)) return false; /* diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 9dd3a0922..7e0961565 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -162,7 +162,7 @@ static ExternalScan *make_externalscan(List *qptlist, static IndexScan *make_indexscan(List *qptlist, List *qpqual, Index scanrelid, Oid indexid, List *indexqual, List *indexqualorig, List *indexstrategy, List *indexsubtype, - ScanDirection indexscandir); + ScanDirection indexscandir, bool indexonly, NodeTag pathtype); static MagmaIndexScan *make_magma_indexscan(List *qptlist, List *qpqual, Index scanrelid, @@ -284,6 +284,8 @@ create_subplan(CreatePlanContext *ctx, Path *best_path) case T_MagmaIndexScan: case T_MagmaIndexOnlyScan: case T_MagmaBitmapScan: + case T_OrcIndexScan: + case T_OrcIndexOnlyScan: case T_CteScan: plan = create_scan_plan(ctx, best_path); break; @@ -350,7 +352,8 @@ create_scan_plan(CreatePlanContext *ctx, Path *best_path) */ if (use_physical_tlist(ctx, rel)) { - if (best_path->pathtype == T_MagmaIndexOnlyScan) + if (best_path->pathtype 
== T_MagmaIndexOnlyScan || best_path->pathtype == T_OrcIndexOnlyScan || + best_path->pathtype == T_OrcIndexScan) { /* For index-only scan, the preferred tlist is the index's */ tlist = copyObject(((IndexPath *) best_path)->indexinfo->indextlist); @@ -418,6 +421,12 @@ create_scan_plan(CreatePlanContext *ctx, Path *best_path) scan_clauses); break; + case T_OrcIndexScan: + case T_OrcIndexOnlyScan: + plan = (Plan *) create_indexscan_plan( + ctx, (IndexPath *) best_path, tlist, scan_clauses, NULL); + break; + case T_MagmaBitmapScan: plan = (Plan *) create_magma_bitmap_scan_plan(ctx, (BitmapHeapPath *) best_path, @@ -606,6 +615,8 @@ disuse_physical_tlist(Plan *plan, Path *path) case T_ParquetScan: case T_ExternalScan: case T_IndexScan: + case T_OrcIndexScan: + case T_OrcIndexOnlyScan: case T_BitmapHeapScan: case T_BitmapTableScan: case T_TidScan: @@ -2133,7 +2144,9 @@ create_indexscan_plan(CreatePlanContext *ctx, stripped_indexquals, indexstrategy, indexsubtype, - best_path->indexscandir); + best_path->indexscandir, + best_path->indexonly, + best_path->path.pathtype); copy_path_costsize(ctx->root, &scan_plan->scan.plan, &best_path->path); @@ -3814,9 +3827,12 @@ make_indexscan(List *qptlist, List *indexqualorig, List *indexstrategy, List *indexsubtype, - ScanDirection indexscandir) + ScanDirection indexscandir, + bool indexonly, + NodeTag pathtype) { IndexScan *node = makeNode(IndexScan); + node->scan.plan.type = pathtype; Plan *plan = &node->scan.plan; /* cost should be inserted by caller */ @@ -3826,6 +3842,7 @@ make_indexscan(List *qptlist, plan->righttree = NULL; node->scan.scanrelid = scanrelid; + node->indexonly = indexonly; node->indexid = indexid; node->indexqual = indexqual; node->indexqualorig = indexqualorig; diff --git a/src/backend/optimizer/plan/newPlanner.c b/src/backend/optimizer/plan/newPlanner.c index dc5546ed0..26be1c8fd 100644 --- a/src/backend/optimizer/plan/newPlanner.c +++ b/src/backend/optimizer/plan/newPlanner.c @@ -37,6 +37,7 @@ #include 
"optimizer/var.h" #include "parser/parsetree.h" #include "storage/cwrapper/orc-format-c.h" +#include "storage/cwrapper/text-format-c.h" #include "univplan/cwrapper/univplan-c.h" #include "utils/acl.h" #include "utils/builtins.h" @@ -68,11 +69,11 @@ char *new_scheduler_mode; int new_interconnect_type; const char *show_new_interconnect_type() { switch (new_interconnect_type) { - case INTERCONNECT_TYPE_UDP: - return "UDP"; - case INTERCONNECT_TYPE_TCP: - default: - return "TCP"; + case INTERCONNECT_TYPE_UDP: + return "UDP"; + case INTERCONNECT_TYPE_TCP: + default: + return "TCP"; } } @@ -97,8 +98,8 @@ static bool do_convert_initplan_to_common_plan(Plan *node, static bool do_convert_hashExpr_to_common_plan(Motion *node, CommonPlanContext *ctx); static void do_convert_onetbl_to_common_plan(Oid relid, CommonPlanContext *ctx); -static void -do_convert_magma_rangevseg_map_to_common_plan(CommonPlanContext *ctx); +static void do_convert_magma_rangevseg_map_to_common_plan( + CommonPlanContext *ctx); static void do_convert_rangetbl_to_common_plan(List *rtable, CommonPlanContext *ctx); static void do_convert_result_partitions_to_common_plan( @@ -123,9 +124,8 @@ static bool do_convert_mergejoin_clause_to_common_plan(MergeJoin *node, CommonPlanContext *ctx); static bool do_convert_result_qual_to_common_plan(Result *node, CommonPlanContext *ctx); -static bool -do_convert_subqueryscan_subplan_to_common_plan(SubqueryScan *node, - CommonPlanContext *ctx); +static bool do_convert_subqueryscan_subplan_to_common_plan( + SubqueryScan *node, CommonPlanContext *ctx); static Expr *parentExprSwitchTo(Expr *parent, CommonPlanContext *ctx); static void setDummyTListRef(CommonPlanContext *ctx); static void unsetDummyTListRef(CommonPlanContext *ctx); @@ -154,61 +154,165 @@ static const char *buildInternalTableFormatOptionStringInJson(Relation rel) { #define INT64_MAX_LENGTH 20 static bool checkSupportedTableFormat(Node *node, CommonPlanContext *cxt) { - if (NULL == node) - return false; + if 
(NULL == node) return false; switch (nodeTag(node)) { - case T_MagmaIndexScan: - case T_MagmaIndexOnlyScan: - case T_ExternalScan: { - ExternalScan *n = (ExternalScan *)node; - char fmtType = n->fmtType; - char *fmtName = NULL; - fmtName = getExtTblFormatterTypeInFmtOptsList(n->fmtOpts); - - if (fmtType == 'b') { - if (!pg_strncasecmp(fmtName, ORCTYPE, sizeof(ORCTYPE) - 1)) { - cxt->querySelect = true; + case T_MagmaIndexScan: + case T_MagmaIndexOnlyScan: + case T_ExternalScan: { + ExternalScan *n = (ExternalScan *)node; + char fmtType = n->fmtType; + char *fmtName = NULL; + fmtName = getExtTblFormatterTypeInFmtOptsList(n->fmtOpts); + + if (fmtType == 'b') { + if (!pg_strcasecmp(fmtName, ORCTYPE) || + !pg_strcasecmp(fmtName, TEXTTYPE) || + !pg_strcasecmp(fmtName, CSVTYPE)) { + cxt->querySelect = true; + } + if (!pg_strncasecmp(fmtName, MAGMATYPE, sizeof(MAGMATYPE) - 1)) { + cxt->querySelect = true; + cxt->isMagma = true; + cxt->magmaRelIndex = n->scan.scanrelid; + } } - if (!pg_strncasecmp(fmtName, MAGMATYPE, sizeof(MAGMATYPE) - 1)) { + + if (fmtName) pfree(fmtName); + break; + } + case T_AppendOnlyScan: { + AppendOnlyScan *n = (AppendOnlyScan *)node; + RangeTblEntry *rte = rt_fetch(n->scan.scanrelid, cxt->stmt->rtable); + if (RELSTORAGE_ORC == get_rel_relstorage(rte->relid)) { cxt->querySelect = true; - cxt->isMagma = true; - cxt->magmaRelIndex = n->scan.scanrelid; + break; + } else { + cxt->convertible = false; + return true; } } - if (fmtName) - pfree(fmtName); - break; - } - case T_AppendOnlyScan: { - AppendOnlyScan *n = (AppendOnlyScan *)node; - RangeTblEntry *rte = rt_fetch(n->scan.scanrelid, cxt->stmt->rtable); - if (RELSTORAGE_ORC == get_rel_relstorage(rte->relid)) { - cxt->querySelect = true; - break; - } else { + case T_ParquetScan: { cxt->convertible = false; return true; } + default: + break; } - case T_ParquetScan: { - cxt->convertible = false; - return true; + return plan_tree_walker(node, checkSupportedTableFormat, cxt); +} + +void 
buildDefaultFormatterOptionsInJson(int encoding, char externalFmtType, + struct json_object *optJsonObject) { + if (json_object_object_get(optJsonObject, "delimiter") == NULL) { + json_object_object_add( + optJsonObject, "delimiter", + json_object_new_string((externalFmtType == TextFormatTypeTXT) ? "\t" + : ",")); } - default: - break; + + if (json_object_object_get(optJsonObject, "null") == NULL) { + json_object_object_add( + optJsonObject, "null", + json_object_new_string((externalFmtType == TextFormatTypeTXT) ? "\\N" + : "")); } - return plan_tree_walker(node, checkSupportedTableFormat, cxt); + if (json_object_object_get(optJsonObject, "fill_missing_fields") == NULL) { + json_object_object_add(optJsonObject, "fill_missing_fields", + json_object_new_boolean(0)); + } else { + json_object_object_del(optJsonObject, "fill_missing_fields"); + json_object_object_add(optJsonObject, "fill_missing_fields", + json_object_new_boolean(1)); + } + + if (json_object_object_get(optJsonObject, "header") == NULL) { + json_object_object_add(optJsonObject, "header", json_object_new_boolean(0)); + } else { + json_object_object_del(optJsonObject, "header"); + json_object_object_add(optJsonObject, "header", json_object_new_boolean(1)); + } + + if (json_object_object_get(optJsonObject, "reject_limit") == NULL) { + json_object_object_add(optJsonObject, "reject_limit", + json_object_new_int(0)); + } else { + struct json_object *val = + json_object_object_get(optJsonObject, "reject_limit"); + if (!json_object_is_type(val, json_type_int)) { + const char *str = json_object_get_string(val); + char *endPtr = NULL; + int reject_limit = strtol(str, &endPtr, 10); + if (*endPtr != '\0') { + reject_limit = 0; + } + json_object_object_del(optJsonObject, "reject_limit"); + json_object_object_add(optJsonObject, "reject_limit", + json_object_new_int(reject_limit)); + } + } + + if (json_object_object_get(optJsonObject, "err_table") == NULL) { + json_object_object_add(optJsonObject, "err_table", + 
json_object_new_string("")); + } + + if (json_object_object_get(optJsonObject, "newline") == NULL) { + json_object_object_add(optJsonObject, "newline", + json_object_new_string("lf")); + } + + if (json_object_object_get(optJsonObject, "encoding") == NULL) { + const char *encodingStr = pg_encoding_to_char(encoding); + char lowerCaseEncodingStr[64]; + strcpy(lowerCaseEncodingStr, encodingStr); + for (char *p = lowerCaseEncodingStr; *p != '\0'; ++p) { + *p = tolower(*p); + } + + json_object_object_add(optJsonObject, "encoding", + json_object_new_string(lowerCaseEncodingStr)); + } + + if (externalFmtType == TextFormatTypeCSV && + json_object_object_get(optJsonObject, "quote") == NULL) { + json_object_object_add(optJsonObject, "quote", + json_object_new_string("\"")); + } + + if (json_object_object_get(optJsonObject, "escape") == NULL) { + if (externalFmtType == TextFormatTypeCSV) { + /* Let escape follow quote's setting */ + struct json_object *val = json_object_object_get(optJsonObject, "quote"); + json_object_object_add( + optJsonObject, "escape", + json_object_new_string(json_object_get_string(val))); + } else { + json_object_object_add(optJsonObject, "escape", + json_object_new_string("\\")); + } + } + + if (json_object_object_get(optJsonObject, "force_quote") == NULL) { + json_object_object_add(optJsonObject, "force_quote", + json_object_new_string("")); + } + + /* This is for csv formatter only */ + if (externalFmtType == TextFormatTypeCSV && + json_object_object_get(optJsonObject, "force_notnull") == NULL) { + json_object_object_add(optJsonObject, "force_notnull", + json_object_new_string("")); + } } bool can_convert_common_plan(QueryDesc *queryDesc, CommonPlanContext *ctx) { PlannedStmt *stmt = queryDesc ? queryDesc->plannedstmt : NULL; // disable for cursor and bind message - if (!queryDesc || queryDesc->extended_query) - return false; + if (!queryDesc || queryDesc->extended_query) return false; // Disable new executor when too many TCP connection. 
// Here it considers only the TCP client number of the root plan, regardless @@ -235,20 +339,17 @@ bool can_convert_common_plan(QueryDesc *queryDesc, CommonPlanContext *ctx) { planner_init_common_plan_context(stmt, ctx); // Fix issue 817 - if (checkIsPrepareQuery(queryDesc)) - goto end; + if (checkIsPrepareQuery(queryDesc)) goto end; stmt->planner_segments = queryDesc->planner_segments; stmt->originNodeType = queryDesc->originNodeType; convert_to_common_plan(stmt, ctx); - if (!ctx->convertible) - goto end; + if (!ctx->convertible) goto end; convert_querydesc_to_common_plan(queryDesc, ctx); - if (!ctx->convertible) - goto end; + if (!ctx->convertible) goto end; return true; @@ -288,8 +389,7 @@ void convert_rangenum_to_common_plan(PlannedStmt *stmt, void convert_to_common_plan(PlannedStmt *stmt, CommonPlanContext *ctx) { checkUnsupportedStmt(stmt, ctx); - if (ctx->convertible) - checkSupportedTableFormat((Node *)stmt->planTree, ctx); + if (ctx->convertible) checkSupportedTableFormat((Node *)stmt->planTree, ctx); if (ctx->convertible) { int32_t pid = -1; @@ -370,8 +470,7 @@ void convert_to_common_plan(PlannedStmt *stmt, CommonPlanContext *ctx) { do_convert_result_partitions_to_common_plan(stmt->result_partitions, ctx); if (ctx->convertible && enable_secure_filesystem) do_convert_token_map_to_common_plan(ctx); - if (ctx->convertible && ctx->isMagma) - do_convert_snapshot_to_common_plan(ctx); + if (ctx->convertible && ctx->isMagma) do_convert_snapshot_to_common_plan(ctx); } void planner_init_common_plan_context(PlannedStmt *stmt, @@ -407,21 +506,20 @@ void planner_destroy_common_plan_context(CommonPlanContext *ctx, bool enforce) { void get_all_stageno_from_plantree(Plan *node, int32_t *stageNo, int32_t *stageNum, bool *isInitPlan) { - if (node == NULL) - return; + if (node == NULL) return; switch (nodeTag(node)) { - case T_Motion: { - Motion *m = (Motion *)node; - stageNo[*stageNum] = m->motionID; - (*stageNum)++; - break; - } - case T_SubqueryScan: { - SubqueryScan 
*subqueryscan = (SubqueryScan *)node; - get_all_stageno_from_plantree(subqueryscan->subplan, stageNo, stageNum, - isInitPlan); - } + case T_Motion: { + Motion *m = (Motion *)node; + stageNo[*stageNum] = m->motionID; + (*stageNum)++; + break; + } + case T_SubqueryScan: { + SubqueryScan *subqueryscan = (SubqueryScan *)node; + get_all_stageno_from_plantree(subqueryscan->subplan, stageNo, stageNum, + isInitPlan); + } } if (node->initPlan) { ListCell *lc; @@ -438,478 +536,403 @@ void do_convert_plantree_to_common_plan(Plan *node, int32_t pid, bool isLeft, bool isSubPlan, List *splits, Relation rel, bool insist, CommonPlanContext *ctx) { - if (node == NULL || !ctx->convertible) - return; + if (node == NULL || !ctx->convertible) return; int32_t uid; switch (nodeTag(node)) { - case T_Motion: { - Motion *m = (Motion *)node; - ConnectorType connType; - if (m->motionType == MOTIONTYPE_HASH) { - connType = UnivPlanShuffle; - } else if (m->motionType == MOTIONTYPE_FIXED) { - if (m->numOutputSegs == 0) - connType = UnivPlanBroadcast; - else - connType = UnivPlanConverge; - } else { - goto end; - } - uid = univPlanConnectorNewInstance(ctx->univplan, pid); - univPlanConnectorSetType(ctx->univplan, connType); - univPlanConnectorSetStageNo(ctx->univplan, m->motionID); - if (m->numSortCols > 0) { - int32_t *mappingSortFuncId = palloc(m->numSortCols * sizeof(int32_t)); - int32_t *colIdx = palloc(m->numSortCols * sizeof(int32_t)); - for (int i = 0; i < m->numSortCols; i++) { - mappingSortFuncId[i] = - HAWQ_FUNCOID_MAPPING(get_opcode(m->sortOperators[i])); - if (IS_HAWQ_MAPPING_FUNCID_INVALID(mappingSortFuncId[i])) - goto end; - colIdx[i] = m->sortColIdx[i]; + case T_Motion: { + Motion *m = (Motion *)node; + ConnectorType connType; + if (m->motionType == MOTIONTYPE_HASH) { + connType = UnivPlanShuffle; + } else if (m->motionType == MOTIONTYPE_FIXED) { + if (m->numOutputSegs == 0) + connType = UnivPlanBroadcast; + else + connType = UnivPlanConverge; + } else { + goto end; } - 
univPlanConnectorSetColIdx(ctx->univplan, m->numSortCols, colIdx); - univPlanConnectorSetSortFuncId(ctx->univplan, m->numSortCols, - mappingSortFuncId); - pfree(mappingSortFuncId); - pfree(colIdx); - } - if (m->plan.directDispatch.isDirectDispatch) { - List *contentIds = m->plan.directDispatch.contentIds; - Assert(list_length(contentIds) == 1); - univPlanConnectorSetDirectDispatchId(ctx->univplan, - linitial_int(contentIds)); + uid = univPlanConnectorNewInstance(ctx->univplan, pid); + univPlanConnectorSetType(ctx->univplan, connType); + univPlanConnectorSetStageNo(ctx->univplan, m->motionID); + if (m->numSortCols > 0) { + int32_t *mappingSortFuncId = palloc(m->numSortCols * sizeof(int32_t)); + int32_t *colIdx = palloc(m->numSortCols * sizeof(int32_t)); + for (int i = 0; i < m->numSortCols; i++) { + mappingSortFuncId[i] = + HAWQ_FUNCOID_MAPPING(get_opcode(m->sortOperators[i])); + if (IS_HAWQ_MAPPING_FUNCID_INVALID(mappingSortFuncId[i])) goto end; + colIdx[i] = m->sortColIdx[i]; + } + univPlanConnectorSetColIdx(ctx->univplan, m->numSortCols, colIdx); + univPlanConnectorSetSortFuncId(ctx->univplan, m->numSortCols, + mappingSortFuncId); + pfree(mappingSortFuncId); + pfree(colIdx); + } + if (m->plan.directDispatch.isDirectDispatch) { + List *contentIds = m->plan.directDispatch.contentIds; + Assert(list_length(contentIds) == 1); + univPlanConnectorSetDirectDispatchId(ctx->univplan, + linitial_int(contentIds)); + } + if (connType == UnivPlanShuffle) { + if (!do_convert_hashExpr_to_common_plan(node, ctx)) goto end; + } + setDummyTListRef(ctx); + if (!do_convert_targetlist_to_common_plan(node, ctx)) goto end; + unsetDummyTListRef(ctx); + if (!do_convert_quallist_to_common_plan(node, ctx, true)) goto end; + if (!do_convert_initplan_to_common_plan(node, ctx)) goto end; + break; } - if (connType == UnivPlanShuffle) { - if (!do_convert_hashExpr_to_common_plan(node, ctx)) + case T_MagmaIndexScan: + case T_MagmaIndexOnlyScan: + case T_ExternalScan: { + ExternalScan *n = 
(ExternalScan *)node; + // currently we support orc, text, csv and magma format + char fmtType = n->fmtType; + char *fmtName = NULL; + bool magmaTable = false; + fmtName = getExtTblFormatterTypeInFmtOptsList(n->fmtOpts); + // For orc and magma table have different infos in scan node + if (fmtName) { + if (!pg_strncasecmp(fmtName, MAGMATYPE, sizeof(MAGMATYPE) - 1)) { + magmaTable = true; + } + } + + if (fmtType != 'b' || + (pg_strncasecmp(fmtName, ORCTYPE, sizeof(ORCTYPE) - 1) && + pg_strncasecmp(fmtName, MAGMATYPE, sizeof(MAGMATYPE) - 1) && + pg_strncasecmp(fmtName, TEXTTYPE, sizeof(TEXTTYPE) - 1) && + pg_strncasecmp(fmtName, CSVTYPE, sizeof(CSVTYPE) - 1))) { + if (fmtName) pfree(fmtName); goto end; - } - setDummyTListRef(ctx); - if (!do_convert_targetlist_to_common_plan(node, ctx)) - goto end; - unsetDummyTListRef(ctx); - if (!do_convert_quallist_to_common_plan(node, ctx, true)) - goto end; - if (!do_convert_initplan_to_common_plan(node, ctx)) - goto end; - break; - } - case T_MagmaIndexScan: - case T_MagmaIndexOnlyScan: - case T_ExternalScan: { - ExternalScan *n = (ExternalScan *)node; - // currently we support orc and magma format - char fmtType = n->fmtType; - char *fmtName = NULL; - bool magmaTable = false; - fmtName = getExtTblFormatterTypeInFmtOptsList(n->fmtOpts); - // For orc and magma table have different infos in scan node - if (fmtName) { - if (!pg_strncasecmp(fmtName, MAGMATYPE, sizeof(MAGMATYPE) - 1)) { - magmaTable = true; } - } - if (fmtType != 'b' || - (pg_strncasecmp(fmtName, ORCTYPE, sizeof(ORCTYPE) - 1) && - pg_strncasecmp(fmtName, MAGMATYPE, sizeof(MAGMATYPE) - 1))) { - if (fmtName) - pfree(fmtName); - goto end; - } + if (fmtName) pfree(fmtName); - if (fmtName) - pfree(fmtName); + ListCell *lc; + foreach (lc, n->uriList) { + char *url = (char *)strVal(lfirst(lc)); + Uri *uri = ParseExternalTableUri(url); + if (uri == NULL || + (uri->protocol != URI_HDFS && uri->protocol != URI_MAGMA)) { + goto end; + } + } + // calculate columns to read for 
seqscan + int32_t numColsToRead = 0; + Plan *plan = (Plan *)&((Scan *)node)->plan; + Oid relOid; + // scan magma table in old executor + if (magmaTable && rel != NULL) { + relOid = rel->rd_id; + } else { + // scan magma table in new executor + relOid = getrelid(((Scan *)node)->scanrelid, ctx->stmt->rtable); + } + int32_t numCols = get_relnatts(relOid); + bool *proj = (bool *)palloc0(sizeof(bool) * numCols); + GetNeededColumnsForScan((Node *)plan->targetlist, proj, numCols); + GetNeededColumnsForScan((Node *)plan->qual, proj, numCols); + + // if (magmaTable) { + // int32_t i = 0; + // for (; i < numCols; ++i) { + // if (proj[i]) break; + // } + // if (i == numCols) proj[0] = true; + // } + + for (int32_t i = 0; i < numCols; i++) { + if (proj[i]) numColsToRead++; + } - ListCell *lc; - foreach (lc, n->uriList) { - char *url = (char *)strVal(lfirst(lc)); - Uri *uri = ParseExternalTableUri(url); - if (uri == NULL || - (uri->protocol != URI_HDFS && uri->protocol != URI_MAGMA)) { - goto end; + int32_t *columnsToRead = palloc(numColsToRead * sizeof(int32_t)); + int32_t index = 0; + for (int32_t i = 0; i < numCols; i++) { + if (proj[i]) columnsToRead[index++] = i + 1; } - } - // calculate columns to read for seqscan - int32_t numColsToRead = 0; - Plan *plan = (Plan *)&((Scan *)node)->plan; - Oid relOid; - // scan magma table in old executor - if (magmaTable && rel != NULL) { - relOid = rel->rd_id; - } else { - // scan magma table in new executor - relOid = getrelid(((Scan *)node)->scanrelid, ctx->stmt->rtable); - } - int32_t numCols = get_relnatts(relOid); - bool *proj = (bool *)palloc0(sizeof(bool) * numCols); - GetNeededColumnsForScan((Node *)plan->targetlist, proj, numCols); - GetNeededColumnsForScan((Node *)plan->qual, proj, numCols); - - // if (magmaTable) { - // int32_t i = 0; - // for (; i < numCols; ++i) { - // if (proj[i]) break; - // } - // if (i == numCols) proj[0] = true; - // } - - for (int32_t i = 0; i < numCols; i++) { - if (proj[i]) - numColsToRead++; - } - 
- int32_t *columnsToRead = palloc(numColsToRead * sizeof(int32_t)); - int32_t index = 0; - for (int32_t i = 0; i < numCols; i++) { - if (proj[i]) - columnsToRead[index++] = i + 1; - } - // This branch deal with magma table - if (magmaTable) { - uid = univPlanExtScanNewInstance(ctx->univplan, pid); - if (node->type != T_ExternalScan) { - univPlanExtScanSetIndex(ctx->univplan, true); - switch (node->type) { - case T_MagmaIndexScan: - univPlanExtScanSetScanType(ctx->univplan, ExternalIndexScan); - break; - case T_MagmaIndexOnlyScan: - univPlanExtScanSetScanType(ctx->univplan, ExternalIndexOnlyScan); - break; - default: - elog(ERROR, "unknown external scan type."); - break; + // This branch deal with magma table + if (magmaTable) { + uid = univPlanExtScanNewInstance(ctx->univplan, pid); + if (node->type != T_ExternalScan) { + univPlanExtScanSetIndex(ctx->univplan, true); + switch (node->type) { + case T_MagmaIndexScan: + univPlanExtScanSetScanType(ctx->univplan, ExternalIndexScan); + break; + case T_MagmaIndexOnlyScan: + univPlanExtScanSetScanType(ctx->univplan, ExternalIndexOnlyScan); + break; + default: + elog(ERROR, "unknown external scan type."); + break; + } + univPlanExtScanDirection(ctx->univplan, + ((ExternalScan *)node)->indexorderdir); + univPlanExtScanSetIndexName(ctx->univplan, + ((ExternalScan *)node)->indexname); + if (!do_convert_indexqual_to_common_plan(node, ctx, insist)) goto end; + } else { + univPlanExtScanSetScanType(ctx->univplan, NormalExternalScan); } - univPlanExtScanDirection(ctx->univplan, - ((ExternalScan *)node)->indexorderdir); - univPlanExtScanSetIndexName(ctx->univplan, - ((ExternalScan *)node)->indexname); - if (!do_convert_indexqual_to_common_plan(node, ctx, insist)) - goto end; + univPlanExtScanSetRelId(ctx->univplan, ((Scan *)node)->scanrelid); + univPlanExtScanSetReadStatsOnly(ctx->univplan, ctx->scanReadStatsOnly); + if (columnsToRead) + univPlanExtScanSetColumnsToRead(ctx->univplan, numColsToRead, + columnsToRead); + // 
TODO(xsheng) cannot convert some TARGETENTRY to univplan because + // some expression types are not supported by universal plan. + // e.g. (composite type field) + // update t_boxes set tp.len = (tp).len+1 where id = 2; + // currently we can support convert composite type(e.g. tp) but we + // cannot convert composite type field(e.g. tp.len) + // we won't use target list in the plan post-processing, comment it now + if (splits == NIL && ctx->stmt != NULL) { // do it for new executor + if (!do_convert_targetlist_to_common_plan(node, ctx)) goto end; + } + // if (!do_convert_targetlist_to_common_plan(node, ctx)) goto end; + if (!do_convert_quallist_to_common_plan(node, ctx, insist)) goto end; + if (!do_convert_initplan_to_common_plan(node, ctx)) goto end; + if (splits != NULL && ctx->stmt == NULL) { + // old executor, only convert magma external scan plan + do_convert_splits_list_to_common_plan(splits, relOid, ctx); + } else if (splits == NIL && ctx->stmt != NULL) { + // new executor, convert whole plan + do_convert_splits_to_common_plan((Scan *)node, relOid, ctx); + } + } else if (!magmaTable) { // This branch deal with orc table + uid = univPlanSeqScanNewInstance(ctx->univplan, pid); + univPlanSeqScanSetRelId(ctx->univplan, ((Scan *)node)->scanrelid); + univPlanSeqScanSetReadStatsOnly(ctx->univplan, ctx->scanReadStatsOnly); + if (columnsToRead) + univPlanSeqScanSetColumnsToRead(ctx->univplan, numColsToRead, + columnsToRead); + if (!do_convert_targetlist_to_common_plan(node, ctx)) goto end; + if (!do_convert_quallist_to_common_plan(node, ctx, true)) goto end; + if (!do_convert_initplan_to_common_plan(node, ctx)) goto end; + do_convert_splits_to_common_plan((Scan *)node, relOid, ctx); } else { - univPlanExtScanSetScanType(ctx->univplan, NormalExternalScan); + goto end; } - univPlanExtScanSetRelId(ctx->univplan, ((Scan *)node)->scanrelid); - univPlanExtScanSetReadStatsOnly(ctx->univplan, ctx->scanReadStatsOnly); - if (columnsToRead) - 
univPlanExtScanSetColumnsToRead(ctx->univplan, numColsToRead, - columnsToRead); - // TODO(xsheng) cannot convert some TARGETENTRY to univplan because - // some expression types are not supported by universal plan. - // e.g. (composite type field) - // update t_boxes set tp.len = (tp).len+1 where id = 2; - // currently we can support convert composite type(e.g. tp) but we - // cannot convert composite type field(e.g. tp.len) - // we won't use target list in the plan post-processing, comment it now - if (splits == NIL && ctx->stmt != NULL) { // do it for new executor - if (!do_convert_targetlist_to_common_plan(node, ctx)) - goto end; + break; + } + case T_AppendOnlyScan: { + int32_t numColsToRead = 0; + Plan *plan = (Plan *)&((Scan *)node)->plan; + Oid relOid = getrelid(((Scan *)node)->scanrelid, ctx->stmt->rtable); + int32_t numCols = get_relnatts(relOid); + bool *proj = (bool *)palloc0(sizeof(bool) * numCols); + GetNeededColumnsForScan((Node *)plan->targetlist, proj, numCols); + GetNeededColumnsForScan((Node *)plan->qual, proj, numCols); + + for (int32_t i = 0; i < numCols; i++) { + if (proj[i]) numColsToRead++; } - // if (!do_convert_targetlist_to_common_plan(node, ctx)) goto end; - if (!do_convert_quallist_to_common_plan(node, ctx, insist)) - goto end; - if (!do_convert_initplan_to_common_plan(node, ctx)) - goto end; - if (splits != NULL && ctx->stmt == NULL) { - // old executor, only convert magma external scan plan - do_convert_splits_list_to_common_plan(splits, relOid, ctx); - } else if (splits == NIL && ctx->stmt != NULL) { - // new executor, convert whole plan - do_convert_splits_to_common_plan((Scan *)node, relOid, ctx); + + int32_t *columnsToRead = palloc(numColsToRead * sizeof(int32_t)); + int32_t index = 0; + for (int32_t i = 0; i < numCols; i++) { + if (proj[i]) columnsToRead[index++] = i + 1; } - } else if (!magmaTable) { // This branch deal with orc table uid = univPlanSeqScanNewInstance(ctx->univplan, pid); univPlanSeqScanSetRelId(ctx->univplan, 
((Scan *)node)->scanrelid); univPlanSeqScanSetReadStatsOnly(ctx->univplan, ctx->scanReadStatsOnly); if (columnsToRead) univPlanSeqScanSetColumnsToRead(ctx->univplan, numColsToRead, columnsToRead); - if (!do_convert_targetlist_to_common_plan(node, ctx)) + if (!do_convert_targetlist_to_common_plan(node, ctx)) goto end; + if (!do_convert_quallist_to_common_plan(node, ctx, true)) goto end; + if (!do_convert_initplan_to_common_plan(node, ctx)) goto end; + do_convert_splits_to_common_plan((Scan *)node, relOid, ctx); + break; + } + case T_Agg: { + Agg *agg = (Agg *)node; + + uid = univPlanAggNewInstance(ctx->univplan, pid); + int64_t numCols = agg->numCols; + int32_t *grpColIdx = palloc(numCols * sizeof(int32_t)); + for (int i = 0; i < numCols; ++i) grpColIdx[i] = agg->grpColIdx[i]; + univPlanAggSetNumGroupsAndGroupColIndexes(ctx->univplan, agg->numGroups, + numCols, grpColIdx); + univPlanAggSetAggstrategy(ctx->univplan, agg->aggstrategy); + univPlanAggSetRollup(ctx->univplan, agg->numNullCols, agg->inputGrouping, + agg->grouping, agg->rollupGSTimes, + agg->inputHasGrouping, agg->lastAgg, agg->streaming); + pfree(grpColIdx); + if (!do_convert_targetlist_to_common_plan(node, ctx)) goto end; + if (!do_convert_quallist_to_common_plan(node, ctx, true)) goto end; + if (!do_convert_initplan_to_common_plan(node, ctx)) goto end; + if (!isSubPlan) checkReadStatsOnlyForAgg(agg, ctx); + break; + } + case T_Sort: { + Sort *sort = (Sort *)node; + uid = univPlanSortNewInstance(ctx->univplan, pid); + int32_t *mappingSortFuncId = palloc(sort->numCols * sizeof(int32_t)); + int32_t *colIdx = palloc(sort->numCols * sizeof(int32_t)); + for (int i = 0; i < sort->numCols; i++) { + mappingSortFuncId[i] = + HAWQ_FUNCOID_MAPPING(get_opcode(sort->sortOperators[i])); + if (IS_HAWQ_MAPPING_FUNCID_INVALID(mappingSortFuncId[i])) goto end; + colIdx[i] = sort->sortColIdx[i]; + } + univPlanSortSetColIdx(ctx->univplan, sort->numCols, colIdx); + univPlanSortSetSortFuncId(ctx->univplan, sort->numCols, + 
mappingSortFuncId); + univPlanSortSetNoDuplicates(ctx->univplan, sort->noduplicates); + pfree(mappingSortFuncId); + pfree(colIdx); + if (!do_convert_sort_limit_to_common_plan(sort, ctx)) goto end; + if (!do_convert_targetlist_to_common_plan(node, ctx)) goto end; + if (!do_convert_quallist_to_common_plan(node, ctx, true)) goto end; + if (!do_convert_initplan_to_common_plan(node, ctx)) goto end; + break; + } + case T_Limit: { + Limit *limit = (Limit *)node; + uid = univPlanLimitNewInstance(ctx->univplan, pid); + if (!do_convert_limit_to_common_plan(limit, ctx)) goto end; + if (!do_convert_targetlist_to_common_plan(node, ctx)) goto end; + if (!do_convert_quallist_to_common_plan(node, ctx, true)) goto end; + if (!do_convert_initplan_to_common_plan(node, ctx)) goto end; + break; + } + case T_Append: { + Append *append = (Append *)node; + if (append->isTarget || append->plan.qual) goto end; + uid = univPlanAppendNewInstance(ctx->univplan, pid); + if (!do_convert_targetlist_to_common_plan(node, ctx)) goto end; + if (!do_convert_quallist_to_common_plan(node, ctx, true)) goto end; + if (!do_convert_initplan_to_common_plan(node, ctx)) goto end; + break; + } + case T_NestLoop: { + if (pg_strcasecmp(enable_alpha_newqe_str, "OFF") == 0) goto end; + NestLoop *nl = (NestLoop *)node; + if (nl->outernotreferencedbyinner || nl->shared_outer || + nl->singleton_outer) goto end; - if (!do_convert_quallist_to_common_plan(node, ctx, true)) + uid = univPlanNestLoopNewInstance(ctx->univplan, pid); + if (!univPlanNestLoopSetType(ctx->univplan, + (UnivPlanCJoinType)nl->join.jointype)) goto end; - if (!do_convert_initplan_to_common_plan(node, ctx)) + if (!do_convert_targetlist_to_common_plan(node, ctx)) goto end; + if (!do_convert_quallist_to_common_plan(node, ctx, true)) goto end; + if (!do_convert_initplan_to_common_plan(node, ctx)) goto end; + if (!do_convert_nestloop_joinqual_to_common_plan(nl, ctx)) goto end; + break; + } + case T_HashJoin: { + if (pg_strcasecmp(enable_alpha_newqe_str, 
"OFF") == 0) goto end; + HashJoin *hj = (HashJoin *)node; + uid = univPlanHashJoinNewInstance(ctx->univplan, pid); + if (!univPlanHashJoinSetType(ctx->univplan, + (UnivPlanCJoinType)hj->join.jointype)) goto end; - do_convert_splits_to_common_plan((Scan *)node, relOid, ctx); - } else { - goto end; + if (!do_convert_targetlist_to_common_plan(node, ctx)) goto end; + if (!do_convert_quallist_to_common_plan(node, ctx, true)) goto end; + if (!do_convert_initplan_to_common_plan(node, ctx)) goto end; + if (!do_convert_hashjoin_clause_to_common_plan(hj, ctx)) goto end; + break; } - break; - } - case T_AppendOnlyScan: { - int32_t numColsToRead = 0; - Plan *plan = (Plan *)&((Scan *)node)->plan; - Oid relOid = getrelid(((Scan *)node)->scanrelid, ctx->stmt->rtable); - int32_t numCols = get_relnatts(relOid); - bool *proj = (bool *)palloc0(sizeof(bool) * numCols); - GetNeededColumnsForScan((Node *)plan->targetlist, proj, numCols); - GetNeededColumnsForScan((Node *)plan->qual, proj, numCols); - - for (int32_t i = 0; i < numCols; i++) { - if (proj[i]) - numColsToRead++; - } - - int32_t *columnsToRead = palloc(numColsToRead * sizeof(int32_t)); - int32_t index = 0; - for (int32_t i = 0; i < numCols; i++) { - if (proj[i]) - columnsToRead[index++] = i + 1; - } - uid = univPlanSeqScanNewInstance(ctx->univplan, pid); - univPlanSeqScanSetRelId(ctx->univplan, ((Scan *)node)->scanrelid); - univPlanSeqScanSetReadStatsOnly(ctx->univplan, ctx->scanReadStatsOnly); - if (columnsToRead) - univPlanSeqScanSetColumnsToRead(ctx->univplan, numColsToRead, - columnsToRead); - if (!do_convert_targetlist_to_common_plan(node, ctx)) - goto end; - if (!do_convert_quallist_to_common_plan(node, ctx, true)) - goto end; - if (!do_convert_initplan_to_common_plan(node, ctx)) - goto end; - do_convert_splits_to_common_plan((Scan *)node, relOid, ctx); - break; - } - case T_Agg: { - Agg *agg = (Agg *)node; - - uid = univPlanAggNewInstance(ctx->univplan, pid); - int64_t numCols = agg->numCols; - int32_t *grpColIdx = 
palloc(numCols * sizeof(int32_t)); - for (int i = 0; i < numCols; ++i) - grpColIdx[i] = agg->grpColIdx[i]; - univPlanAggSetNumGroupsAndGroupColIndexes(ctx->univplan, agg->numGroups, - numCols, grpColIdx); - univPlanAggSetAggstrategy(ctx->univplan, agg->aggstrategy); - univPlanAggSetRollup(ctx->univplan, agg->numNullCols, agg->inputGrouping, - agg->grouping, agg->rollupGSTimes, - agg->inputHasGrouping, agg->lastAgg, agg->streaming); - pfree(grpColIdx); - if (!do_convert_targetlist_to_common_plan(node, ctx)) - goto end; - if (!do_convert_quallist_to_common_plan(node, ctx, true)) - goto end; - if (!do_convert_initplan_to_common_plan(node, ctx)) - goto end; - if (!isSubPlan) - checkReadStatsOnlyForAgg(agg, ctx); - break; - } - case T_Sort: { - Sort *sort = (Sort *)node; - uid = univPlanSortNewInstance(ctx->univplan, pid); - int32_t *mappingSortFuncId = palloc(sort->numCols * sizeof(int32_t)); - int32_t *colIdx = palloc(sort->numCols * sizeof(int32_t)); - for (int i = 0; i < sort->numCols; i++) { - mappingSortFuncId[i] = - HAWQ_FUNCOID_MAPPING(get_opcode(sort->sortOperators[i])); - if (IS_HAWQ_MAPPING_FUNCID_INVALID(mappingSortFuncId[i])) + case T_Hash: { + uid = univPlanHashNewInstance(ctx->univplan, pid); + if (!do_convert_targetlist_to_common_plan(node, ctx)) goto end; + if (!do_convert_quallist_to_common_plan(node, ctx, true)) goto end; + if (!do_convert_initplan_to_common_plan(node, ctx)) goto end; + break; + } + case T_MergeJoin: { + MergeJoin *mj = (MergeJoin *)node; + uid = univPlanMergeJoinNewInstance(ctx->univplan, pid); + univPlanMergeJoinSetUniqueOuter(ctx->univplan, mj->unique_outer); + if (!univPlanMergeJoinSetType(ctx->univplan, + (UnivPlanCJoinType)mj->join.jointype)) goto end; - colIdx[i] = sort->sortColIdx[i]; - } - univPlanSortSetColIdx(ctx->univplan, sort->numCols, colIdx); - univPlanSortSetSortFuncId(ctx->univplan, sort->numCols, mappingSortFuncId); - univPlanSortSetNoDuplicates(ctx->univplan, sort->noduplicates); - pfree(mappingSortFuncId); - 
pfree(colIdx); - if (!do_convert_sort_limit_to_common_plan(sort, ctx)) - goto end; - if (!do_convert_targetlist_to_common_plan(node, ctx)) - goto end; - if (!do_convert_quallist_to_common_plan(node, ctx, true)) - goto end; - if (!do_convert_initplan_to_common_plan(node, ctx)) - goto end; - break; - } - case T_Limit: { - Limit *limit = (Limit *)node; - uid = univPlanLimitNewInstance(ctx->univplan, pid); - if (!do_convert_limit_to_common_plan(limit, ctx)) - goto end; - if (!do_convert_targetlist_to_common_plan(node, ctx)) - goto end; - if (!do_convert_quallist_to_common_plan(node, ctx, true)) - goto end; - if (!do_convert_initplan_to_common_plan(node, ctx)) - goto end; - break; - } - case T_Append: { - Append *append = (Append *)node; - if (append->isTarget || append->plan.qual) - goto end; - uid = univPlanAppendNewInstance(ctx->univplan, pid); - if (!do_convert_targetlist_to_common_plan(node, ctx)) - goto end; - if (!do_convert_quallist_to_common_plan(node, ctx, true)) - goto end; - if (!do_convert_initplan_to_common_plan(node, ctx)) - goto end; - break; - } - case T_NestLoop: { - if (pg_strcasecmp(enable_alpha_newqe_str, "OFF") == 0) - goto end; - NestLoop *nl = (NestLoop *)node; - if (nl->outernotreferencedbyinner || nl->shared_outer || - nl->singleton_outer) - goto end; - uid = univPlanNestLoopNewInstance(ctx->univplan, pid); - if (!univPlanNestLoopSetType(ctx->univplan, - (UnivPlanCJoinType)nl->join.jointype)) - goto end; - if (!do_convert_targetlist_to_common_plan(node, ctx)) - goto end; - if (!do_convert_quallist_to_common_plan(node, ctx, true)) - goto end; - if (!do_convert_initplan_to_common_plan(node, ctx)) - goto end; - if (!do_convert_nestloop_joinqual_to_common_plan(nl, ctx)) - goto end; - break; - } - case T_HashJoin: { - if (pg_strcasecmp(enable_alpha_newqe_str, "OFF") == 0) - goto end; - HashJoin *hj = (HashJoin *)node; - uid = univPlanHashJoinNewInstance(ctx->univplan, pid); - if (!univPlanHashJoinSetType(ctx->univplan, - 
(UnivPlanCJoinType)hj->join.jointype)) - goto end; - if (!do_convert_targetlist_to_common_plan(node, ctx)) - goto end; - if (!do_convert_quallist_to_common_plan(node, ctx, true)) - goto end; - if (!do_convert_initplan_to_common_plan(node, ctx)) - goto end; - if (!do_convert_hashjoin_clause_to_common_plan(hj, ctx)) - goto end; - break; - } - case T_Hash: { - uid = univPlanHashNewInstance(ctx->univplan, pid); - if (!do_convert_targetlist_to_common_plan(node, ctx)) - goto end; - if (!do_convert_quallist_to_common_plan(node, ctx, true)) - goto end; - if (!do_convert_initplan_to_common_plan(node, ctx)) - goto end; - break; - } - case T_MergeJoin: { - MergeJoin *mj = (MergeJoin *)node; - uid = univPlanMergeJoinNewInstance(ctx->univplan, pid); - univPlanMergeJoinSetUniqueOuter(ctx->univplan, mj->unique_outer); - if (!univPlanMergeJoinSetType(ctx->univplan, - (UnivPlanCJoinType)mj->join.jointype)) - goto end; - if (!do_convert_targetlist_to_common_plan(node, ctx)) - goto end; - if (!do_convert_quallist_to_common_plan(node, ctx, true)) - goto end; - if (!do_convert_initplan_to_common_plan(node, ctx)) - goto end; - if (!do_convert_mergejoin_clause_to_common_plan(mj, ctx)) - goto end; - break; - } - case T_Material: { - Material *material = (Material *)node; - uid = univPlanMaterialNewInstance(ctx->univplan, pid); - if (!univPlanMaterialSetAttr( - ctx->univplan, (UnivPlanCShareType)material->share_type, - material->cdb_strict, material->share_id, material->driver_slice, - material->nsharer, material->nsharer_xslice)) - goto end; - if (!do_convert_targetlist_to_common_plan(node, ctx)) - goto end; - if (!do_convert_quallist_to_common_plan(node, ctx, true)) - goto end; - if (!do_convert_initplan_to_common_plan(node, ctx)) - goto end; - break; - } - case T_ShareInputScan: { - ShareInputScan *shareInputScan = (ShareInputScan *)node; - uid = univPlanShareInputScanNewInstance(ctx->univplan, pid); - if (!univPlanShareInputScanSetAttr( - ctx->univplan, 
(UnivPlanCShareType)shareInputScan->share_type, - shareInputScan->share_id, shareInputScan->driver_slice)) - goto end; - if (!do_convert_targetlist_to_common_plan(node, ctx)) - goto end; - if (!do_convert_quallist_to_common_plan(node, ctx, true)) - goto end; - if (!do_convert_initplan_to_common_plan(node, ctx)) - goto end; - break; - } - case T_Result: { - Result *result = (Result *)node; - if (result->hashFilter) - goto end; - uid = univPlanResultNewInstance(ctx->univplan, pid); - if (!do_convert_targetlist_to_common_plan(node, ctx)) - goto end; - if (!do_convert_quallist_to_common_plan(node, ctx, true)) - goto end; - if (!do_convert_initplan_to_common_plan(node, ctx)) - goto end; - if (!do_convert_result_qual_to_common_plan(result, ctx)) - goto end; - break; - } - case T_SubqueryScan: { - SubqueryScan *subqueryscan = (SubqueryScan *)node; - uid = univPlanSubqueryScanNewInstance(ctx->univplan, pid); - if (!do_convert_targetlist_to_common_plan(node, ctx)) - goto end; - if (!do_convert_quallist_to_common_plan(node, ctx, true)) - goto end; - if (!do_convert_initplan_to_common_plan(node, ctx)) - goto end; - if (!do_convert_subqueryscan_subplan_to_common_plan(subqueryscan, ctx)) - goto end; - break; - } - case T_Unique: { - Unique *uniq = (Unique *)node; - uid = univPlanUniqueNewInstance(ctx->univplan, pid); - int64_t numCols = uniq->numCols; - int32_t *uniqColIdx = palloc(numCols * sizeof(int32_t)); - for (int i = 0; i < numCols; ++i) - uniqColIdx[i] = uniq->uniqColIdx[i]; - univPlanUniqueSetNumGroupsAndUniqColIdxs(ctx->univplan, uniq->numCols, - uniqColIdx); - pfree(uniqColIdx); - if (!do_convert_targetlist_to_common_plan(node, ctx)) - goto end; - if (!do_convert_quallist_to_common_plan(node, ctx, true)) - goto end; - if (!do_convert_initplan_to_common_plan(node, ctx)) - goto end; - break; - } - case T_SetOp: { - SetOp *setop = (SetOp *)node; - uid = univPlanSetOpNewInstance(ctx->univplan, pid); - if (!univPlanSetOpSetAttr(ctx->univplan, setop->cmd, setop->numCols, - 
setop->dupColIdx, setop->flagColIdx)) - goto end; - if (!do_convert_targetlist_to_common_plan(node, ctx)) - goto end; - if (!do_convert_quallist_to_common_plan(node, ctx, true)) - goto end; - if (!do_convert_initplan_to_common_plan(node, ctx)) + if (!do_convert_targetlist_to_common_plan(node, ctx)) goto end; + if (!do_convert_quallist_to_common_plan(node, ctx, true)) goto end; + if (!do_convert_initplan_to_common_plan(node, ctx)) goto end; + if (!do_convert_mergejoin_clause_to_common_plan(mj, ctx)) goto end; + break; + } + case T_Material: { + Material *material = (Material *)node; + uid = univPlanMaterialNewInstance(ctx->univplan, pid); + if (!univPlanMaterialSetAttr( + ctx->univplan, (UnivPlanCShareType)material->share_type, + material->cdb_strict, material->share_id, material->driver_slice, + material->nsharer, material->nsharer_xslice)) + goto end; + if (!do_convert_targetlist_to_common_plan(node, ctx)) goto end; + if (!do_convert_quallist_to_common_plan(node, ctx, true)) goto end; + if (!do_convert_initplan_to_common_plan(node, ctx)) goto end; + break; + } + case T_ShareInputScan: { + ShareInputScan *shareInputScan = (ShareInputScan *)node; + uid = univPlanShareInputScanNewInstance(ctx->univplan, pid); + if (!univPlanShareInputScanSetAttr( + ctx->univplan, (UnivPlanCShareType)shareInputScan->share_type, + shareInputScan->share_id, shareInputScan->driver_slice)) + goto end; + if (!do_convert_targetlist_to_common_plan(node, ctx)) goto end; + if (!do_convert_quallist_to_common_plan(node, ctx, true)) goto end; + if (!do_convert_initplan_to_common_plan(node, ctx)) goto end; + break; + } + case T_Result: { + Result *result = (Result *)node; + if (result->hashFilter) goto end; + uid = univPlanResultNewInstance(ctx->univplan, pid); + if (!do_convert_targetlist_to_common_plan(node, ctx)) goto end; + if (!do_convert_quallist_to_common_plan(node, ctx, true)) goto end; + if (!do_convert_initplan_to_common_plan(node, ctx)) goto end; + if 
(!do_convert_result_qual_to_common_plan(result, ctx)) goto end; + break; + } + case T_SubqueryScan: { + SubqueryScan *subqueryscan = (SubqueryScan *)node; + uid = univPlanSubqueryScanNewInstance(ctx->univplan, pid); + if (!do_convert_targetlist_to_common_plan(node, ctx)) goto end; + if (!do_convert_quallist_to_common_plan(node, ctx, true)) goto end; + if (!do_convert_initplan_to_common_plan(node, ctx)) goto end; + if (!do_convert_subqueryscan_subplan_to_common_plan(subqueryscan, ctx)) + goto end; + break; + } + case T_Unique: { + Unique *uniq = (Unique *)node; + uid = univPlanUniqueNewInstance(ctx->univplan, pid); + int64_t numCols = uniq->numCols; + int32_t *uniqColIdx = palloc(numCols * sizeof(int32_t)); + for (int i = 0; i < numCols; ++i) uniqColIdx[i] = uniq->uniqColIdx[i]; + univPlanUniqueSetNumGroupsAndUniqColIdxs(ctx->univplan, uniq->numCols, + uniqColIdx); + pfree(uniqColIdx); + if (!do_convert_targetlist_to_common_plan(node, ctx)) goto end; + if (!do_convert_quallist_to_common_plan(node, ctx, true)) goto end; + if (!do_convert_initplan_to_common_plan(node, ctx)) goto end; + break; + } + case T_SetOp: { + SetOp *setop = (SetOp *)node; + uid = univPlanSetOpNewInstance(ctx->univplan, pid); + if (!univPlanSetOpSetAttr(ctx->univplan, setop->cmd, setop->numCols, + setop->dupColIdx, setop->flagColIdx)) + goto end; + if (!do_convert_targetlist_to_common_plan(node, ctx)) goto end; + if (!do_convert_quallist_to_common_plan(node, ctx, true)) goto end; + if (!do_convert_initplan_to_common_plan(node, ctx)) goto end; + break; + } + default: // plannode not supported yet goto end; - break; - } - default: // plannode not supported yet - goto end; } univPlanSetPlanNodeInfo(ctx->univplan, node->plan_rows, node->plan_width, @@ -942,8 +965,7 @@ bool do_convert_targetlist_to_common_plan(Plan *node, CommonPlanContext *ctx) { foreach (lc, node->targetlist) { TargetEntry *te = (TargetEntry *)lfirst(lc); univPlanNewExpr(ctx->univplan); - if (!do_convert_expr_to_common_plan(-1, 
(Expr *)te, ctx)) - return false; + if (!do_convert_expr_to_common_plan(-1, (Expr *)te, ctx)) return false; univPlanTargetListAddTargetEntry(ctx->univplan, te->resjunk); } return true; @@ -996,8 +1018,7 @@ bool do_convert_initplan_to_common_plan(Plan *node, CommonPlanContext *ctx) { Expr *expr = (Expr *)lfirst(lc); univPlanNewExpr(ctx->univplan); bool convert_ret = do_convert_expr_to_common_plan(-1, expr, ctx); - if (!convert_ret) - return false; + if (!convert_ret) return false; univPlanInitplanAddExpr(ctx->univplan); } return true; @@ -1008,8 +1029,7 @@ bool do_convert_hashExpr_to_common_plan(Motion *node, CommonPlanContext *ctx) { foreach (lc, node->hashExpr) { Expr *expr = (Expr *)lfirst(lc); univPlanNewExpr(ctx->univplan); - if (!do_convert_expr_to_common_plan(-1, expr, ctx)) - return false; + if (!do_convert_expr_to_common_plan(-1, expr, ctx)) return false; univPlanConnectorAddHashExpr(ctx->univplan); } return true; @@ -1060,8 +1080,7 @@ bool do_convert_nestloop_joinqual_to_common_plan(NestLoop *node, foreach (lc, node->join.joinqual) { Expr *expr = (Expr *)lfirst(lc); univPlanNewExpr(ctx->univplan); - if (!do_convert_expr_to_common_plan(-1, expr, ctx)) - return false; + if (!do_convert_expr_to_common_plan(-1, expr, ctx)) return false; univPlanNestLoopAddJoinQual(ctx->univplan); } return true; @@ -1074,24 +1093,21 @@ bool do_convert_hashjoin_clause_to_common_plan(HashJoin *node, foreach (lc, node->join.joinqual) { Expr *expr = (Expr *)lfirst(lc); univPlanNewExpr(ctx->univplan); - if (!do_convert_expr_to_common_plan(-1, expr, ctx)) - return false; + if (!do_convert_expr_to_common_plan(-1, expr, ctx)) return false; univPlanHashJoinAddJoinQual(ctx->univplan); } foreach (lc, node->hashclauses) { Expr *expr = (Expr *)lfirst(lc); univPlanNewExpr(ctx->univplan); - if (!do_convert_expr_to_common_plan(-1, expr, ctx)) - return false; + if (!do_convert_expr_to_common_plan(-1, expr, ctx)) return false; univPlanHashJoinAddHashClause(ctx->univplan); } foreach (lc, 
node->hashqualclauses) { Expr *expr = (Expr *)lfirst(lc); univPlanNewExpr(ctx->univplan); - if (!do_convert_expr_to_common_plan(-1, expr, ctx)) - return false; + if (!do_convert_expr_to_common_plan(-1, expr, ctx)) return false; univPlanHashJoinAddHashQualClause(ctx->univplan); } return true; @@ -1103,15 +1119,13 @@ bool do_convert_mergejoin_clause_to_common_plan(MergeJoin *node, foreach (lc, node->join.joinqual) { Expr *expr = (Expr *)lfirst(lc); univPlanNewExpr(ctx->univplan); - if (!do_convert_expr_to_common_plan(-1, expr, ctx)) - return false; + if (!do_convert_expr_to_common_plan(-1, expr, ctx)) return false; univPlanMergeJoinAddJoinQual(ctx->univplan); } foreach (lc, node->mergeclauses) { Expr *expr = (Expr *)lfirst(lc); univPlanNewExpr(ctx->univplan); - if (!do_convert_expr_to_common_plan(-1, expr, ctx)) - return false; + if (!do_convert_expr_to_common_plan(-1, expr, ctx)) return false; univPlanMergeJoinAddMergeClause(ctx->univplan); } return true; @@ -1123,8 +1137,7 @@ bool do_convert_result_qual_to_common_plan(Result *node, foreach (lc, (List *)node->resconstantqual) { Expr *expr = (Expr *)lfirst(lc); univPlanNewExpr(ctx->univplan); - if (!do_convert_expr_to_common_plan(-1, expr, ctx)) - return false; + if (!do_convert_expr_to_common_plan(-1, expr, ctx)) return false; univPlanResultAddResConstantQual(ctx->univplan); } return true; @@ -1197,8 +1210,7 @@ void do_convert_splits_list_to_common_plan(List *splits, Oid relOid, logicEof, NULL, NULL); for (index = 0; index < fileSplitNum; ++index) - if (fileName[index]) - pfree(fileName[index]); + if (fileName[index]) pfree(fileName[index]); pfree(fileName); pfree(start); pfree(len); @@ -1368,10 +1380,9 @@ void do_convert_onetbl_to_common_plan(Oid relid, CommonPlanContext *ctx) { char *targetName = NULL; getFmtName(fmtOptsJson, &fmtName); int16_t magmaType = -1; - if (pg_strncasecmp(fmtName, "\"magmatp\"", strlen("\"magmatp\"")) == 0) { + if (pg_strcasecmp(fmtName, "magmatp") == 0) { magmaType = 0; - } else if 
(pg_strncasecmp(fmtName, "\"magmaap\"", strlen("\"magmaap\"")) == - 0) { + } else if (pg_strcasecmp(fmtName, "magmaap") == 0) { magmaType = 1; } // indicate magma format table @@ -1416,18 +1427,41 @@ void do_convert_onetbl_to_common_plan(Oid relid, CommonPlanContext *ctx) { } if (opt_json_object != NULL) { const char *str = json_object_to_json_string(opt_json_object); + pfree(fmtOptsJson); fmtOptsJson = (char *)palloc0(strlen(str) + 1); strcpy(fmtOptsJson, str); json_object_put(opt_json_object); } } - } else if (pg_strncasecmp(fmtName, "\"orc\"", strlen("\"orc\"")) == 0) { + } else if (pg_strcasecmp(fmtName, ORCTYPE) == 0) { fmttype = UnivPlanOrcFormat; location = pstrdup(strVal(linitial(extEntry->locations))); - } else if (pg_strncasecmp(fmtName, "\"csv\"", strlen("\"csv\"")) == 0 || - pg_strncasecmp(fmtName, "\"text\"", strlen("\"text\"")) == 0) { - univPlanRangeTblEntryAddDummy(ctx->univplan); - goto end; + } else if (pg_strcasecmp(fmtName, TEXTTYPE) == 0 || + pg_strcasecmp(fmtName, CSVTYPE) == 0) { + for (int i = 0; i < attNum; ++i) { + // newQE doesn't support date/timestamp in text/csv format yet + if (columnDataType[i] == DATEID || columnDataType[i] == TIMESTAMPID || + columnDataType[i] == TIMESTAMPTZID) { + ctx->convertible = false; + goto end; + } + } + struct json_object *opt_json_object = json_tokener_parse(fmtOptsJson); + if (pg_strcasecmp(fmtName, TEXTTYPE) == 0) { + buildDefaultFormatterOptionsInJson(extEntry->encoding, + TextFormatTypeTXT, opt_json_object); + fmttype = UnivPlanTextFormat; + } else { + buildDefaultFormatterOptionsInJson(extEntry->encoding, + TextFormatTypeCSV, opt_json_object); + fmttype = UnivPlanCsvFormat; + } + const char *optsJsonStr = json_object_to_json_string(opt_json_object); + pfree(fmtOptsJson); + fmtOptsJson = (char *)palloc0(strlen(optsJsonStr) + 1); + strcpy(fmtOptsJson, optsJsonStr); + json_object_put(opt_json_object); + location = pstrdup(strVal(linitial(extEntry->locations))); } else { elog(ERROR, "Cannot get external 
table format."); } @@ -1437,12 +1471,9 @@ void do_convert_onetbl_to_common_plan(Oid relid, CommonPlanContext *ctx) { (const char **)columnName, columnDataType, columnDataTypeMod, targetName, NULL); - if (fmtOptsJson != NULL) - pfree(fmtOptsJson); - if (fmtName != NULL) - pfree(fmtName); - if (targetName != NULL) - pfree(targetName); + if (fmtOptsJson != NULL) pfree(fmtOptsJson); + if (fmtName != NULL) pfree(fmtName); + if (targetName != NULL) pfree(targetName); pfree(location); } else { univPlanRangeTblEntryAddDummy(ctx->univplan); @@ -1492,6 +1523,16 @@ static void do_convert_result_partitions_to_common_plan( ctx->convertible = false; return; } + + Relation rel = RelationIdGetRelation(partitionNode->part->parrelid); + if (rel->rd_node.relNode != partitionNode->part->parrelid) { + // TODO(chiyang): support INSERT INTO partition table after TRUNCATE + ctx->convertible = false; + RelationClose(rel); + return; + } + RelationClose(rel); + univPlanAddResultPartitions(ctx->univplan, partitionNode->part->parrelid, partitionNode->part->parkind, partitionNode->part->paratts, @@ -1564,432 +1605,410 @@ bool do_convert_expr_to_common_plan(int32_t pid, Expr *expr, ListCell *lc; Expr *old; switch (expr->type) { - case T_TargetEntry: { - TargetEntry *te = (TargetEntry *)expr; - old = parentExprSwitchTo(expr, ctx); - if (!do_convert_expr_to_common_plan(pid, te->expr, ctx)) - goto end; - parentExprSwitchTo(old, ctx); - break; - } + case T_TargetEntry: { + TargetEntry *te = (TargetEntry *)expr; + old = parentExprSwitchTo(expr, ctx); + if (!do_convert_expr_to_common_plan(pid, te->expr, ctx)) goto end; + parentExprSwitchTo(old, ctx); + break; + } - case T_RelabelType: { - RelabelType *te = (RelabelType *)expr; - old = parentExprSwitchTo(expr, ctx); - if (!do_convert_expr_to_common_plan(pid, te->arg, ctx)) - goto end; - parentExprSwitchTo(old, ctx); - break; - } + case T_RelabelType: { + RelabelType *te = (RelabelType *)expr; + old = parentExprSwitchTo(expr, ctx); + if 
(!do_convert_expr_to_common_plan(pid, te->arg, ctx)) goto end; + parentExprSwitchTo(old, ctx); + break; + } - case T_Var: { - Var *var = (Var *)expr; - // TODO(chiyang): support system attribute - if (var->varattno < 0 && - !(var->varattno == SelfItemPointerAttributeNumber || - var->varattno == GpSegmentIdAttributeNumber)) - goto end; - if (ctx->parent && ctx->parent->type == T_Aggref) { - Aggref *aggref = (Aggref *)ctx->parent; - univPlanAggrefAddProxyVar(ctx->univplan, pid, var->varattno, - HAWQ_FUNCOID_MAPPING(aggref->aggfnoid), - var->vartypmod, var->varnoold, var->varoattno); - } else { - Oid varType = var->vartype; - if (checkUnsupportedDataType(varType, DateStyle)) + case T_Var: { + Var *var = (Var *)expr; + // TODO(chiyang): support system attribute + if (var->varattno < 0 && + !(var->varattno == SelfItemPointerAttributeNumber || + var->varattno == GpSegmentIdAttributeNumber)) goto end; - univPlanExprAddVar(ctx->univplan, pid, - var->varno == DIRECT_LEFT_CHILD_VAR ? OUTER - : var->varno, - var->varattno, map_hawq_type_to_common_plan(varType), - var->vartypmod, var->varnoold, var->varoattno); + if (ctx->parent && ctx->parent->type == T_Aggref) { + Aggref *aggref = (Aggref *)ctx->parent; + univPlanAggrefAddProxyVar(ctx->univplan, pid, var->varattno, + HAWQ_FUNCOID_MAPPING(aggref->aggfnoid), + var->vartypmod, var->varnoold, + var->varoattno); + } else { + Oid varType = var->vartype; + if (checkUnsupportedDataType(varType, DateStyle)) goto end; + univPlanExprAddVar( + ctx->univplan, pid, + var->varno == DIRECT_LEFT_CHILD_VAR ? 
OUTER : var->varno, + var->varattno, map_hawq_type_to_common_plan(varType), + var->vartypmod, var->varnoold, var->varoattno); + } + break; } - break; - } - case T_Const: { - Const *constval = (Const *)expr; - int32_t consttype = map_hawq_type_to_common_plan(constval->consttype); - if ((!constval->constisnull) && - (checkUnsupportedDataType(constval->consttype, DateStyle))) - goto end; - if (ctx->setDummyTListRef && ctx->parent && - ctx->parent->type == T_TargetEntry) { - univPlanExprAddVar(ctx->univplan, pid, OUTER, - ((TargetEntry *)ctx->parent)->resno, consttype, - constval->consttypmod, 0, 0); - } else { - Oid typoutput; - bool typIsVarlena; - getTypeOutputInfo(constval->consttype, &typoutput, &typIsVarlena); - char *extval = NULL; - if (!constval->constisnull) { - int savedDateStyle = DateStyle; - int savedDateOrder = DateOrder; - DateStyle = USE_ISO_DATES; - DateOrder = DATEORDER_MDY; - extval = OidOutputFunctionCall(typoutput, constval->constvalue); - DateStyle = savedDateStyle; - DateOrder = savedDateOrder; - if (constval->consttype == INTERVALOID) { - Interval *ival = (Interval *)DatumGetPointer(constval->constvalue); - extval = palloc(sizeof(char) * INT64_MAX_LENGTH * 2); - sprintf(extval, "%d:%d:%lld", ival->month, ival->day, ival->time); + case T_Const: { + Const *constval = (Const *)expr; + int32_t consttype = map_hawq_type_to_common_plan(constval->consttype); + if ((!constval->constisnull) && + (checkUnsupportedDataType(constval->consttype, DateStyle))) + goto end; + if (ctx->setDummyTListRef && ctx->parent && + ctx->parent->type == T_TargetEntry) { + univPlanExprAddVar(ctx->univplan, pid, OUTER, + ((TargetEntry *)ctx->parent)->resno, consttype, + constval->consttypmod, 0, 0); + } else { + Oid typoutput; + bool typIsVarlena; + getTypeOutputInfo(constval->consttype, &typoutput, &typIsVarlena); + char *extval = NULL; + if (!constval->constisnull) { + int savedDateStyle = DateStyle; + int savedDateOrder = DateOrder; + DateStyle = USE_ISO_DATES; + 
DateOrder = DATEORDER_MDY; + extval = OidOutputFunctionCall(typoutput, constval->constvalue); + DateStyle = savedDateStyle; + DateOrder = savedDateOrder; + if (constval->consttype == INTERVALOID) { + Interval *ival = (Interval *)DatumGetPointer(constval->constvalue); + extval = palloc(sizeof(char) * INT64_MAX_LENGTH * 2); + sprintf(extval, "%d:%d:%lld", ival->month, ival->day, ival->time); + } } + univPlanExprAddConst(ctx->univplan, pid, consttype, + constval->constisnull, extval, + constval->consttypmod); } - univPlanExprAddConst(ctx->univplan, pid, consttype, constval->constisnull, - extval, constval->consttypmod); + break; } - break; - } - case T_OpExpr: { - OpExpr *opExpr = (OpExpr *)expr; + case T_OpExpr: { + OpExpr *opExpr = (OpExpr *)expr; - old = parentExprSwitchTo(expr, ctx); + old = parentExprSwitchTo(expr, ctx); - mappingFuncId = HAWQ_FUNCOID_MAPPING(opExpr->opfuncid); - if (IS_HAWQ_MAPPING_FUNCID_INVALID(mappingFuncId)) - goto end; - uid = univPlanExprAddOpExpr(ctx->univplan, pid, mappingFuncId); - foreach (lc, opExpr->args) { - if (!do_convert_expr_to_common_plan(uid, lfirst(lc), ctx)) - goto end; - } + mappingFuncId = HAWQ_FUNCOID_MAPPING(opExpr->opfuncid); + if (IS_HAWQ_MAPPING_FUNCID_INVALID(mappingFuncId)) goto end; + uid = univPlanExprAddOpExpr(ctx->univplan, pid, mappingFuncId); + foreach (lc, opExpr->args) { + if (!do_convert_expr_to_common_plan(uid, lfirst(lc), ctx)) goto end; + } - parentExprSwitchTo(old, ctx); - break; - } + parentExprSwitchTo(old, ctx); + break; + } - case T_FuncExpr: { - FuncExpr *funcExpr = (FuncExpr *)expr; + case T_FuncExpr: { + FuncExpr *funcExpr = (FuncExpr *)expr; - old = parentExprSwitchTo(expr, ctx); + old = parentExprSwitchTo(expr, ctx); - mappingFuncId = HAWQ_FUNCOID_MAPPING(funcExpr->funcid); - if (IS_HAWQ_MAPPING_FUNCID_INVALID(mappingFuncId)) - goto end; - if (IS_HAWQ_MAPPING_DO_NOTHING(mappingFuncId)) { - if (funcExpr->args->length != 1) - goto end; - foreach (lc, funcExpr->args) { - if 
(!do_convert_expr_to_common_plan(pid, lfirst(lc), ctx)) - goto end; - } - } else { - uid = univPlanExprAddFuncExpr(ctx->univplan, pid, mappingFuncId); - foreach (lc, funcExpr->args) { - if (!do_convert_expr_to_common_plan(uid, lfirst(lc), ctx)) - goto end; + mappingFuncId = HAWQ_FUNCOID_MAPPING(funcExpr->funcid); + if (IS_HAWQ_MAPPING_FUNCID_INVALID(mappingFuncId)) goto end; + if (IS_HAWQ_MAPPING_DO_NOTHING(mappingFuncId)) { + if (funcExpr->args->length != 1) goto end; + foreach (lc, funcExpr->args) { + if (!do_convert_expr_to_common_plan(pid, lfirst(lc), ctx)) goto end; + } + } else { + uid = univPlanExprAddFuncExpr(ctx->univplan, pid, mappingFuncId); + foreach (lc, funcExpr->args) { + if (!do_convert_expr_to_common_plan(uid, lfirst(lc), ctx)) goto end; + } } + + parentExprSwitchTo(old, ctx); + break; } - parentExprSwitchTo(old, ctx); - break; - } + case T_Aggref: { + Aggref *aggref = (Aggref *)expr; - case T_Aggref: { - Aggref *aggref = (Aggref *)expr; + // disable count distinct case + if (aggref->aggdistinct || aggref->aggorder) goto end; - // disable count distinct case - if (aggref->aggdistinct || aggref->aggorder) - goto end; + old = parentExprSwitchTo(expr, ctx); - old = parentExprSwitchTo(expr, ctx); + mappingFuncId = HAWQ_FUNCOID_MAPPING(aggref->aggfnoid); + if (IS_HAWQ_MAPPING_FUNCID_INVALID(mappingFuncId)) goto end; + switch (aggref->aggstage) { + case AGGSTAGE_NORMAL: + uid = univPlanAggrefAddOneStage(ctx->univplan, pid, mappingFuncId); + break; + case AGGSTAGE_PARTIAL: + uid = + univPlanAggrefAddPartialStage(ctx->univplan, pid, mappingFuncId); + break; + case AGGSTAGE_INTERMEDIATE: + uid = univPlanAggrefAddIntermediateStage(ctx->univplan, pid, + mappingFuncId); + break; + case AGGSTAGE_FINAL: + uid = univPlanAggrefAddFinalStage(ctx->univplan, pid, mappingFuncId); + break; + default: + goto end; + } - mappingFuncId = HAWQ_FUNCOID_MAPPING(aggref->aggfnoid); - if (IS_HAWQ_MAPPING_FUNCID_INVALID(mappingFuncId)) - goto end; - switch (aggref->aggstage) { - 
case AGGSTAGE_NORMAL: - uid = univPlanAggrefAddOneStage(ctx->univplan, pid, mappingFuncId); - break; - case AGGSTAGE_PARTIAL: - uid = univPlanAggrefAddPartialStage(ctx->univplan, pid, mappingFuncId); - break; - case AGGSTAGE_INTERMEDIATE: - uid = - univPlanAggrefAddIntermediateStage(ctx->univplan, pid, mappingFuncId); - break; - case AGGSTAGE_FINAL: - uid = univPlanAggrefAddFinalStage(ctx->univplan, pid, mappingFuncId); - break; - default: - goto end; - } + foreach (lc, aggref->args) { + if (!do_convert_expr_to_common_plan(uid, lfirst(lc), ctx)) goto end; + } - foreach (lc, aggref->args) { - if (!do_convert_expr_to_common_plan(uid, lfirst(lc), ctx)) - goto end; + parentExprSwitchTo(old, ctx); + break; } + case T_BoolExpr: { + BoolExpr *boolExpr = (BoolExpr *)expr; - parentExprSwitchTo(old, ctx); - break; - } - case T_BoolExpr: { - BoolExpr *boolExpr = (BoolExpr *)expr; + old = parentExprSwitchTo(expr, ctx); - old = parentExprSwitchTo(expr, ctx); + uid = univPlanExprAddBoolExpr(ctx->univplan, pid, + (UnivplanBoolExprType)boolExpr->boolop); + foreach (lc, boolExpr->args) { + if (!do_convert_expr_to_common_plan(uid, lfirst(lc), ctx)) goto end; + } - uid = univPlanExprAddBoolExpr(ctx->univplan, pid, - (UnivplanBoolExprType)boolExpr->boolop); - foreach (lc, boolExpr->args) { - if (!do_convert_expr_to_common_plan(uid, lfirst(lc), ctx)) - goto end; + parentExprSwitchTo(old, ctx); + break; } + case T_NullTest: { + NullTest *nullTest = (NullTest *)expr; - parentExprSwitchTo(old, ctx); - break; - } - case T_NullTest: { - NullTest *nullTest = (NullTest *)expr; - - old = parentExprSwitchTo(expr, ctx); + old = parentExprSwitchTo(expr, ctx); - uid = univPlanExprAddNullTestExpr( - ctx->univplan, pid, (UnivplanNullTestType)nullTest->nulltesttype); - if (!do_convert_expr_to_common_plan(uid, nullTest->arg, ctx)) - goto end; + uid = univPlanExprAddNullTestExpr( + ctx->univplan, pid, (UnivplanNullTestType)nullTest->nulltesttype); + if (!do_convert_expr_to_common_plan(uid, 
nullTest->arg, ctx)) goto end; - parentExprSwitchTo(old, ctx); - break; - } - case T_BooleanTest: { - BooleanTest *boolTest = (BooleanTest *)expr; + parentExprSwitchTo(old, ctx); + break; + } + case T_BooleanTest: { + BooleanTest *boolTest = (BooleanTest *)expr; - old = parentExprSwitchTo(expr, ctx); + old = parentExprSwitchTo(expr, ctx); - uid = univPlanExprAddBoolTestExpr( - ctx->univplan, pid, (UnivplanBooleanTestType)boolTest->booltesttype); - if (!do_convert_expr_to_common_plan(uid, boolTest->arg, ctx)) - goto end; + uid = univPlanExprAddBoolTestExpr( + ctx->univplan, pid, (UnivplanBooleanTestType)boolTest->booltesttype); + if (!do_convert_expr_to_common_plan(uid, boolTest->arg, ctx)) goto end; - parentExprSwitchTo(old, ctx); - break; - } + parentExprSwitchTo(old, ctx); + break; + } - case T_CaseExpr: { - CaseExpr *caseexpr = (CaseExpr *)expr; + case T_CaseExpr: { + CaseExpr *caseexpr = (CaseExpr *)expr; - old = parentExprSwitchTo(expr, ctx); + old = parentExprSwitchTo(expr, ctx); - ctx->exprBufStack = lcons(caseexpr->arg, ctx->exprBufStack); + ctx->exprBufStack = lcons(caseexpr->arg, ctx->exprBufStack); - int32_t casetype = map_hawq_type_to_common_plan(caseexpr->casetype); - if (checkUnsupportedDataType(caseexpr->casetype, DateStyle)) { - goto end; - } - uid = univPlanExprAddCaseExpr(ctx->univplan, pid, casetype); - foreach (lc, caseexpr->args) { - if (!do_convert_expr_to_common_plan(uid, lfirst(lc), ctx)) + int32_t casetype = map_hawq_type_to_common_plan(caseexpr->casetype); + if (checkUnsupportedDataType(caseexpr->casetype, DateStyle)) { goto end; - } - - univPlanExprAddCaseExprDefresult(ctx->univplan, uid); - if (!do_convert_expr_to_common_plan(uid, caseexpr->defresult, ctx)) - goto end; - - parentExprSwitchTo(old, ctx); - ctx->exprBufStack = list_delete_first(ctx->exprBufStack); - break; - } - - case T_CaseWhen: { - CaseWhen *casewhen = (CaseWhen *)expr; + } + uid = univPlanExprAddCaseExpr(ctx->univplan, pid, casetype); + foreach (lc, caseexpr->args) { + 
if (!do_convert_expr_to_common_plan(uid, lfirst(lc), ctx)) goto end; + } - old = parentExprSwitchTo(expr, ctx); + univPlanExprAddCaseExprDefresult(ctx->univplan, uid); + if (!do_convert_expr_to_common_plan(uid, caseexpr->defresult, ctx)) + goto end; - uid = univPlanExprAddCaseWhen(ctx->univplan, pid); + parentExprSwitchTo(old, ctx); + ctx->exprBufStack = list_delete_first(ctx->exprBufStack); + break; + } - univPlanExprAddCaseWhenExpr(ctx->univplan, uid); - if (!do_convert_expr_to_common_plan(uid, casewhen->expr, ctx)) - goto end; + case T_CaseWhen: { + CaseWhen *casewhen = (CaseWhen *)expr; - univPlanExprAddCaseWhenResult(ctx->univplan, uid); - if (!do_convert_expr_to_common_plan(uid, casewhen->result, ctx)) - goto end; + old = parentExprSwitchTo(expr, ctx); - parentExprSwitchTo(old, ctx); - break; - } + uid = univPlanExprAddCaseWhen(ctx->univplan, pid); - case T_CaseTestExpr: { - if (!do_convert_expr_to_common_plan(pid, linitial(ctx->exprBufStack), ctx)) - goto end; - break; - } + univPlanExprAddCaseWhenExpr(ctx->univplan, uid); + if (!do_convert_expr_to_common_plan(uid, casewhen->expr, ctx)) goto end; - case T_Param: { - Param *param = (Param *)expr; - if (param->paramkind != PARAM_EXEC) - goto end; - univPlanExprAddParam( - ctx->univplan, pid, (UnivplanParamKind)param->paramkind, param->paramid, - map_hawq_type_to_common_plan(param->paramtype), param->paramtypmod); - break; - } + univPlanExprAddCaseWhenResult(ctx->univplan, uid); + if (!do_convert_expr_to_common_plan(uid, casewhen->result, ctx)) goto end; - case T_SubPlan: { - SubPlan *subplan = (SubPlan *)expr; - // TODO(chiyang): support ExecHashSubPlan - if (subplan->useHashTable) - goto end; - if (!checkSupportedSubLinkType(subplan->subLinkType)) - goto end; - uid = univPlanExprAddSubPlan( - ctx->univplan, pid, (UnivplanSubLinkType)subplan->subLinkType, - subplan->plan_id, subplan->qDispSliceId, - map_hawq_type_to_common_plan(subplan->firstColType), - subplan->firstColTypmod, subplan->useHashTable, 
subplan->is_initplan); - int num = 0; - if ((num = list_length(subplan->setParam)) > 0) { - int32_t *setParam = palloc(num * sizeof(int32_t)); - int idx = 0; - foreach (lc, subplan->setParam) - setParam[idx++] = lfirst_int(lc); - univPlanSubPlanAddSetParam(ctx->univplan, uid, num, setParam); - pfree(setParam); - } - if ((num = list_length(subplan->parParam)) > 0) { - int32_t *parParam = palloc(num * sizeof(int32_t)); - int idx = 0; - foreach (lc, subplan->parParam) - parParam[idx++] = lfirst_int(lc); - univPlanSubPlanAddParParam(ctx->univplan, uid, num, parParam); - pfree(parParam); - } - if ((num = list_length(subplan->paramIds)) > 0) { - int32_t *testexprParam = palloc(num * sizeof(int32_t)); - int idx = 0; - foreach (lc, subplan->paramIds) - testexprParam[idx++] = lfirst_int(lc); - univPlanSubPlanAddTestexprParam(ctx->univplan, uid, num, testexprParam); - pfree(testexprParam); + parentExprSwitchTo(old, ctx); + break; } - foreach (lc, subplan->args) { - if (!do_convert_expr_to_common_plan(uid, lfirst(lc), ctx)) + + case T_CaseTestExpr: { + if (!do_convert_expr_to_common_plan(pid, linitial(ctx->exprBufStack), + ctx)) goto end; + break; } - univPlanExprAddSubPlanTestexpr(ctx->univplan, uid); - if (subplan->testexpr && - !do_convert_expr_to_common_plan(uid, (Expr *)subplan->testexpr, ctx)) - goto end; - break; - } - - case T_ScalarArrayOpExpr: { - ScalarArrayOpExpr *scalarArrayOpExpr = (ScalarArrayOpExpr *)expr; - old = parentExprSwitchTo(expr, ctx); + case T_Param: { + Param *param = (Param *)expr; + if (param->paramkind != PARAM_EXEC) goto end; + univPlanExprAddParam(ctx->univplan, pid, + (UnivplanParamKind)param->paramkind, param->paramid, + map_hawq_type_to_common_plan(param->paramtype), + param->paramtypmod); + break; + } - mappingFuncId = HAWQ_FUNCOID_MAPPING(scalarArrayOpExpr->opfuncid); - if (IS_HAWQ_MAPPING_FUNCID_INVALID(mappingFuncId)) - goto end; - uid = univPlanExprAddScalarArrayOpExpr(ctx->univplan, pid, mappingFuncId, - scalarArrayOpExpr->useOr); - 
foreach (lc, scalarArrayOpExpr->args) { - if (!do_convert_expr_to_common_plan(uid, lfirst(lc), ctx)) + case T_SubPlan: { + SubPlan *subplan = (SubPlan *)expr; + // TODO(chiyang): support ExecHashSubPlan + if (subplan->useHashTable) goto end; + if (!checkSupportedSubLinkType(subplan->subLinkType)) goto end; + uid = univPlanExprAddSubPlan( + ctx->univplan, pid, (UnivplanSubLinkType)subplan->subLinkType, + subplan->plan_id, subplan->qDispSliceId, + map_hawq_type_to_common_plan(subplan->firstColType), + subplan->firstColTypmod, subplan->useHashTable, subplan->is_initplan); + int num = 0; + if ((num = list_length(subplan->setParam)) > 0) { + int32_t *setParam = palloc(num * sizeof(int32_t)); + int idx = 0; + foreach (lc, subplan->setParam) + setParam[idx++] = lfirst_int(lc); + univPlanSubPlanAddSetParam(ctx->univplan, uid, num, setParam); + pfree(setParam); + } + if ((num = list_length(subplan->parParam)) > 0) { + int32_t *parParam = palloc(num * sizeof(int32_t)); + int idx = 0; + foreach (lc, subplan->parParam) + parParam[idx++] = lfirst_int(lc); + univPlanSubPlanAddParParam(ctx->univplan, uid, num, parParam); + pfree(parParam); + } + if ((num = list_length(subplan->paramIds)) > 0) { + int32_t *testexprParam = palloc(num * sizeof(int32_t)); + int idx = 0; + foreach (lc, subplan->paramIds) + testexprParam[idx++] = lfirst_int(lc); + univPlanSubPlanAddTestexprParam(ctx->univplan, uid, num, testexprParam); + pfree(testexprParam); + } + foreach (lc, subplan->args) { + if (!do_convert_expr_to_common_plan(uid, lfirst(lc), ctx)) goto end; + } + univPlanExprAddSubPlanTestexpr(ctx->univplan, uid); + if (subplan->testexpr && + !do_convert_expr_to_common_plan(uid, (Expr *)subplan->testexpr, ctx)) goto end; + break; } - parentExprSwitchTo(old, ctx); - break; - } + case T_ScalarArrayOpExpr: { + ScalarArrayOpExpr *scalarArrayOpExpr = (ScalarArrayOpExpr *)expr; - case T_CoalesceExpr: { - CoalesceExpr *coalesceExpr = (CoalesceExpr *)expr; + old = parentExprSwitchTo(expr, ctx); - old = 
parentExprSwitchTo(expr, ctx); + mappingFuncId = HAWQ_FUNCOID_MAPPING(scalarArrayOpExpr->opfuncid); + if (IS_HAWQ_MAPPING_FUNCID_INVALID(mappingFuncId)) goto end; + uid = univPlanExprAddScalarArrayOpExpr(ctx->univplan, pid, mappingFuncId, + scalarArrayOpExpr->useOr); + foreach (lc, scalarArrayOpExpr->args) { + if (!do_convert_expr_to_common_plan(uid, lfirst(lc), ctx)) goto end; + } - int32_t coalesceType = - map_hawq_type_to_common_plan(coalesceExpr->coalescetype); - if (checkUnsupportedDataType(coalesceExpr->coalescetype, DateStyle)) { - goto end; - } - uid = univPlanExprAddCoalesceExpr(ctx->univplan, pid, coalesceType, - exprTypmod(coalesceExpr)); - foreach (lc, coalesceExpr->args) { - if (!do_convert_expr_to_common_plan(uid, lfirst(lc), ctx)) - goto end; + parentExprSwitchTo(old, ctx); + break; } - parentExprSwitchTo(old, ctx); - break; - } + case T_CoalesceExpr: { + CoalesceExpr *coalesceExpr = (CoalesceExpr *)expr; - case T_NullIfExpr: { - NullIfExpr *nullIfExpr = (NullIfExpr *)expr; + old = parentExprSwitchTo(expr, ctx); - old = parentExprSwitchTo(expr, ctx); + int32_t coalesceType = + map_hawq_type_to_common_plan(coalesceExpr->coalescetype); + if (checkUnsupportedDataType(coalesceExpr->coalescetype, DateStyle)) { + goto end; + } + uid = univPlanExprAddCoalesceExpr(ctx->univplan, pid, coalesceType, + exprTypmod(coalesceExpr)); + foreach (lc, coalesceExpr->args) { + if (!do_convert_expr_to_common_plan(uid, lfirst(lc), ctx)) goto end; + } - mappingFuncId = HAWQ_FUNCOID_MAPPING(nullIfExpr->opfuncid); - if (IS_HAWQ_MAPPING_FUNCID_INVALID(mappingFuncId)) - goto end; - int32_t nullIfType = map_hawq_type_to_common_plan(exprType(nullIfExpr)); - if (checkUnsupportedDataType(exprType(nullIfExpr), DateStyle)) { - goto end; + parentExprSwitchTo(old, ctx); + break; } - uid = univPlanExprAddNullIfExpr(ctx->univplan, pid, mappingFuncId, - nullIfType, exprTypmod(nullIfExpr)); - foreach (lc, nullIfExpr->args) { - if (!do_convert_expr_to_common_plan(uid, lfirst(lc), ctx)) + + 
case T_NullIfExpr: { + NullIfExpr *nullIfExpr = (NullIfExpr *)expr; + + old = parentExprSwitchTo(expr, ctx); + + mappingFuncId = HAWQ_FUNCOID_MAPPING(nullIfExpr->opfuncid); + if (IS_HAWQ_MAPPING_FUNCID_INVALID(mappingFuncId)) goto end; + int32_t nullIfType = map_hawq_type_to_common_plan(exprType(nullIfExpr)); + if (checkUnsupportedDataType(exprType(nullIfExpr), DateStyle)) { goto end; + } + uid = univPlanExprAddNullIfExpr(ctx->univplan, pid, mappingFuncId, + nullIfType, exprTypmod(nullIfExpr)); + foreach (lc, nullIfExpr->args) { + if (!do_convert_expr_to_common_plan(uid, lfirst(lc), ctx)) goto end; + } + + parentExprSwitchTo(old, ctx); + break; } - parentExprSwitchTo(old, ctx); - break; - } + case T_DistinctExpr: { + DistinctExpr *distExpr = (DistinctExpr *)expr; - case T_DistinctExpr: { - DistinctExpr *distExpr = (DistinctExpr *)expr; + old = parentExprSwitchTo(expr, ctx); - old = parentExprSwitchTo(expr, ctx); + mappingFuncId = HAWQ_FUNCOID_MAPPING(distExpr->opfuncid); + if (IS_HAWQ_MAPPING_FUNCID_INVALID(mappingFuncId)) goto end; + uid = univPlanExprAddDistinctExpr(ctx->univplan, pid, mappingFuncId); + foreach (lc, distExpr->args) { + if (!do_convert_expr_to_common_plan(uid, lfirst(lc), ctx)) goto end; + } - mappingFuncId = HAWQ_FUNCOID_MAPPING(distExpr->opfuncid); - if (IS_HAWQ_MAPPING_FUNCID_INVALID(mappingFuncId)) - goto end; - uid = univPlanExprAddDistinctExpr(ctx->univplan, pid, mappingFuncId); - foreach (lc, distExpr->args) { - if (!do_convert_expr_to_common_plan(uid, lfirst(lc), ctx)) - goto end; + parentExprSwitchTo(old, ctx); + break; } - parentExprSwitchTo(old, ctx); - break; - } - - case T_Grouping: { - old = parentExprSwitchTo(expr, ctx); - uid = univPlanExprAddGrouping(ctx->univplan, pid); - parentExprSwitchTo(old, ctx); - break; - } + case T_Grouping: { + old = parentExprSwitchTo(expr, ctx); + uid = univPlanExprAddGrouping(ctx->univplan, pid); + parentExprSwitchTo(old, ctx); + break; + } - case T_GroupId: { - old = parentExprSwitchTo(expr, ctx); - 
uid = univPlanExprAddGroupId(ctx->univplan, pid); - parentExprSwitchTo(old, ctx); - break; - } + case T_GroupId: { + old = parentExprSwitchTo(expr, ctx); + uid = univPlanExprAddGroupId(ctx->univplan, pid); + parentExprSwitchTo(old, ctx); + break; + } - case T_GroupingFunc: { - GroupingFunc *groupingFunc = (GroupId *)expr; - old = parentExprSwitchTo(expr, ctx); - int32_t *args = palloc(list_length(groupingFunc->args) * sizeof(int32_t)); + case T_GroupingFunc: { + GroupingFunc *groupingFunc = (GroupId *)expr; + old = parentExprSwitchTo(expr, ctx); + int32_t *args = palloc(list_length(groupingFunc->args) * sizeof(int32_t)); - ListCell *lc; - int idx = 0; - foreach (lc, groupingFunc->args) { args[idx++] = (int)intVal(lfirst(lc)); } - uid = univPlanExprAddGroupingFunc(ctx->univplan, pid, args, - list_length(groupingFunc->args), - groupingFunc->ngrpcols); - pfree(args); - parentExprSwitchTo(old, ctx); - break; - } + ListCell *lc; + int idx = 0; + foreach (lc, groupingFunc->args) { + args[idx++] = (int)intVal(lfirst(lc)); + } + uid = univPlanExprAddGroupingFunc(ctx->univplan, pid, args, + list_length(groupingFunc->args), + groupingFunc->ngrpcols); + pfree(args); + parentExprSwitchTo(old, ctx); + break; + } - default: - goto end; + default: + goto end; } return true; @@ -2014,11 +2033,11 @@ void getFmtName(char *fmtOptsJson, char **fmtName) { *fmtName = NULL; struct json_object *jobj = json_tokener_parse(fmtOptsJson); - json_object *returnObj; + struct json_object *returnObj; if (jobj != NULL && json_object_object_get_ex(jobj, "formatter", &returnObj)) { if (returnObj != NULL) { - const char *str = json_object_to_json_string(returnObj); + const char *str = json_object_get_string(returnObj); *fmtName = (char *)palloc0(strlen(str) + 1); strcpy(*fmtName, str); } @@ -2033,12 +2052,10 @@ void checkUnsupportedStmt(PlannedStmt *stmt, CommonPlanContext *ctx) { if (stmt->commandType == CMD_INSERT && !checkInsertSupportTable(stmt)) goto end; - if (stmt->originNodeType == T_CopyStmt) 
- goto end; + if (stmt->originNodeType == T_CopyStmt) goto end; // disable insert into for common plan currently - if (stmt->intoClause) - goto end; + if (stmt->intoClause) goto end; return; @@ -2063,15 +2080,13 @@ bool checkInsertSupportTable(PlannedStmt *stmt) { " WHERE reloid = :1 " " FOR UPDATE ", ObjectIdGetDatum(rte->relid))); - if (!HeapTupleIsValid(tuple)) - goto end; + if (!HeapTupleIsValid(tuple)) goto end; bool isNull; char fmtCode = DatumGetChar(heap_getattr(tuple, Anum_pg_exttable_fmttype, RelationGetDescr(pgExtTableRel), &isNull)); - if (!fmttype_is_custom(fmtCode)) - goto end; + if (!fmttype_is_custom(fmtCode)) goto end; Datum fmtOptDatum = heap_getattr(tuple, Anum_pg_exttable_fmtopts, RelationGetDescr(pgExtTableRel), &isNull); @@ -2084,8 +2099,7 @@ bool checkInsertSupportTable(PlannedStmt *stmt) { bool isSupported = fmtName && (!pg_strncasecmp(fmtName, "magmaap", FORMAT_MAGMAAP_LEN) || !pg_strncasecmp(fmtName, "orc", FORMAT_ORC_LEN)); - if (!isSupported) - goto end; + if (!isSupported) goto end; heap_close(pgExtTableRel, RowExclusiveLock); return true; @@ -2101,12 +2115,10 @@ void checkReadStatsOnlyForAgg(Agg *node, CommonPlanContext *ctx) { ((Plan *)node)->lefttree->type == T_Append || ((Plan *)node)->lefttree->type == T_AppendOnlyScan) { // not work for group by statements - if (node->numCols - node->numNullCols > 0) - return; + if (node->numCols - node->numNullCols > 0) return; // not work for scan with filter - if (((Plan *)node)->lefttree->qual) - return; + if (((Plan *)node)->lefttree->qual) return; // for append node if (((Plan *)node)->lefttree->type == T_Append) { @@ -2114,15 +2126,12 @@ void checkReadStatsOnlyForAgg(Agg *node, CommonPlanContext *ctx) { ListCell *lc; foreach (lc, appendNode->appendplans) { Plan *appendPlan = (Plan *)lfirst(lc); - if (!appendPlan->type == T_ExternalScan) - return; - if (appendPlan->qual) - return; + if (!appendPlan->type == T_ExternalScan) return; + if (appendPlan->qual) return; ListCell *lstcell; foreach 
(lstcell, appendPlan->targetlist) { TargetEntry *te = (TargetEntry *)lfirst(lstcell); - if (te->expr->type != T_Var) - return; + if (te->expr->type != T_Var) return; } } } @@ -2142,8 +2151,7 @@ void checkReadStatsOnlyForAgg(Agg *node, CommonPlanContext *ctx) { return; } // special case for count(*) - if (list_length(aggref->args) == 0) - return; + if (list_length(aggref->args) == 0) return; ListCell *lc2; foreach (lc2, aggref->args) { Expr *expr = lfirst(lc2); @@ -2175,14 +2183,14 @@ void checkReadStatsOnlyForAgg(Agg *node, CommonPlanContext *ctx) { bool checkSupportedSubLinkType(SubLinkType sublinkType) { switch (sublinkType) { - case EXISTS_SUBLINK: - case ALL_SUBLINK: - case ANY_SUBLINK: - case EXPR_SUBLINK: - case NOT_EXISTS_SUBLINK: - return true; - default: - return false; + case EXISTS_SUBLINK: + case ALL_SUBLINK: + case ANY_SUBLINK: + case EXPR_SUBLINK: + case NOT_EXISTS_SUBLINK: + return true; + default: + return false; } } @@ -2214,33 +2222,33 @@ void convert_querydesc_to_common_plan(QueryDesc *queryDesc, char *extval = NULL; if (!pxd->isnull) { switch (pxd->ptype) { - case BOOLOID: - case INT8OID: - case INT4OID: - case INT2OID: - case FLOAT8OID: - case FLOAT4OID: - case TIMEOID: - case TIMETZOID: - extval = OidOutputFunctionCall(typoutput, pxd->value); - break; - case DATEOID: - case TIMESTAMPOID: - case TIMESTAMPTZOID: - DateStyle = USE_ISO_DATES; - DateOrder = DATEORDER_MDY; - extval = OidOutputFunctionCall(typoutput, pxd->value); - DateStyle = savedDateStyle; - DateOrder = savedDateOrder; - break; - case INTERVALOID: { - Interval *ival = (Interval *)DatumGetPointer(pxd->value); - extval = palloc(sizeof(char) * INT64_MAX_LENGTH * 2); - sprintf(extval, "%d-%d-%lld", ival->month, ival->day, ival->time); - } break; - default: - if (pxd->value) + case BOOLOID: + case INT8OID: + case INT4OID: + case INT2OID: + case FLOAT8OID: + case FLOAT4OID: + case TIMEOID: + case TIMETZOID: + extval = OidOutputFunctionCall(typoutput, pxd->value); + break; + case 
DATEOID: + case TIMESTAMPOID: + case TIMESTAMPTZOID: + DateStyle = USE_ISO_DATES; + DateOrder = DATEORDER_MDY; extval = OidOutputFunctionCall(typoutput, pxd->value); + DateStyle = savedDateStyle; + DateOrder = savedDateOrder; + break; + case INTERVALOID: { + Interval *ival = (Interval *)DatumGetPointer(pxd->value); + extval = palloc(sizeof(char) * INT64_MAX_LENGTH * 2); + sprintf(extval, "%d-%d-%lld", ival->month, ival->day, ival->time); + } break; + default: + if (pxd->value) + extval = OidOutputFunctionCall(typoutput, pxd->value); } } univPlanAddParamInfo(ctx->univplan, diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c index ecfda8c84..60e96bade 100644 --- a/src/backend/optimizer/plan/planmain.c +++ b/src/backend/optimizer/plan/planmain.c @@ -495,6 +495,8 @@ PlannerConfig *DefaultPlannerConfig(void) c1->enable_magma_bitmapscan = enable_magma_bitmapscan; c1->enable_magma_seqscan = enable_magma_seqscan; c1->enable_magma_indexonlyscan = enable_magma_indexonlyscan; + c1->enable_orc_indexscan = enable_orc_indexscan; + c1->enable_orc_indexonlyscan = enable_orc_indexonlyscan; c1->enable_tidscan = enable_tidscan; c1->enable_sort = enable_sort; c1->enable_hashagg = enable_hashagg; diff --git a/src/backend/optimizer/plan/planpartition.c b/src/backend/optimizer/plan/planpartition.c index 6af2b4b72..e285019ed 100644 --- a/src/backend/optimizer/plan/planpartition.c +++ b/src/backend/optimizer/plan/planpartition.c @@ -781,6 +781,8 @@ AdjustVarnoWalker(Node *node, AdjustVarnoContext *ctx) case T_MagmaIndexScan: case T_MagmaIndexOnlyScan: case T_IndexScan: + case T_OrcIndexScan: + case T_OrcIndexOnlyScan: case T_BitmapIndexScan: case T_BitmapHeapScan: case T_BitmapTableScan: diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index 53e2bff69..a3ca1b19d 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -534,6 +534,8 @@ set_plan_refs(PlannerGlobal *glob, Plan 
*plan, const int rtoffset) break; case T_IndexScan: + case T_OrcIndexScan: + case T_OrcIndexOnlyScan: { IndexScan *splan = (IndexScan *) plan; @@ -545,7 +547,7 @@ set_plan_refs(PlannerGlobal *glob, Plan *plan, const int rtoffset) #ifdef USE_ASSERT_CHECKING RangeTblEntry *rte = rt_fetch(splan->scan.scanrelid, glob->finalrtable); char relstorage = get_rel_relstorage(rte->relid); - Assert(!relstorage_is_ao(relstorage)); + Assert(!((relstorage_is_ao(relstorage) && (relstorage != RELSTORAGE_ORC)))); #endif splan->scan.plan.targetlist = diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index 5da1ec965..a448f0afa 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -1172,6 +1172,8 @@ finalize_plan(PlannerInfo *root, Plan *plan, List *rtable, break; case T_IndexScan: + case T_OrcIndexScan: + case T_OrcIndexOnlyScan: finalize_primnode((Node *) ((IndexScan *) plan)->indexqual, &context); /* diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 217b7edfa..29200bf39 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -248,6 +248,8 @@ pathnode_walk_kids(Path *path, case T_AppendOnlyScan: case T_ParquetScan: case T_IndexScan: + case T_OrcIndexScan: + case T_OrcIndexOnlyScan: case T_TidScan: case T_SubqueryScan: case T_FunctionScan: @@ -1248,6 +1250,11 @@ create_index_path(PlannerInfo *root, if (index->rel->ext == RELSTORAGE_EXTERNAL) pathnode->path.pathtype = index->indexonly ? T_MagmaIndexOnlyScan : T_MagmaIndexScan; + else if (index->rel->ext == RELSTORAGE_ORC) + { + pathnode->path.pathtype = index->indexonly ? 
+ T_OrcIndexOnlyScan : T_OrcIndexScan; + } else pathnode->path.pathtype = T_IndexScan; pathnode->path.parent = rel; @@ -1332,17 +1339,17 @@ create_bitmap_heap_path(PlannerInfo *root, BitmapHeapPath *pathnode = makeNode(BitmapHeapPath); if (rel->ext == RELSTORAGE_EXTERNAL) - pathnode->path.pathtype = T_MagmaBitmapScan; + pathnode->path.pathtype = T_MagmaBitmapScan; else - pathnode->path.pathtype = T_BitmapHeapScan; + pathnode->path.pathtype = T_BitmapHeapScan; pathnode->path.parent = rel; pathnode->path.pathkeys = NIL; /* always unordered */ - /* Distribution is same as the base table. */ - pathnode->path.locus = cdbpathlocus_from_baserel(root, rel); - pathnode->path.motionHazard = false; - pathnode->path.rescannable = true; + /* Distribution is same as the base table. */ + pathnode->path.locus = cdbpathlocus_from_baserel(root, rel); + pathnode->path.motionHazard = false; + pathnode->path.rescannable = true; pathnode->bitmapqual = bitmapqual; pathnode->isjoininner = (outer_rel != NULL); diff --git a/src/backend/optimizer/util/walkers.c b/src/backend/optimizer/util/walkers.c index 010c421de..67e95b3fe 100644 --- a/src/backend/optimizer/util/walkers.c +++ b/src/backend/optimizer/util/walkers.c @@ -947,6 +947,8 @@ plan_tree_walker(Node *node, case T_IndexScan: case T_DynamicIndexScan: + case T_OrcIndexScan: + case T_OrcIndexOnlyScan: if (walk_scan_node_fields((Scan *) node, walker, context)) return true; if (walker((Node *) ((IndexScan *) node)->indexqual, context)) diff --git a/src/backend/postmaster/identity.c b/src/backend/postmaster/identity.c index 2e5ed1945..acf4dc915 100644 --- a/src/backend/postmaster/identity.c +++ b/src/backend/postmaster/identity.c @@ -551,7 +551,7 @@ bool GetRelOpt_appendonly_fromOptions(List *options, bool *appendonly) if (pg_strcasecmp(def->defname, "appendonly") == 0) { bool need_free_arg = false; - *appendonly = defGetString(def, &need_free_arg) == "true"; + *appendonly = pg_strcasecmp(defGetString(def, &need_free_arg), "true") == 0; 
return true; } } diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index cd48d605c..03858a132 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -103,6 +103,7 @@ #include "cdb/cdbquerycontextdispatching.h" #include "cdb/ml_ipc.h" #include "utils/guc.h" +#include "utils/faultinjector.h" #include "access/twophase.h" #include #include "utils/resscheduler.h" @@ -1136,6 +1137,17 @@ exec_mpp_query(const char *query_string, ¤tFilesystemCredentialsMemoryContext); FinalizeQueryContextInfo(query->contextdisp); } + /* rebuild relation info for native orc index */ + if (utilityStmt->type == T_IndexStmt) + { + IndexStmt *stmt = (IndexStmt*)utilityStmt; + if (stmt->contextdisp) + { + RebuildQueryContext(stmt->contextdisp, ¤tFilesystemCredentials, + ¤tFilesystemCredentialsMemoryContext); + FinalizeQueryContextInfo(stmt->contextdisp); + } + } } /* @@ -5424,6 +5436,15 @@ PostgresMain(int argc, char *argv[], const char *username) } } else { + #ifdef FAULT_INJECTOR + // expect FaultInjectorType: FaultInjectorTypeError, FaultInjectorTypeFatal + FaultInjectorType_e ret = FaultInjector_InjectFaultIfSet( + QEQueryError, + DDLNotSpecified, + "", // databaseName + ""); // tableName + #endif + if (serializedCommonPlanLen > 0) { exec_mpp_query_new( serializedCommonPlan, diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c index 1cd6e2bae..81d4635e1 100644 --- a/src/backend/tcop/utility.c +++ b/src/backend/tcop/utility.c @@ -1401,6 +1401,13 @@ ProcessUtility(Node *parsetree, case T_IndexStmt: /* CREATE INDEX */ { + /* So far, only native orc support create index in qe */ + if (Gp_role == GP_ROLE_EXECUTE) + { + IndexStmt *stmt = (IndexStmt *) parsetree; + CDBDefineIndex(stmt); + return; + } IndexStmt *stmt = (IndexStmt *) parsetree; Oid relid; LOCKMODE lockmode; diff --git a/src/backend/utils/misc/faultinjector.c b/src/backend/utils/misc/faultinjector.c index 192b884ae..f5233c237 100644 --- a/src/backend/utils/misc/faultinjector.c 
+++ b/src/backend/utils/misc/faultinjector.c @@ -117,6 +117,7 @@ FaultInjectorTypeEnumToString[] = { _("user_cancel"), _("proc_die"), _("interrupt"), + _("quiet_exit"), _("not recognized"), }; @@ -327,6 +328,22 @@ FaultInjectorIdentifierEnumToString[] = { /* inject fault before cleaning up a runaway query */ _("interconnect_stop_ack_is_lost"), /* inject fault in interconnect to skip sending the stop ack */ + _("main_dispatch_connect"), + /* inject fault in main dispatcher connect, set error */ + _("main_dispatch_get_qes_detail"), + /* inject fault in main dispatch get qe detail, set error */ + _("proxy_dispatcher_connect"), + /* inject fault in proxy dispatcher connect, set error */ + _("main_dispatch_send_plan"), + /* inject fault in main dispatch send plan, set error */ + _("proxy_dispatch_send_plan"), + /* inject fault in proxy dispatch send plan, set error */ + _("qe_query_error"), + /* inject fault in qe query error, set error or fatal */ + _("proxy_dispatch_consume_data"), + /* inject fault in proxy dispatch consume data, set error or quiet_exit*/ + _("main_dispatch_consume_data"), + /* inject fault in main dispatch consume data, set error */ _("not recognized"), }; @@ -819,7 +836,8 @@ FaultInjector_InjectFaultIfSet( break; } - + case FaultInjectorQuietExit: + exit(0); default: ereport(LOG, diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 25f871416..723562b0f 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -641,6 +641,7 @@ char *enable_alpha_newqe_str = "ON"; // filter push down for orc scan char *orc_enable_filter_pushdown = "ON"; int orc_update_delete_work_mem; +char *orc_enable_no_limit_numeric = "OFF"; // enable magma in hawq bool hawq_init_with_magma = false; @@ -673,6 +674,8 @@ bool enable_magma_indexscan = false; bool enable_magma_seqscan = true; bool enable_magma_bitmapscan = false; bool enable_magma_indexonlyscan = false; +bool enable_orc_indexscan = false; +bool enable_orc_indexonlyscan = 
false; bool force_bitmap_table_scan = false; bool enable_tidscan = true; bool enable_sort = true; @@ -1150,6 +1153,22 @@ static struct config_bool ConfigureNamesBool[] = &enable_magma_indexonlyscan, true, NULL, NULL }, + { + {"enable_orc_indexonlyscan", PGC_USERSET, QUERY_TUNING_METHOD, + gettext_noop("Enables the planner's use of orc-indexonly-scan plans."), + NULL + }, + &enable_orc_indexonlyscan, + false, NULL, NULL + }, + { + {"enable_orc_indexscan", PGC_USERSET, QUERY_TUNING_METHOD, + gettext_noop("Enables the planner's use of orc-index-scan plans."), + NULL + }, + &enable_orc_indexscan, + false, NULL, NULL + }, { {"enable_bitmapscan", PGC_USERSET, QUERY_TUNING_METHOD, gettext_noop("Enables the planner's use of bitmap-scan plans."), @@ -7570,6 +7589,16 @@ static struct config_string ConfigureNamesString[] = "ON", assign_switch_mode, NULL }, + { + {"orc_enable_no_limit_numeric", PGC_USERSET, EXTERNAL_TABLES, + gettext_noop("Enable no limit numeric"), + gettext_noop("Valid values are \"OFF\" and \"ON\"."), + GUC_NOT_IN_SAMPLE | GUC_NO_SHOW_ALL + }, + &orc_enable_no_limit_numeric, + "OFF", assign_switch_mode, NULL + }, + { {"magma_enable_shm", PGC_USERSET, EXTERNAL_TABLES, gettext_noop("Enable share memory for magma scan"), diff --git a/src/include/access/orcam.h b/src/include/access/orcam.h index 339ecba47..f181a9c81 100644 --- a/src/include/access/orcam.h +++ b/src/include/access/orcam.h @@ -26,6 +26,11 @@ #include "cdb/cdbquerycontextdispatching.h" #include "nodes/relation.h" +// here we use orc stripe size in MBytes +#define DEFAULT_ORC_STRIPE_SIZE 64 +#define MIN_ORC_STRIPE_SIZE 1 +#define MAX_ORC_STRIPE_SIZE 1024 + struct ScanState; typedef struct OrcFormatData OrcFormatData; @@ -105,4 +110,8 @@ extern uint64 orcEndUpdate(OrcUpdateDescData *updateDesc); // utils extern bool isDirectDispatch(Plan *plan); +// index +extern int64_t* orcCreateIndex(Relation rel, int idxId, List* segno, int64* eof, + List* columnsToRead, int sortIdx); + #endif /* ORCAM_H_ */ 
diff --git a/src/include/access/orcsegfiles.h b/src/include/access/orcsegfiles.h index f8b458260..a23521bb9 100644 --- a/src/include/access/orcsegfiles.h +++ b/src/include/access/orcsegfiles.h @@ -44,6 +44,10 @@ extern void updateOrcFileSegInfo(Relation rel, AppendOnlyEntry *aoEntry, int segNo, int64 eof, int64 uncompressedEof, int64 tupCountAdded, bool forInsert); +extern void insertInitialOrcIndexEntry(AppendOnlyEntry *aoEntry, int idxOid, int segNo); +extern void updateOrcIndexFileInfo(AppendOnlyEntry *aoEntry, int idxOid, int segNo, int64 eof); +extern void deleteOrcIndexFileInfo(AppendOnlyEntry *aoEntry, int idxOid); + extern List *orcGetAllSegFileSplits(AppendOnlyEntry *aoEntry, Snapshot snapshot); diff --git a/src/include/cdb/cdbdispatchresult.h b/src/include/cdb/cdbdispatchresult.h index c591da745..10c17c4b6 100644 --- a/src/include/cdb/cdbdispatchresult.h +++ b/src/include/cdb/cdbdispatchresult.h @@ -284,6 +284,10 @@ void cdbdisp_handleModifiedCatalogOnSegments(CdbDispatchResults *results, void (*handler)(QueryContextDispatchingSendBack sendback)); +void +cdbdisp_handleModifiedOrcIndexCatalogOnSegments(List **segnoToVseg, CdbDispatchResults *results, + void (*handler)(QueryContextDispatchingSendBack sendback, List **l1)); + extern void cdbdisp_handleModifiedCatalogOnSegmentsForUD( CdbDispatchResults *results, List **relFileNodeInfo, void (*handler1)(QueryContextDispatchingSendBack sendback, List **l1, diff --git a/src/include/cdb/cdbquerycontextdispatching.h b/src/include/cdb/cdbquerycontextdispatching.h index 32d7cd761..e65c2820a 100644 --- a/src/include/cdb/cdbquerycontextdispatching.h +++ b/src/include/cdb/cdbquerycontextdispatching.h @@ -103,6 +103,9 @@ CreateQueryContextDispatchingSendBack(int nfile); extern void UpdateCatalogModifiedOnSegments(QueryContextDispatchingSendBack sendback); +extern void +UpdateCatalogOrcIndexModifiedOnSegments(QueryContextDispatchingSendBack sendback, List **segnoToVseg); + extern void 
UpdateCatalogModifiedOnSegmentsForUD( QueryContextDispatchingSendBack sendback, List **relFileNodeInfo, List **indexList); diff --git a/src/include/commands/defrem.h b/src/include/commands/defrem.h index 4e6d00697..c5e6323c7 100644 --- a/src/include/commands/defrem.h +++ b/src/include/commands/defrem.h @@ -39,6 +39,7 @@ extern void DefineIndex(Oid relationId, bool concurrent, bool part_expanded, /* MPP */ IndexStmt *stmt /* MPP */); +extern void CDBDefineIndex(IndexStmt *stmt); extern void RemoveIndex(RangeVar *relation, DropBehavior behavior); extern void ReindexIndex(ReindexStmt *stmt); extern void ReindexTable(ReindexStmt *stmt); diff --git a/src/include/commands/tablecmds.h b/src/include/commands/tablecmds.h index 105215d02..388b39ff0 100644 --- a/src/include/commands/tablecmds.h +++ b/src/include/commands/tablecmds.h @@ -185,4 +185,7 @@ extern Oid transformFkeyCheckAttrs(Relation pkrel, extern void buildExternalTableFormatOptionStringInJson(const char *fmtOptsStr, char **fmtOptsJson); + +extern void buildDefaultFormatterOptionsInJson( + int encoding, char externalFmtType, struct json_object *optJsonObject); #endif /* TABLECMDS_H */ diff --git a/src/include/nodes/nodes.h b/src/include/nodes/nodes.h index 56fd1c7a7..c45e907e9 100644 --- a/src/include/nodes/nodes.h +++ b/src/include/nodes/nodes.h @@ -118,6 +118,8 @@ typedef enum NodeTag T_MagmaIndexScan, T_MagmaIndexOnlyScan, T_MagmaBitmapScan, + T_OrcIndexScan, + T_OrcIndexOnlyScan, T_Plan_End, /* this one isn't a subclass of Plan: */ T_PlanInvalItem, @@ -178,6 +180,8 @@ typedef enum NodeTag T_MagmaIndexScanState, T_MagmaIndexOnlyScanState, T_MagmaBitmapScanState, + T_OrcIndexScanState, + T_OrcIndexOnlyScanState, T_PlanState_End, /* @@ -320,6 +324,7 @@ typedef enum NodeTag T_ResultRelSegFileInfoMapNode, T_VirtualSegmentNode, T_PlannerParamItem, + T_NativeOrcIndexFile, /* Tags for MPP planner nodes (relation.h) */ T_CdbMotionPath = 580, diff --git a/src/include/nodes/parsenodes.h b/src/include/nodes/parsenodes.h 
index 7bafd4d61..993ad6f2a 100644 --- a/src/include/nodes/parsenodes.h +++ b/src/include/nodes/parsenodes.h @@ -2162,6 +2162,12 @@ typedef struct IndexStmt { * bitmap index needs 3 additional oids. */ bool do_part; /* build indexes for child partitions */ bool magma; + + /* cdb create index */ + Oid relationOid; + List *allidxinfos; /* list of NativeOrcIndexFile */ + List *columnsToRead; /* all indexs of key and include columns */ + QueryContextInfo *contextdisp; /* native orc need to dispatch relation */ } IndexStmt; /* ---------------------- diff --git a/src/include/nodes/plannerconfig.h b/src/include/nodes/plannerconfig.h index fd4218ce9..8f6e38173 100644 --- a/src/include/nodes/plannerconfig.h +++ b/src/include/nodes/plannerconfig.h @@ -37,6 +37,8 @@ typedef struct PlannerConfig bool enable_magma_bitmapscan; bool enable_magma_seqscan; bool enable_magma_indexonlyscan; + bool enable_orc_indexonlyscan; + bool enable_orc_indexscan; bool enable_tidscan; bool enable_sort; bool enable_hashagg; diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index b191d5947..ce6a7941f 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -564,8 +564,12 @@ typedef struct IndexScan /* logical index to use */ LogicalIndexInfo *logicalIndexInfo; + bool indexonly; /* attempt to skip user data fetches */ } IndexScan; +typedef IndexScan OrcIndexScan; +typedef IndexScan OrcIndexOnlyScan; + /* * DynamicIndexScan * Scan a list of indexes that will be determined at run time. 
diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index e7351410a..753932a85 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -1580,4 +1580,13 @@ typedef struct ResultRelSegFileInfoMapNode List *segfileinfos; } ResultRelSegFileInfoMapNode; +/* Native Orc index file info */ +typedef struct NativeOrcIndexFile +{ + NodeTag type; + Oid indexOid; + List *segno; + int64 *eof; +} NativeOrcIndexFile; + #endif /* RELATION_H */ diff --git a/src/include/optimizer/newPlanner.h b/src/include/optimizer/newPlanner.h index 6067169e8..517f777c2 100644 --- a/src/include/optimizer/newPlanner.h +++ b/src/include/optimizer/newPlanner.h @@ -56,6 +56,8 @@ extern const char *show_new_interconnect_type(); #define MAGMATYPE "magma" #define ORCTYPE "orc" +#define TEXTTYPE "text" +#define CSVTYPE "csv" typedef struct CommonPlanContext { plan_tree_base_prefix base; diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 48b7ac41a..412e95f7c 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -46,6 +46,8 @@ extern bool enable_magma_indexscan; extern bool enable_magma_bitmapscan; extern bool enable_magma_indexonlyscan; extern bool enable_magma_seqscan; +extern bool enable_orc_indexscan; +extern bool enable_orc_indexonlyscan; extern bool enable_tidscan; extern bool enable_sort; extern bool enable_hashagg; diff --git a/src/include/utils/faultinjector.h b/src/include/utils/faultinjector.h index b7cf10f33..abc6d8b4d 100644 --- a/src/include/utils/faultinjector.h +++ b/src/include/utils/faultinjector.h @@ -217,9 +217,19 @@ typedef enum FaultInjectorIdentifier_e { InterconnectStopAckIsLost, - /* INSERT has to be done before that line */ + /*new dispatcher*/ + MainDispatchConnect, + MainDispatchGetQEsDetail, + ProxyDispatcherConnect, + MainDispatchSendPlan, + ProxyDispatchSendPlan, + QEQueryError, + ProxyDispatchConsumeData, + MainDispatchConsumeData, + + /* INSERT has to be done before that 
line */ FaultInjectorIdMax, - + } FaultInjectorIdentifier_e; /* @@ -272,7 +282,8 @@ typedef enum FaultInjectorType_e { FaultInjectorTypeProcDie, FaultInjectorTypeInterrupt, - + /* for exit at where can not call ereport */ + FaultInjectorQuietExit, /* INSERT has to be done before that line */ FaultInjectorTypeMax, diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h index a788debbd..21ee29228 100644 --- a/src/include/utils/guc.h +++ b/src/include/utils/guc.h @@ -323,13 +323,22 @@ extern int hawq_rm_nvseg_for_analyze_part_perquery_perseg_limit; extern int hawq_rm_nvseg_for_analyze_nopart_perquery_limit; extern int hawq_rm_nvseg_for_analyze_part_perquery_limit; -///////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// /* - * These guc and related code are temporary, maybe will be delete afterwords. + * Creating heap table on HAWQ's master node is not supported by default. + * To do this, you need to turn on the following two GUCs. + * + * enable_heap_table_on_master: Allow creating heap table. + * + * enable_pg_default_for_non_system_table: Allow putting non-system table(e.g., heap table) + * into pg_default. HAWQ does not support creating tablespace on its master node, + * so we use pg_default as a workaround. 
+ * + * Using: CREATE TABLE t(id int) WITH(appendonly = false) TABLESPACE pg_default; */ extern bool enable_heap_table_on_master; extern bool enable_pg_default_for_non_system_table; -///////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// extern bool allow_file_count_bucket_num_mismatch; extern bool enable_pg_stat_activity_history; @@ -568,6 +577,7 @@ extern char *enable_alpha_newqe_str; extern char *orc_enable_filter_pushdown; extern int orc_update_delete_work_mem; +extern char *orc_enable_no_limit_numeric; extern bool magma_cache_read; extern char *magma_enable_shm; diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h index 2424bee29..3fbeee308 100644 --- a/src/include/utils/rel.h +++ b/src/include/utils/rel.h @@ -285,6 +285,7 @@ typedef struct StdRdOptions int blocksize; /* max varblock size (AO rels only) */ int pagesize; /* page size(Parquet rels only) */ int rowgroupsize; /* row group size (Parquet rels only)*/ + int stripesize; /* stripe size (ORC rels only) */ int compresslevel; /* compression level (AO rels only) */ char* compresstype; /* compression type (AO rels only) */ bool checksum; /* checksum (AO rels only) */