Skip to content

Commit

Permalink
Add a DEFAULT option to COPY FROM
Browse files Browse the repository at this point in the history
This allows for a string which if an input field matches causes the
column's default value to be inserted. The advantage of this is that
the default can be inserted in some rows and not others, for which
non-default data is available.

The file_fdw extension is also modified to take allow use of this
option.

Israel Barth Rubio

Discussion: https://postgr.es/m/CAO_rXXAcqesk6DsvioOZ5zmeEmpUN5ktZf-9=9yu+DTr0Xr8Uw@mail.gmail.com
  • Loading branch information
adunstan committed Mar 13, 2023
1 parent 7b14e20 commit 9f8377f
Show file tree
Hide file tree
Showing 15 changed files with 447 additions and 24 deletions.
3 changes: 3 additions & 0 deletions contrib/file_fdw/data/copy_default.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
1,value,2022-07-04
2,\D,2022-07-03
3,\D,\D
17 changes: 17 additions & 0 deletions contrib/file_fdw/expected/file_fdw.out
Original file line number Diff line number Diff line change
Expand Up @@ -424,6 +424,23 @@ SELECT a, c FROM gft1;
(2 rows)

DROP FOREIGN TABLE gft1;
-- copy default tests
\set filename :abs_srcdir '/data/copy_default.csv'
CREATE FOREIGN TABLE copy_default (
id integer,
text_value text not null default 'test',
ts_value timestamp without time zone not null default '2022-07-05'
) SERVER file_server
OPTIONS (format 'csv', filename :'filename', default '\D');
SELECT id, text_value, ts_value FROM copy_default;
id | text_value | ts_value
----+------------+--------------------------
1 | value | Mon Jul 04 00:00:00 2022
2 | test | Sun Jul 03 00:00:00 2022
3 | test | Tue Jul 05 00:00:00 2022
(3 rows)

DROP FOREIGN TABLE copy_default;
-- privilege tests
SET ROLE regress_file_fdw_superuser;
SELECT * FROM agg_text ORDER BY a;
Expand Down
20 changes: 17 additions & 3 deletions contrib/file_fdw/file_fdw.c
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ static const struct FileFdwOption valid_options[] = {
{"quote", ForeignTableRelationId},
{"escape", ForeignTableRelationId},
{"null", ForeignTableRelationId},
{"default", ForeignTableRelationId},
{"encoding", ForeignTableRelationId},
{"force_not_null", AttributeRelationId},
{"force_null", AttributeRelationId},
Expand Down Expand Up @@ -712,6 +713,9 @@ static TupleTableSlot *
fileIterateForeignScan(ForeignScanState *node)
{
FileFdwExecutionState *festate = (FileFdwExecutionState *) node->fdw_state;
EState *estate = CreateExecutorState();
ExprContext *econtext;
MemoryContext oldcontext;
TupleTableSlot *slot = node->ss.ss_ScanTupleSlot;
bool found;
ErrorContextCallback errcallback;
Expand All @@ -728,15 +732,25 @@ fileIterateForeignScan(ForeignScanState *node)
* ExecStoreVirtualTuple. If we don't find another row in the file, we
* just skip the last step, leaving the slot empty as required.
*
* We can pass ExprContext = NULL because we read all columns from the
* file, so no need to evaluate default expressions.
* We pass ExprContext because there might be a use of the DEFAULT option
* in COPY FROM, so we may need to evaluate default expressions.
*/
ExecClearTuple(slot);
found = NextCopyFrom(festate->cstate, NULL,
econtext = GetPerTupleExprContext(estate);

/*
* DEFAULT expressions need to be evaluated in a per-tuple context, so
* switch in case we are doing that.
*/
oldcontext = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
found = NextCopyFrom(festate->cstate, econtext,
slot->tts_values, slot->tts_isnull);
if (found)
ExecStoreVirtualTuple(slot);

/* Switch back to original memory context */
MemoryContextSwitchTo(oldcontext);

/* Remove error callback. */
error_context_stack = errcallback.previous;

Expand Down
11 changes: 11 additions & 0 deletions contrib/file_fdw/sql/file_fdw.sql
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,17 @@ OPTIONS (format 'csv', filename :'filename', delimiter ',');
SELECT a, c FROM gft1;
DROP FOREIGN TABLE gft1;

-- copy default tests
\set filename :abs_srcdir '/data/copy_default.csv'
CREATE FOREIGN TABLE copy_default (
id integer,
text_value text not null default 'test',
ts_value timestamp without time zone not null default '2022-07-05'
) SERVER file_server
OPTIONS (format 'csv', filename :'filename', default '\D');
SELECT id, text_value, ts_value FROM copy_default;
DROP FOREIGN TABLE copy_default;

-- privilege tests
SET ROLE regress_file_fdw_superuser;
SELECT * FROM agg_text ORDER BY a;
Expand Down
14 changes: 14 additions & 0 deletions doc/src/sgml/ref/copy.sgml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
FORCE_NOT_NULL ( <replaceable class="parameter">column_name</replaceable> [, ...] )
FORCE_NULL ( <replaceable class="parameter">column_name</replaceable> [, ...] )
ENCODING '<replaceable class="parameter">encoding_name</replaceable>'
DEFAULT '<replaceable class="parameter">default_string</replaceable>'
</synopsis>
</refsynopsisdiv>

Expand Down Expand Up @@ -368,6 +369,19 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
</listitem>
</varlistentry>

<varlistentry>
<term><literal>DEFAULT</literal></term>
<listitem>
<para>
Specifies the string that represents a default value. Each time the string
is found in the input file, the default value of the corresponding column
will be used.
This option is allowed only in <command>COPY FROM</command>, and only when
not using <literal>binary</literal> format.
</para>
</listitem>
</varlistentry>

<varlistentry>
<term><literal>WHERE</literal></term>
<listitem>
Expand Down
51 changes: 51 additions & 0 deletions src/backend/commands/copy.c
Original file line number Diff line number Diff line change
Expand Up @@ -464,6 +464,12 @@ ProcessCopyOptions(ParseState *pstate,
errorConflictingDefElem(defel, pstate);
opts_out->null_print = defGetString(defel);
}
else if (strcmp(defel->defname, "default") == 0)
{
if (opts_out->default_print)
errorConflictingDefElem(defel, pstate);
opts_out->default_print = defGetString(defel);
}
else if (strcmp(defel->defname, "header") == 0)
{
if (header_specified)
Expand Down Expand Up @@ -577,6 +583,11 @@ ProcessCopyOptions(ParseState *pstate,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("cannot specify NULL in BINARY mode")));

if (opts_out->binary && opts_out->default_print)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("cannot specify DEFAULT in BINARY mode")));

/* Set defaults for omitted options */
if (!opts_out->delim)
opts_out->delim = opts_out->csv_mode ? "," : "\t";
Expand Down Expand Up @@ -612,6 +623,17 @@ ProcessCopyOptions(ParseState *pstate,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("COPY null representation cannot use newline or carriage return")));

if (opts_out->default_print)
{
opts_out->default_print_len = strlen(opts_out->default_print);

if (strchr(opts_out->default_print, '\r') != NULL ||
strchr(opts_out->default_print, '\n') != NULL)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("COPY default representation cannot use newline or carriage return")));
}

/*
* Disallow unsafe delimiter characters in non-CSV mode. We can't allow
* backslash because it would be ambiguous. We can't allow the other
Expand Down Expand Up @@ -705,6 +727,35 @@ ProcessCopyOptions(ParseState *pstate,
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("CSV quote character must not appear in the NULL specification")));

if (opts_out->default_print)
{
if (!is_from)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("COPY DEFAULT only available using COPY FROM")));

/* Don't allow the delimiter to appear in the default string. */
if (strchr(opts_out->default_print, opts_out->delim[0]) != NULL)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("COPY delimiter must not appear in the DEFAULT specification")));

/* Don't allow the CSV quote char to appear in the default string. */
if (opts_out->csv_mode &&
strchr(opts_out->default_print, opts_out->quote[0]) != NULL)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("CSV quote character must not appear in the DEFAULT specification")));

/* Don't allow the NULL and DEFAULT string to be the same */
if (opts_out->null_print_len == opts_out->default_print_len &&
strncmp(opts_out->null_print, opts_out->default_print,
opts_out->null_print_len) == 0)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("NULL specification and DEFAULT specification cannot be the same")));
}
}

/*
Expand Down
20 changes: 13 additions & 7 deletions src/backend/commands/copyfrom.c
Original file line number Diff line number Diff line change
Expand Up @@ -1565,11 +1565,11 @@ BeginCopyFrom(ParseState *pstate,
&in_func_oid, &typioparams[attnum - 1]);
fmgr_info(in_func_oid, &in_functions[attnum - 1]);

/* Get default info if needed */
if (!list_member_int(cstate->attnumlist, attnum) && !att->attgenerated)
/* Get default info if available */
defexprs[attnum - 1] = NULL;

if (!att->attgenerated)
{
/* attribute is NOT to be copied from input */
/* use default value if one exists */
Expr *defexpr = (Expr *) build_column_default(cstate->rel,
attnum);

Expand All @@ -1579,9 +1579,15 @@ BeginCopyFrom(ParseState *pstate,
defexpr = expression_planner(defexpr);

/* Initialize executable expression in copycontext */
defexprs[num_defaults] = ExecInitExpr(defexpr, NULL);
defmap[num_defaults] = attnum - 1;
num_defaults++;
defexprs[attnum - 1] = ExecInitExpr(defexpr, NULL);

/* if NOT copied from input */
/* use default value if one exists */
if (!list_member_int(cstate->attnumlist, attnum))
{
defmap[num_defaults] = attnum - 1;
num_defaults++;
}

/*
* If a default expression looks at the table being loaded,
Expand Down
85 changes: 77 additions & 8 deletions src/backend/commands/copyfromparse.c
Original file line number Diff line number Diff line change
Expand Up @@ -842,9 +842,10 @@ NextCopyFromRawFields(CopyFromState cstate, char ***fields, int *nfields)
/*
* Read next tuple from file for COPY FROM. Return false if no more tuples.
*
* 'econtext' is used to evaluate default expression for each column not
* read from the file. It can be NULL when no default values are used, i.e.
* when all columns are read from the file.
* 'econtext' is used to evaluate default expression for each column that is
* either not read from the file or is using the DEFAULT option of COPY FROM.
* It can be NULL when no default values are used, i.e. when all columns are
* read from the file, and DEFAULT option is unset.
*
* 'values' and 'nulls' arrays must be the same length as columns of the
* relation passed to BeginCopyFrom. This function fills the arrays.
Expand All @@ -870,6 +871,7 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
/* Initialize all values for row to NULL */
MemSet(values, 0, num_phys_attrs * sizeof(Datum));
MemSet(nulls, true, num_phys_attrs * sizeof(bool));
cstate->defaults = (bool *) palloc0(num_phys_attrs * sizeof(bool));

if (!cstate->opts.binary)
{
Expand Down Expand Up @@ -938,12 +940,27 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext,

cstate->cur_attname = NameStr(att->attname);
cstate->cur_attval = string;
values[m] = InputFunctionCall(&in_functions[m],
string,
typioparams[m],
att->atttypmod);

if (string != NULL)
nulls[m] = false;

if (cstate->defaults[m])
{
/*
* The caller must supply econtext and have switched into the
* per-tuple memory context in it.
*/
Assert(econtext != NULL);
Assert(CurrentMemoryContext == econtext->ecxt_per_tuple_memory);

values[m] = ExecEvalExpr(defexprs[m], econtext, &nulls[m]);
}
else
values[m] = InputFunctionCall(&in_functions[m],
string,
typioparams[m],
att->atttypmod);

cstate->cur_attname = NULL;
cstate->cur_attval = NULL;
}
Expand Down Expand Up @@ -1019,10 +1036,12 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
Assert(econtext != NULL);
Assert(CurrentMemoryContext == econtext->ecxt_per_tuple_memory);

values[defmap[i]] = ExecEvalExpr(defexprs[i], econtext,
values[defmap[i]] = ExecEvalExpr(defexprs[defmap[i]], econtext,
&nulls[defmap[i]]);
}

pfree(cstate->defaults);

return true;
}

Expand Down Expand Up @@ -1663,6 +1682,31 @@ CopyReadAttributesText(CopyFromState cstate)
if (input_len == cstate->opts.null_print_len &&
strncmp(start_ptr, cstate->opts.null_print, input_len) == 0)
cstate->raw_fields[fieldno] = NULL;
/* Check whether raw input matched default marker */
else if (cstate->opts.default_print &&
input_len == cstate->opts.default_print_len &&
strncmp(start_ptr, cstate->opts.default_print, input_len) == 0)
{
/* fieldno is 0-indexed and attnum is 1-indexed */
int m = list_nth_int(cstate->attnumlist, fieldno) - 1;

if (cstate->defexprs[m] != NULL)
{
/* defaults contain entries for all physical attributes */
cstate->defaults[m] = true;
}
else
{
TupleDesc tupDesc = RelationGetDescr(cstate->rel);
Form_pg_attribute att = TupleDescAttr(tupDesc, m);

ereport(ERROR,
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
errmsg("unexpected DEFAULT in COPY data"),
errdetail("Column \"%s\" has no DEFAULT value.",
NameStr(att->attname))));
}
}
else
{
/*
Expand Down Expand Up @@ -1852,6 +1896,31 @@ CopyReadAttributesCSV(CopyFromState cstate)
if (!saw_quote && input_len == cstate->opts.null_print_len &&
strncmp(start_ptr, cstate->opts.null_print, input_len) == 0)
cstate->raw_fields[fieldno] = NULL;
/* Check whether raw input matched default marker */
else if (cstate->opts.default_print &&
input_len == cstate->opts.default_print_len &&
strncmp(start_ptr, cstate->opts.default_print, input_len) == 0)
{
/* fieldno is 0-index and attnum is 1-index */
int m = list_nth_int(cstate->attnumlist, fieldno) - 1;

if (cstate->defexprs[m] != NULL)
{
/* defaults contain entries for all physical attributes */
cstate->defaults[m] = true;
}
else
{
TupleDesc tupDesc = RelationGetDescr(cstate->rel);
Form_pg_attribute att = TupleDescAttr(tupDesc, m);

ereport(ERROR,
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
errmsg("unexpected DEFAULT in COPY data"),
errdetail("Column \"%s\" has no DEFAULT value.",
NameStr(att->attname))));
}
}

fieldno++;
/* Done if we hit EOL instead of a delim */
Expand Down
25 changes: 25 additions & 0 deletions src/bin/psql/t/001_basic.pl
Original file line number Diff line number Diff line change
Expand Up @@ -325,4 +325,29 @@ sub psql_fails_like
'client-side error commits transaction, no ON_ERROR_STOP and multiple -c switches'
);

# Test \copy from with DEFAULT option
$node->safe_psql(
'postgres',
"CREATE TABLE copy_default (
id integer PRIMARY KEY,
text_value text NOT NULL DEFAULT 'test',
ts_value timestamp without time zone NOT NULL DEFAULT '2022-07-05'
)"
);

my $copy_default_sql_file = "$tempdir/copy_default.csv";
append_to_file($copy_default_sql_file, "1,value,2022-07-04\n");
append_to_file($copy_default_sql_file, "2,placeholder,2022-07-03\n");
append_to_file($copy_default_sql_file, "3,placeholder,placeholder\n");

psql_like(
$node,
"\\copy copy_default from $copy_default_sql_file with (format 'csv', default 'placeholder');
SELECT * FROM copy_default",
qr/1\|value\|2022-07-04 00:00:00
2|test|2022-07-03 00:00:00
3|test|2022-07-05 00:00:00/,
'\copy from with DEFAULT'
);

done_testing();
Loading

0 comments on commit 9f8377f

Please sign in to comment.