tuhaihe · pull · Jul 3, 2025 · Jul 3, 2025 · Jul 3, 2025 · Jul 3, 2025
diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml
@@ -37,7 +37,7 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
     DELIMITER '<replaceable class="parameter">delimiter_character</replaceable>'
     NULL '<replaceable class="parameter">null_string</replaceable>'
     DEFAULT '<replaceable class="parameter">default_string</replaceable>'
-    HEADER [ <replaceable class="parameter">boolean</replaceable> | MATCH ]
+    HEADER [ <replaceable class="parameter">boolean</replaceable> | <replaceable class="parameter">integer</replaceable> | MATCH ]
     QUOTE '<replaceable class="parameter">quote_character</replaceable>'
     ESCAPE '<replaceable class="parameter">escape_character</replaceable>'
     FORCE_QUOTE { ( <replaceable class="parameter">column_name</replaceable> [, ...] ) | * }
@@ -212,6 +212,15 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
     </listitem>
    </varlistentry>
 
+   <varlistentry>
+    <term><replaceable class="parameter">integer</replaceable></term>
+    <listitem>
+     <para>
+      Specifies a non-negative integer value passed to the selected option.
+     </para>
+    </listitem>
+   </varlistentry>
+
    <varlistentry>
     <term><literal>FORMAT</literal></term>
     <listitem>
@@ -303,16 +312,25 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
     <term><literal>HEADER</literal></term>
     <listitem>
      <para>
-      Specifies that the file contains a header line with the names of each
-      column in the file.  On output, the first line contains the column
-      names from the table.  On input, the first line is discarded when this
-      option is set to <literal>true</literal> (or equivalent Boolean value).
-      If this option is set to <literal>MATCH</literal>, the number and names
-      of the columns in the header line must match the actual column names of
-      the table, in order;  otherwise an error is raised.
+      On output, if this option is set to <literal>true</literal>
+      (or an equivalent Boolean value), the first line of the output will
+      contain the column names from the table.
+      Integer values <literal>0</literal> and <literal>1</literal> are
+      accepted as Boolean values, but other integers are not allowed for
+      <command>COPY TO</command> commands.
+     </para>
+     <para>
+      On input, if this option is set to <literal>true</literal>
+      (or an equivalent Boolean value), the first line of the input is
+      discarded.  If set to a non-negative integer, that many lines are
+      discarded.  If set to <literal>MATCH</literal>, the first line
+      is discarded, and it must contain column names that exactly match the
+      table's columns, in both number and order; otherwise, an error is raised.
+      The <literal>MATCH</literal> value is only valid for
+      <command>COPY FROM</command> commands.
+     </para>
+     <para>
       This option is not allowed when using <literal>binary</literal> format.
-      The <literal>MATCH</literal> option is only valid for <command>COPY
-      FROM</command> commands.
      </para>
     </listitem>
    </varlistentry>

diff --git a/doc/src/sgml/system-views.sgml b/doc/src/sgml/system-views.sgml
@@ -2832,8 +2832,7 @@ SELECT * FROM pg_locks pl LEFT JOIN pg_prepared_xacts ppx
        </itemizedlist>
        The last two states are seen only when
        <xref linkend="guc-max-slot-wal-keep-size"/> is
-       non-negative. If <structfield>restart_lsn</structfield> is NULL, this
-       field is null.
+       non-negative.
       </para></entry>
      </row>
 

diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c
@@ -4994,13 +4994,25 @@ check_recovery_target_timeline(char **newval, void **extra, GucSource source)
 		rttg = RECOVERY_TARGET_TIMELINE_LATEST;
 	else
 	{
+		char	   *endp;
+		uint64		timeline;
+
 		rttg = RECOVERY_TARGET_TIMELINE_NUMERIC;
 
 		errno = 0;
-		strtoul(*newval, NULL, 0);
-		if (errno == EINVAL || errno == ERANGE)
+		timeline = strtou64(*newval, &endp, 0);
+
+		if (*endp != '\0' || errno == EINVAL || errno == ERANGE)
+		{
+			GUC_check_errdetail("\"%s\" is not a valid number.",
+								"recovery_target_timeline");
+			return false;
+		}
+
+		if (timeline < 1 || timeline > PG_UINT32_MAX)
 		{
-			GUC_check_errdetail("\"recovery_target_timeline\" is not a valid number.");
+			GUC_check_errdetail("\"%s\" must be between %u and %u.",
+								"recovery_target_timeline", 1, UINT_MAX);
 			return false;
 		}
 	}

diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
@@ -322,11 +322,13 @@ DoCopy(ParseState *pstate, const CopyStmt *stmt,
 }
 
 /*
- * Extract a CopyHeaderChoice value from a DefElem.  This is like
- * defGetBoolean() but also accepts the special value "match".
+ * Extract the CopyFormatOptions.header_line value from a DefElem.
+ *
+ * Parses the HEADER option for COPY, which can be a boolean, a non-negative
+ * integer (number of lines to skip), or the special value "match".
  */
-static CopyHeaderChoice
-defGetCopyHeaderChoice(DefElem *def, bool is_from)
+static int
+defGetCopyHeaderOption(DefElem *def, bool is_from)
 {
 	/*
 	 * If no parameter value given, assume "true" is meant.
@@ -335,20 +337,27 @@ defGetCopyHeaderChoice(DefElem *def, bool is_from)
 		return COPY_HEADER_TRUE;
 
 	/*
-	 * Allow 0, 1, "true", "false", "on", "off", or "match".
+	 * Allow 0, 1, "true", "false", "on", "off", a non-negative integer, or
+	 * "match".
 	 */
 	switch (nodeTag(def->arg))
 	{
 		case T_Integer:
-			switch (intVal(def->arg))
 			{
-				case 0:
-					return COPY_HEADER_FALSE;
-				case 1:
-					return COPY_HEADER_TRUE;
-				default:
-					/* otherwise, error out below */
-					break;
+				int			ival = intVal(def->arg);
+
+				if (ival < 0)
+					ereport(ERROR,
+							(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+							 errmsg("a negative integer value cannot be "
+									"specified for %s", def->defname)));
+
+				if (!is_from && ival > 1)
+					ereport(ERROR,
+							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+							 errmsg("cannot use multi-line header in COPY TO")));
+
+				return ival;
 			}
 			break;
 		default:
@@ -381,7 +390,8 @@ defGetCopyHeaderChoice(DefElem *def, bool is_from)
 	}
 	ereport(ERROR,
 			(errcode(ERRCODE_SYNTAX_ERROR),
-			 errmsg("%s requires a Boolean value or \"match\"",
+			 errmsg("%s requires a Boolean value, a non-negative integer, "
+					"or the string \"match\"",
 					def->defname)));
 	return COPY_HEADER_FALSE;	/* keep compiler quiet */
 }
@@ -566,7 +576,7 @@ ProcessCopyOptions(ParseState *pstate,
 			if (header_specified)
 				errorConflictingDefElem(defel, pstate);
 			header_specified = true;
-			opts_out->header_line = defGetCopyHeaderChoice(defel, is_from);
+			opts_out->header_line = defGetCopyHeaderOption(defel, is_from);
 		}
 		else if (strcmp(defel->defname, "quote") == 0)
 		{
@@ -769,7 +779,7 @@ ProcessCopyOptions(ParseState *pstate,
 				 errmsg("COPY delimiter cannot be \"%s\"", opts_out->delim)));
 
 	/* Check header */
-	if (opts_out->binary && opts_out->header_line)
+	if (opts_out->binary && opts_out->header_line != COPY_HEADER_FALSE)
 		ereport(ERROR,
 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 		/*- translator: %s is the name of a COPY option, e.g. ON_ERROR */

diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c
@@ -771,21 +771,30 @@ static pg_attribute_always_inline bool
 NextCopyFromRawFieldsInternal(CopyFromState cstate, char ***fields, int *nfields, bool is_csv)
 {
 	int			fldct;
-	bool		done;
+	bool		done = false;
 
 	/* only available for text or csv input */
 	Assert(!cstate->opts.binary);
 
 	/* on input check that the header line is correct if needed */
-	if (cstate->cur_lineno == 0 && cstate->opts.header_line)
+	if (cstate->cur_lineno == 0 && cstate->opts.header_line != COPY_HEADER_FALSE)
 	{
 		ListCell   *cur;
 		TupleDesc	tupDesc;
+		int			lines_to_skip = cstate->opts.header_line;
+
+		/* If set to "match", one header line is skipped */
+		if (cstate->opts.header_line == COPY_HEADER_MATCH)
+			lines_to_skip = 1;
 
 		tupDesc = RelationGetDescr(cstate->rel);
 
-		cstate->cur_lineno++;
-		done = CopyReadLine(cstate, is_csv);
+		for (int i = 0; i < lines_to_skip; i++)
+		{
+			cstate->cur_lineno++;
+			if ((done = CopyReadLine(cstate, is_csv)))
+				break;
+		}
 
 		if (cstate->opts.header_line == COPY_HEADER_MATCH)
 		{

diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c
@@ -199,7 +199,7 @@ CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc)
 														  cstate->file_encoding);
 
 	/* if a header has been requested send the line */
-	if (cstate->opts.header_line)
+	if (cstate->opts.header_line == COPY_HEADER_TRUE)
 	{
 		ListCell   *cur;
 		bool		hdr_delim = false;

diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
@@ -154,13 +154,17 @@ add_paths_to_joinrel(PlannerInfo *root,
 	/*
 	 * See if the inner relation is provably unique for this outer rel.
 	 *
-	 * We have some special cases: for JOIN_SEMI and JOIN_ANTI, it doesn't
-	 * matter since the executor can make the equivalent optimization anyway;
-	 * we need not expend planner cycles on proofs.  For JOIN_UNIQUE_INNER, we
-	 * must be considering a semijoin whose inner side is not provably unique
-	 * (else reduce_unique_semijoins would've simplified it), so there's no
-	 * point in calling innerrel_is_unique.  However, if the LHS covers all of
-	 * the semijoin's min_lefthand, then it's appropriate to set inner_unique
+	 * We have some special cases: for JOIN_SEMI, it doesn't matter since the
+	 * executor can make the equivalent optimization anyway.  It also doesn't
+	 * help enable use of Memoize, since a semijoin with a provably unique
+	 * inner side should have been reduced to an inner join in that case.
+	 * Therefore, we need not expend planner cycles on proofs.  (For
+	 * JOIN_ANTI, although it doesn't help the executor for the same reason,
+	 * it can benefit Memoize paths.)  For JOIN_UNIQUE_INNER, we must be
+	 * considering a semijoin whose inner side is not provably unique (else
+	 * reduce_unique_semijoins would've simplified it), so there's no point in
+	 * calling innerrel_is_unique.  However, if the LHS covers all of the
+	 * semijoin's min_lefthand, then it's appropriate to set inner_unique
 	 * because the path produced by create_unique_path will be unique relative
 	 * to the LHS.  (If we have an LHS that's only part of the min_lefthand,
 	 * that is *not* true.)  For JOIN_UNIQUE_OUTER, pass JOIN_INNER to avoid
@@ -169,12 +173,6 @@ add_paths_to_joinrel(PlannerInfo *root,
 	switch (jointype)
 	{
 		case JOIN_SEMI:
-		case JOIN_ANTI:
-
-			/*
-			 * XXX it may be worth proving this to allow a Memoize to be
-			 * considered for Nested Loop Semi/Anti Joins.
-			 */
 			extra.inner_unique = false; /* well, unproven */
 			break;
 		case JOIN_UNIQUE_INNER:
@@ -715,16 +713,21 @@ get_memoize_path(PlannerInfo *root, RelOptInfo *innerrel,
 		return NULL;
 
 	/*
-	 * Currently we don't do this for SEMI and ANTI joins unless they're
-	 * marked as inner_unique.  This is because nested loop SEMI/ANTI joins
-	 * don't scan the inner node to completion, which will mean memoize cannot
-	 * mark the cache entry as complete.
-	 *
-	 * XXX Currently we don't attempt to mark SEMI/ANTI joins as inner_unique
-	 * = true.  Should we?  See add_paths_to_joinrel()
+	 * Currently we don't do this for SEMI and ANTI joins, because nested loop
+	 * SEMI/ANTI joins don't scan the inner node to completion, which means
+	 * memoize cannot mark the cache entry as complete.  Nor can we mark the
+	 * cache entry as complete after fetching the first inner tuple, because
+	 * if that tuple and the current outer tuple don't satisfy the join
+	 * clauses, a second inner tuple that satisfies the parameters would find
+	 * the cache entry already marked as complete.  The only exception is when
+	 * the inner relation is provably unique, as in that case, there won't be
+	 * a second matching tuple and we can safely mark the cache entry as
+	 * complete after fetching the first inner tuple.  Note that in such
+	 * cases, the SEMI join should have been reduced to an inner join by
+	 * reduce_unique_semijoins.
 	 */
-	if (!extra->inner_unique && (jointype == JOIN_SEMI ||
-								 jointype == JOIN_ANTI))
+	if ((jointype == JOIN_SEMI || jointype == JOIN_ANTI) &&
+		!extra->inner_unique)
 		return NULL;
 
 	/*

diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h
@@ -20,15 +20,12 @@
 #include "tcop/dest.h"
 
 /*
- * Represents whether a header line should be present, and whether it must
- * match the actual names (which implies "true").
+ * Special header_line values: whether a header line should be present, and
+ * whether it must match the actual column names (which implies "true").
  */
-typedef enum CopyHeaderChoice
-{
-	COPY_HEADER_FALSE = 0,
-	COPY_HEADER_TRUE,
-	COPY_HEADER_MATCH,
-} CopyHeaderChoice;
+#define COPY_HEADER_MATCH	-1
+#define COPY_HEADER_FALSE	0
+#define COPY_HEADER_TRUE	1
 
 /*
  * Represents where to save input processing errors.  More values to be added
@@ -64,7 +61,8 @@ typedef struct CopyFormatOptions
 	bool		binary;			/* binary format? */
 	bool		freeze;			/* freeze rows on loading? */
 	bool		csv_mode;		/* Comma Separated Value format? */
-	CopyHeaderChoice header_line;	/* header line? */
+	int			header_line;	/* number of lines to skip or COPY_HEADER_XXX
+								 * value (see above) */
 	char	   *null_print;		/* NULL marker string (server encoding!) */
 	int			null_print_len; /* length of same */
 	char	   *null_print_client;	/* same converted to file encoding */

diff --git a/src/test/recovery/t/003_recovery_targets.pl b/src/test/recovery/t/003_recovery_targets.pl
@@ -187,4 +187,54 @@ sub test_recovery_standby
 	  qr/FATAL: .* recovery ended before configured recovery target was reached/,
 	'recovery end before target reached is a fatal error');
 
+# Invalid timeline target
+$node_standby = PostgreSQL::Test::Cluster->new('standby_9');
+$node_standby->init_from_backup($node_primary, 'my_backup',
+	has_restoring => 1);
+$node_standby->append_conf('postgresql.conf',
+	"recovery_target_timeline = 'bogus'");
+
+$res = run_log(
+	[
+		'pg_ctl',
+		'--pgdata' => $node_standby->data_dir,
+		'--log' => $node_standby->logfile,
+		'start',
+	]);
+ok(!$res, 'invalid timeline target (bogus value)');
+
+my $log_start = $node_standby->wait_for_log("is not a valid number");
+
+# Timeline target below the minimum allowed value
+$node_standby->append_conf('postgresql.conf',
+	"recovery_target_timeline = '0'");
+
+$res = run_log(
+	[
+		'pg_ctl',
+		'--pgdata' => $node_standby->data_dir,
+		'--log' => $node_standby->logfile,
+		'start',
+	]);
+ok(!$res, 'invalid timeline target (lower bound check)');
+
+$log_start =
+  $node_standby->wait_for_log("must be between 1 and 4294967295", $log_start);
+
+# Timeline target above the maximum allowed value
+$node_standby->append_conf('postgresql.conf',
+	"recovery_target_timeline = '4294967296'");
+
+$res = run_log(
+	[
+		'pg_ctl',
+		'--pgdata' => $node_standby->data_dir,
+		'--log' => $node_standby->logfile,
+		'start',
+	]);
+ok(!$res, 'invalid timeline target (upper bound check)');
+
+$log_start =
+  $node_standby->wait_for_log("must be between 1 and 4294967295", $log_start);
+
 done_testing();