From 0da29e4cb161f78a5ef534b3fb4467756a422e25 Mon Sep 17 00:00:00 2001
From: Richard Guo <rguo@postgresql.org>
Date: Thu, 3 Jul 2025 10:57:26 +0900
Subject: [PATCH 1/4] Enable use of Memoize for ANTI joins

Currently, we do not support Memoize for SEMI and ANTI joins because
nested loop SEMI/ANTI joins do not scan the inner relation to
completion, which prevents Memoize from marking the cache entry as
complete.  One might argue that we could mark the cache entry as
complete after fetching the first inner tuple, but that would not be
safe: if the first inner tuple and the current outer tuple do not
satisfy the join clauses, a second inner tuple matching the parameters
would find the cache entry already marked as complete.

However, if the inner side is provably unique, this issue doesn't
arise, since there would be no second matching tuple.  That said, this
doesn't help in the case of SEMI joins, because a SEMI join with a
provably unique inner side would already have been reduced to an inner
join by reduce_unique_semijoins.

Therefore, in this patch, we check whether the inner relation is
provably unique for ANTI joins and enable the use of Memoize in such
cases.

Author: Richard Guo <guofenglinux@gmail.com>
Reviewed-by: wenhui qiu <qiuwenhuifx@gmail.com>
Reviewed-by: Andrei Lepikhov <lepihov@gmail.com>
Discussion: https://postgr.es/m/CAMbWs48FdLiMNrmJL-g6mDvoQVt0yNyJAqMkv4e2Pk-5GKCZLA@mail.gmail.com
---
 src/backend/optimizer/path/joinpath.c | 47 +++++++++++----------
 src/test/regress/expected/memoize.out | 60 +++++++++++++++++++++++++++
 src/test/regress/sql/memoize.sql      | 27 ++++++++++++
 3 files changed, 112 insertions(+), 22 deletions(-)

diff --git a/src/backend/optimizer/path/joinpath.c b/src/backend/optimizer/path/joinpath.c
index 7aa8f5d799cac..ebedc5574ca9c 100644
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -154,13 +154,17 @@ add_paths_to_joinrel(PlannerInfo *root,
 	/*
 	 * See if the inner relation is provably unique for this outer rel.
 	 *
-	 * We have some special cases: for JOIN_SEMI and JOIN_ANTI, it doesn't
-	 * matter since the executor can make the equivalent optimization anyway;
-	 * we need not expend planner cycles on proofs.  For JOIN_UNIQUE_INNER, we
-	 * must be considering a semijoin whose inner side is not provably unique
-	 * (else reduce_unique_semijoins would've simplified it), so there's no
-	 * point in calling innerrel_is_unique.  However, if the LHS covers all of
-	 * the semijoin's min_lefthand, then it's appropriate to set inner_unique
+	 * We have some special cases: for JOIN_SEMI, it doesn't matter since the
+	 * executor can make the equivalent optimization anyway.  It also doesn't
+	 * help enable use of Memoize, since a semijoin with a provably unique
+	 * inner side should have been reduced to an inner join in that case.
+	 * Therefore, we need not expend planner cycles on proofs.  (For
+	 * JOIN_ANTI, although it doesn't help the executor for the same reason,
+	 * it can benefit Memoize paths.)  For JOIN_UNIQUE_INNER, we must be
+	 * considering a semijoin whose inner side is not provably unique (else
+	 * reduce_unique_semijoins would've simplified it), so there's no point in
+	 * calling innerrel_is_unique.  However, if the LHS covers all of the
+	 * semijoin's min_lefthand, then it's appropriate to set inner_unique
 	 * because the path produced by create_unique_path will be unique relative
 	 * to the LHS.  (If we have an LHS that's only part of the min_lefthand,
 	 * that is *not* true.)  For JOIN_UNIQUE_OUTER, pass JOIN_INNER to avoid
@@ -169,12 +173,6 @@ add_paths_to_joinrel(PlannerInfo *root,
 	switch (jointype)
 	{
 		case JOIN_SEMI:
-		case JOIN_ANTI:
-
-			/*
-			 * XXX it may be worth proving this to allow a Memoize to be
-			 * considered for Nested Loop Semi/Anti Joins.
-			 */
 			extra.inner_unique = false; /* well, unproven */
 			break;
 		case JOIN_UNIQUE_INNER:
@@ -715,16 +713,21 @@ get_memoize_path(PlannerInfo *root, RelOptInfo *innerrel,
 		return NULL;
 
 	/*
-	 * Currently we don't do this for SEMI and ANTI joins unless they're
-	 * marked as inner_unique.  This is because nested loop SEMI/ANTI joins
-	 * don't scan the inner node to completion, which will mean memoize cannot
-	 * mark the cache entry as complete.
-	 *
-	 * XXX Currently we don't attempt to mark SEMI/ANTI joins as inner_unique
-	 * = true.  Should we?  See add_paths_to_joinrel()
+	 * Currently we don't do this for SEMI and ANTI joins, because nested loop
+	 * SEMI/ANTI joins don't scan the inner node to completion, which means
+	 * memoize cannot mark the cache entry as complete.  Nor can we mark the
+	 * cache entry as complete after fetching the first inner tuple, because
+	 * if that tuple and the current outer tuple don't satisfy the join
+	 * clauses, a second inner tuple that satisfies the parameters would find
+	 * the cache entry already marked as complete.  The only exception is when
+	 * the inner relation is provably unique, as in that case, there won't be
+	 * a second matching tuple and we can safely mark the cache entry as
+	 * complete after fetching the first inner tuple.  Note that in such
+	 * cases, the SEMI join should have been reduced to an inner join by
+	 * reduce_unique_semijoins.
 	 */
-	if (!extra->inner_unique && (jointype == JOIN_SEMI ||
-								 jointype == JOIN_ANTI))
+	if ((jointype == JOIN_SEMI || jointype == JOIN_ANTI) &&
+		!extra->inner_unique)
 		return NULL;
 
 	/*
diff --git a/src/test/regress/expected/memoize.out b/src/test/regress/expected/memoize.out
index 38dfaf021c91d..150dc1b44cf62 100644
--- a/src/test/regress/expected/memoize.out
+++ b/src/test/regress/expected/memoize.out
@@ -25,6 +25,7 @@ begin
         ln := regexp_replace(ln, 'Heap Fetches: \d+', 'Heap Fetches: N');
         ln := regexp_replace(ln, 'loops=\d+', 'loops=N');
         ln := regexp_replace(ln, 'Index Searches: \d+', 'Index Searches: N');
+        ln := regexp_replace(ln, 'Memory: \d+kB', 'Memory: NkB');
         return next ln;
     end loop;
 end;
@@ -500,3 +501,62 @@ RESET max_parallel_workers_per_gather;
 RESET parallel_tuple_cost;
 RESET parallel_setup_cost;
 RESET min_parallel_table_scan_size;
+-- Ensure memoize works for ANTI joins
+CREATE TABLE tab_anti (a int, b boolean);
+INSERT INTO tab_anti SELECT i%3, false FROM generate_series(1,100)i;
+ANALYZE tab_anti;
+-- Ensure we get a Memoize plan for ANTI join
+SELECT explain_memoize('
+SELECT COUNT(*) FROM tab_anti t1 LEFT JOIN
+LATERAL (SELECT DISTINCT ON (a) a, b, t1.a AS x FROM tab_anti t2) t2
+ON t1.a+1 = t2.a
+WHERE t2.a IS NULL;', false);
+                                      explain_memoize                                       
+--------------------------------------------------------------------------------------------
+ Aggregate (actual rows=1.00 loops=N)
+   ->  Nested Loop Anti Join (actual rows=33.00 loops=N)
+         ->  Seq Scan on tab_anti t1 (actual rows=100.00 loops=N)
+         ->  Memoize (actual rows=0.67 loops=N)
+               Cache Key: (t1.a + 1), t1.a
+               Cache Mode: binary
+               Hits: 97  Misses: 3  Evictions: Zero  Overflows: 0  Memory Usage: NkB
+               ->  Subquery Scan on t2 (actual rows=0.67 loops=N)
+                     Filter: ((t1.a + 1) = t2.a)
+                     Rows Removed by Filter: 2
+                     ->  Unique (actual rows=2.67 loops=N)
+                           ->  Sort (actual rows=67.33 loops=N)
+                                 Sort Key: t2_1.a
+                                 Sort Method: quicksort  Memory: NkB
+                                 ->  Seq Scan on tab_anti t2_1 (actual rows=100.00 loops=N)
+(15 rows)
+
+-- And check we get the expected results.
+SELECT COUNT(*) FROM tab_anti t1 LEFT JOIN
+LATERAL (SELECT DISTINCT ON (a) a, b, t1.a AS x FROM tab_anti t2) t2
+ON t1.a+1 = t2.a
+WHERE t2.a IS NULL;
+ count 
+-------
+    33
+(1 row)
+
+-- Ensure we do not add memoize node for SEMI join
+EXPLAIN (COSTS OFF)
+SELECT * FROM tab_anti t1 WHERE t1.a IN
+ (SELECT a FROM tab_anti t2 WHERE t2.b IN
+  (SELECT t1.b FROM tab_anti t3 WHERE t2.a > 1 OFFSET 0));
+                   QUERY PLAN                    
+-------------------------------------------------
+ Nested Loop Semi Join
+   ->  Seq Scan on tab_anti t1
+   ->  Nested Loop Semi Join
+         Join Filter: (t1.a = t2.a)
+         ->  Seq Scan on tab_anti t2
+         ->  Subquery Scan on "ANY_subquery"
+               Filter: (t2.b = "ANY_subquery".b)
+               ->  Result
+                     One-Time Filter: (t2.a > 1)
+                     ->  Seq Scan on tab_anti t3
+(10 rows)
+
+DROP TABLE tab_anti;
diff --git a/src/test/regress/sql/memoize.sql b/src/test/regress/sql/memoize.sql
index c0d47fa875ad9..8d1cdd6990c87 100644
--- a/src/test/regress/sql/memoize.sql
+++ b/src/test/regress/sql/memoize.sql
@@ -26,6 +26,7 @@ begin
         ln := regexp_replace(ln, 'Heap Fetches: \d+', 'Heap Fetches: N');
         ln := regexp_replace(ln, 'loops=\d+', 'loops=N');
         ln := regexp_replace(ln, 'Index Searches: \d+', 'Index Searches: N');
+        ln := regexp_replace(ln, 'Memory: \d+kB', 'Memory: NkB');
         return next ln;
     end loop;
 end;
@@ -244,3 +245,29 @@ RESET max_parallel_workers_per_gather;
 RESET parallel_tuple_cost;
 RESET parallel_setup_cost;
 RESET min_parallel_table_scan_size;
+
+-- Ensure memoize works for ANTI joins
+CREATE TABLE tab_anti (a int, b boolean);
+INSERT INTO tab_anti SELECT i%3, false FROM generate_series(1,100)i;
+ANALYZE tab_anti;
+
+-- Ensure we get a Memoize plan for ANTI join
+SELECT explain_memoize('
+SELECT COUNT(*) FROM tab_anti t1 LEFT JOIN
+LATERAL (SELECT DISTINCT ON (a) a, b, t1.a AS x FROM tab_anti t2) t2
+ON t1.a+1 = t2.a
+WHERE t2.a IS NULL;', false);
+
+-- And check we get the expected results.
+SELECT COUNT(*) FROM tab_anti t1 LEFT JOIN
+LATERAL (SELECT DISTINCT ON (a) a, b, t1.a AS x FROM tab_anti t2) t2
+ON t1.a+1 = t2.a
+WHERE t2.a IS NULL;
+
+-- Ensure we do not add memoize node for SEMI join
+EXPLAIN (COSTS OFF)
+SELECT * FROM tab_anti t1 WHERE t1.a IN
+ (SELECT a FROM tab_anti t2 WHERE t2.b IN
+  (SELECT t1.b FROM tab_anti t3 WHERE t2.a > 1 OFFSET 0));
+
+DROP TABLE tab_anti;

From fd7d7b719137b5c427681a50c0a0ac2d745b68bd Mon Sep 17 00:00:00 2001
From: Michael Paquier <michael@paquier.xyz>
Date: Thu, 3 Jul 2025 11:14:20 +0900
Subject: [PATCH 2/4] Improve checks for GUC recovery_target_timeline

Currently check_recovery_target_timeline() converts any value that is
not "current", "latest", or a valid integer to 0.  So, for example, the
following configuration added to postgresql.conf followed by a startup:
recovery_target_timeline = 'bogus'
recovery_target_timeline = '9999999999'

...  results in the following error patterns:
FATAL:  22023: recovery target timeline 0 does not exist
FATAL:  22023: recovery target timeline 1410065407 does not exist

This is confusing, because the server does not reflect the intention of
the user, and just reports incorrect data unrelated to the GUC.

The origin of the problem is that we do not perform a range check on the
GUC value passed in for recovery_target_timeline.  This commit improves
the situation by using strtou64() and by providing stricter range
checks.  Some test cases are added for an invalid value, a value below
the lower bound and a value above the upper bound, checking the sanity
of the reports based on the contents of the server logs.

Author: David Steele <david@pgmasters.net>
Discussion: https://postgr.es/m/e5d472c7-e9be-4710-8dc4-ebe721b62cea@pgbackrest.org
---
 src/backend/access/transam/xlogrecovery.c   | 18 ++++++--
 src/test/recovery/t/003_recovery_targets.pl | 50 +++++++++++++++++++++
 2 files changed, 65 insertions(+), 3 deletions(-)

diff --git a/src/backend/access/transam/xlogrecovery.c b/src/backend/access/transam/xlogrecovery.c
index 6ce979f2d8bc4..93d389148549c 100644
--- a/src/backend/access/transam/xlogrecovery.c
+++ b/src/backend/access/transam/xlogrecovery.c
@@ -4994,13 +4994,25 @@ check_recovery_target_timeline(char **newval, void **extra, GucSource source)
 		rttg = RECOVERY_TARGET_TIMELINE_LATEST;
 	else
 	{
+		char	   *endp;
+		uint64		timeline;
+
 		rttg = RECOVERY_TARGET_TIMELINE_NUMERIC;
 
 		errno = 0;
-		strtoul(*newval, NULL, 0);
-		if (errno == EINVAL || errno == ERANGE)
+		timeline = strtou64(*newval, &endp, 0);
+
+		if (*endp != '\0' || errno == EINVAL || errno == ERANGE)
+		{
+			GUC_check_errdetail("\"%s\" is not a valid number.",
+								"recovery_target_timeline");
+			return false;
+		}
+
+		if (timeline < 1 || timeline > PG_UINT32_MAX)
 		{
-			GUC_check_errdetail("\"recovery_target_timeline\" is not a valid number.");
+			GUC_check_errdetail("\"%s\" must be between %u and %u.",
+								"recovery_target_timeline", 1, UINT_MAX);
 			return false;
 		}
 	}
diff --git a/src/test/recovery/t/003_recovery_targets.pl b/src/test/recovery/t/003_recovery_targets.pl
index 0ae2e98272709..f2109efa9b12d 100644
--- a/src/test/recovery/t/003_recovery_targets.pl
+++ b/src/test/recovery/t/003_recovery_targets.pl
@@ -187,4 +187,54 @@ sub test_recovery_standby
 	  qr/FATAL: .* recovery ended before configured recovery target was reached/,
 	'recovery end before target reached is a fatal error');
 
+# Invalid timeline target
+$node_standby = PostgreSQL::Test::Cluster->new('standby_9');
+$node_standby->init_from_backup($node_primary, 'my_backup',
+	has_restoring => 1);
+$node_standby->append_conf('postgresql.conf',
+	"recovery_target_timeline = 'bogus'");
+
+$res = run_log(
+	[
+		'pg_ctl',
+		'--pgdata' => $node_standby->data_dir,
+		'--log' => $node_standby->logfile,
+		'start',
+	]);
+ok(!$res, 'invalid timeline target (bogus value)');
+
+my $log_start = $node_standby->wait_for_log("is not a valid number");
+
+# Timeline target out of min range
+$node_standby->append_conf('postgresql.conf',
+	"recovery_target_timeline = '0'");
+
+$res = run_log(
+	[
+		'pg_ctl',
+		'--pgdata' => $node_standby->data_dir,
+		'--log' => $node_standby->logfile,
+		'start',
+	]);
+ok(!$res, 'invalid timeline target (lower bound check)');
+
+$log_start =
+  $node_standby->wait_for_log("must be between 1 and 4294967295", $log_start);
+
+# Timeline target out of max range
+$node_standby->append_conf('postgresql.conf',
+	"recovery_target_timeline = '4294967296'");
+
+$res = run_log(
+	[
+		'pg_ctl',
+		'--pgdata' => $node_standby->data_dir,
+		'--log' => $node_standby->logfile,
+		'start',
+	]);
+ok(!$res, 'invalid timeline target (upper bound check)');
+
+$log_start =
+  $node_standby->wait_for_log("must be between 1 and 4294967295", $log_start);
+
 done_testing();

From bc2f348e87c02de63647dbe290d64ff088880dbe Mon Sep 17 00:00:00 2001
From: Fujii Masao <fujii@postgresql.org>
Date: Thu, 3 Jul 2025 15:27:26 +0900
Subject: [PATCH 3/4] Support multi-line headers in COPY FROM command.

The COPY FROM command now accepts a non-negative integer for the HEADER option,
allowing multiple header lines to be skipped. This is useful when the input
contains multi-line headers that should be ignored during data import.

Author: Shinya Kato <shinya11.kato@gmail.com>
Co-authored-by: Fujii Masao <masao.fujii@gmail.com>
Reviewed-by: Yugo Nagata <nagata@sraoss.co.jp>
Discussion: https://postgr.es/m/CAOzEurRPxfzbxqeOPF_AGnAUOYf=Wk0we+1LQomPNUNtyZGBZw@mail.gmail.com
---
 doc/src/sgml/ref/copy.sgml           | 38 ++++++++++++++++++-------
 src/backend/commands/copy.c          | 42 +++++++++++++++++-----------
 src/backend/commands/copyfromparse.c | 17 ++++++++---
 src/backend/commands/copyto.c        |  2 +-
 src/include/commands/copy.h          | 16 +++++------
 src/test/regress/expected/copy.out   | 25 ++++++++++++++++-
 src/test/regress/expected/copy2.out  |  6 ++++
 src/test/regress/sql/copy.sql        | 30 ++++++++++++++++++++
 src/test/regress/sql/copy2.sql       |  3 ++
 src/tools/pgindent/typedefs.list     |  1 -
 10 files changed, 138 insertions(+), 42 deletions(-)

diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml
index 8433344e5b6f5..c2d1fbc1fbe94 100644
--- a/doc/src/sgml/ref/copy.sgml
+++ b/doc/src/sgml/ref/copy.sgml
@@ -37,7 +37,7 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
     DELIMITER '<replaceable class="parameter">delimiter_character</replaceable>'
     NULL '<replaceable class="parameter">null_string</replaceable>'
     DEFAULT '<replaceable class="parameter">default_string</replaceable>'
-    HEADER [ <replaceable class="parameter">boolean</replaceable> | MATCH ]
+    HEADER [ <replaceable class="parameter">boolean</replaceable> | <replaceable class="parameter">integer</replaceable> | MATCH ]
     QUOTE '<replaceable class="parameter">quote_character</replaceable>'
     ESCAPE '<replaceable class="parameter">escape_character</replaceable>'
     FORCE_QUOTE { ( <replaceable class="parameter">column_name</replaceable> [, ...] ) | * }
@@ -212,6 +212,15 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
     </listitem>
    </varlistentry>
 
+   <varlistentry>
+    <term><replaceable class="parameter">integer</replaceable></term>
+    <listitem>
+     <para>
+      Specifies a non-negative integer value passed to the selected option.
+     </para>
+    </listitem>
+   </varlistentry>
+
    <varlistentry>
     <term><literal>FORMAT</literal></term>
     <listitem>
@@ -303,16 +312,25 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
     <term><literal>HEADER</literal></term>
     <listitem>
      <para>
-      Specifies that the file contains a header line with the names of each
-      column in the file.  On output, the first line contains the column
-      names from the table.  On input, the first line is discarded when this
-      option is set to <literal>true</literal> (or equivalent Boolean value).
-      If this option is set to <literal>MATCH</literal>, the number and names
-      of the columns in the header line must match the actual column names of
-      the table, in order;  otherwise an error is raised.
+      On output, if this option is set to <literal>true</literal>
+      (or an equivalent Boolean value), the first line of the output will
+      contain the column names from the table.
+      Integer values <literal>0</literal> and <literal>1</literal> are
+      accepted as Boolean values, but other integers are not allowed for
+      <command>COPY TO</command> commands.
+     </para>
+     <para>
+      On input, if this option is set to <literal>true</literal>
+      (or an equivalent Boolean value), the first line of the input is
+      discarded.  If set to a non-negative integer, that number of
+      lines are discarded.  If set to <literal>MATCH</literal>, the first line
+      is discarded, and it must contain column names that exactly match the
+      table's columns, in both number and order; otherwise, an error is raised.
+      The <literal>MATCH</literal> value is only valid for
+      <command>COPY FROM</command> commands.
+     </para>
+     <para>
       This option is not allowed when using <literal>binary</literal> format.
-      The <literal>MATCH</literal> option is only valid for <command>COPY
-      FROM</command> commands.
      </para>
     </listitem>
    </varlistentry>
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 74ae42b19a710..fae9c41db6565 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -322,11 +322,13 @@ DoCopy(ParseState *pstate, const CopyStmt *stmt,
 }
 
 /*
- * Extract a CopyHeaderChoice value from a DefElem.  This is like
- * defGetBoolean() but also accepts the special value "match".
+ * Extract the CopyFormatOptions.header_line value from a DefElem.
+ *
+ * Parses the HEADER option for COPY, which can be a boolean, a non-negative
+ * integer (number of lines to skip), or the special value "match".
  */
-static CopyHeaderChoice
-defGetCopyHeaderChoice(DefElem *def, bool is_from)
+static int
+defGetCopyHeaderOption(DefElem *def, bool is_from)
 {
 	/*
 	 * If no parameter value given, assume "true" is meant.
@@ -335,20 +337,27 @@ defGetCopyHeaderChoice(DefElem *def, bool is_from)
 		return COPY_HEADER_TRUE;
 
 	/*
-	 * Allow 0, 1, "true", "false", "on", "off", or "match".
+	 * Allow 0, 1, "true", "false", "on", "off", a non-negative integer, or
+	 * "match".
 	 */
 	switch (nodeTag(def->arg))
 	{
 		case T_Integer:
-			switch (intVal(def->arg))
 			{
-				case 0:
-					return COPY_HEADER_FALSE;
-				case 1:
-					return COPY_HEADER_TRUE;
-				default:
-					/* otherwise, error out below */
-					break;
+				int			ival = intVal(def->arg);
+
+				if (ival < 0)
+					ereport(ERROR,
+							(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+							 errmsg("a negative integer value cannot be "
+									"specified for %s", def->defname)));
+
+				if (!is_from && ival > 1)
+					ereport(ERROR,
+							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+							 errmsg("cannot use multi-line header in COPY TO")));
+
+				return ival;
 			}
 			break;
 		default:
@@ -381,7 +390,8 @@ defGetCopyHeaderChoice(DefElem *def, bool is_from)
 	}
 	ereport(ERROR,
 			(errcode(ERRCODE_SYNTAX_ERROR),
-			 errmsg("%s requires a Boolean value or \"match\"",
+			 errmsg("%s requires a Boolean value, a non-negative integer, "
+					"or the string \"match\"",
 					def->defname)));
 	return COPY_HEADER_FALSE;	/* keep compiler quiet */
 }
@@ -566,7 +576,7 @@ ProcessCopyOptions(ParseState *pstate,
 			if (header_specified)
 				errorConflictingDefElem(defel, pstate);
 			header_specified = true;
-			opts_out->header_line = defGetCopyHeaderChoice(defel, is_from);
+			opts_out->header_line = defGetCopyHeaderOption(defel, is_from);
 		}
 		else if (strcmp(defel->defname, "quote") == 0)
 		{
@@ -769,7 +779,7 @@ ProcessCopyOptions(ParseState *pstate,
 				 errmsg("COPY delimiter cannot be \"%s\"", opts_out->delim)));
 
 	/* Check header */
-	if (opts_out->binary && opts_out->header_line)
+	if (opts_out->binary && opts_out->header_line != COPY_HEADER_FALSE)
 		ereport(ERROR,
 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 		/*- translator: %s is the name of a COPY option, e.g. ON_ERROR */
diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c
index f52f2477df129..b1ae97b833dff 100644
--- a/src/backend/commands/copyfromparse.c
+++ b/src/backend/commands/copyfromparse.c
@@ -771,21 +771,30 @@ static pg_attribute_always_inline bool
 NextCopyFromRawFieldsInternal(CopyFromState cstate, char ***fields, int *nfields, bool is_csv)
 {
 	int			fldct;
-	bool		done;
+	bool		done = false;
 
 	/* only available for text or csv input */
 	Assert(!cstate->opts.binary);
 
 	/* on input check that the header line is correct if needed */
-	if (cstate->cur_lineno == 0 && cstate->opts.header_line)
+	if (cstate->cur_lineno == 0 && cstate->opts.header_line != COPY_HEADER_FALSE)
 	{
 		ListCell   *cur;
 		TupleDesc	tupDesc;
+		int			lines_to_skip = cstate->opts.header_line;
+
+		/* If set to "match", one header line is skipped */
+		if (cstate->opts.header_line == COPY_HEADER_MATCH)
+			lines_to_skip = 1;
 
 		tupDesc = RelationGetDescr(cstate->rel);
 
-		cstate->cur_lineno++;
-		done = CopyReadLine(cstate, is_csv);
+		for (int i = 0; i < lines_to_skip; i++)
+		{
+			cstate->cur_lineno++;
+			if ((done = CopyReadLine(cstate, is_csv)))
+				break;
+		}
 
 		if (cstate->opts.header_line == COPY_HEADER_MATCH)
 		{
diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c
index ea6f18f2c8008..67b94b91cae44 100644
--- a/src/backend/commands/copyto.c
+++ b/src/backend/commands/copyto.c
@@ -199,7 +199,7 @@ CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc)
 														  cstate->file_encoding);
 
 	/* if a header has been requested send the line */
-	if (cstate->opts.header_line)
+	if (cstate->opts.header_line == COPY_HEADER_TRUE)
 	{
 		ListCell   *cur;
 		bool		hdr_delim = false;
diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h
index 06dfdfef7210c..541176e198032 100644
--- a/src/include/commands/copy.h
+++ b/src/include/commands/copy.h
@@ -20,15 +20,12 @@
 #include "tcop/dest.h"
 
 /*
- * Represents whether a header line should be present, and whether it must
- * match the actual names (which implies "true").
+ * Represents whether a header line must match the actual names
+ * (which implies "true"), and whether it should be present.
  */
-typedef enum CopyHeaderChoice
-{
-	COPY_HEADER_FALSE = 0,
-	COPY_HEADER_TRUE,
-	COPY_HEADER_MATCH,
-} CopyHeaderChoice;
+#define COPY_HEADER_MATCH	-1
+#define COPY_HEADER_FALSE	0
+#define COPY_HEADER_TRUE	1
 
 /*
  * Represents where to save input processing errors.  More values to be added
@@ -64,7 +61,8 @@ typedef struct CopyFormatOptions
 	bool		binary;			/* binary format? */
 	bool		freeze;			/* freeze rows on loading? */
 	bool		csv_mode;		/* Comma Separated Value format? */
-	CopyHeaderChoice header_line;	/* header line? */
+	int			header_line;	/* number of lines to skip or COPY_HEADER_XXX
+								 * value (see the above) */
 	char	   *null_print;		/* NULL marker string (server encoding!) */
 	int			null_print_len; /* length of same */
 	char	   *null_print_client;	/* same converted to file encoding */
diff --git a/src/test/regress/expected/copy.out b/src/test/regress/expected/copy.out
index 8d5a06563c44a..ac66eb55aeed4 100644
--- a/src/test/regress/expected/copy.out
+++ b/src/test/regress/expected/copy.out
@@ -81,6 +81,29 @@ copy copytest4 to stdout (header);
 c1	colname with tab: \t
 1	a
 2	b
+-- test multi-line header line feature
+create temp table copytest5 (c1 int);
+copy copytest5 from stdin (format csv, header 2);
+copy copytest5 to stdout (header);
+c1
+1
+2
+truncate copytest5;
+copy copytest5 from stdin (format csv, header 4);
+select count(*) from copytest5;
+ count 
+-------
+     0
+(1 row)
+
+truncate copytest5;
+copy copytest5 from stdin (format csv, header 5);
+select count(*) from copytest5;
+ count 
+-------
+     0
+(1 row)
+
 -- test copy from with a partitioned table
 create table parted_copytest (
 	a int,
@@ -224,7 +247,7 @@ alter table header_copytest add column c text;
 copy header_copytest to stdout with (header match);
 ERROR:  cannot use "match" with HEADER in COPY TO
 copy header_copytest from stdin with (header wrong_choice);
-ERROR:  header requires a Boolean value or "match"
+ERROR:  header requires a Boolean value, a non-negative integer, or the string "match"
 -- works
 copy header_copytest from stdin with (header match);
 copy header_copytest (c, a, b) from stdin with (header match);
diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out
index 64ea33aeae8fd..caa3c44f0d0ca 100644
--- a/src/test/regress/expected/copy2.out
+++ b/src/test/regress/expected/copy2.out
@@ -132,6 +132,12 @@ COPY x from stdin with (reject_limit 1);
 ERROR:  COPY REJECT_LIMIT requires ON_ERROR to be set to IGNORE
 COPY x from stdin with (on_error ignore, reject_limit 0);
 ERROR:  REJECT_LIMIT (0) must be greater than zero
+COPY x from stdin with (header -1);
+ERROR:  a negative integer value cannot be specified for header
+COPY x from stdin with (header 2.5);
+ERROR:  header requires a Boolean value, a non-negative integer, or the string "match"
+COPY x to stdout with (header 2);
+ERROR:  cannot use multi-line header in COPY TO
 -- too many columns in column list: should fail
 COPY x (a, b, c, d, e, d, c) from stdin;
 ERROR:  column "d" specified more than once
diff --git a/src/test/regress/sql/copy.sql b/src/test/regress/sql/copy.sql
index f0b88a23db853..a1316c73bac69 100644
--- a/src/test/regress/sql/copy.sql
+++ b/src/test/regress/sql/copy.sql
@@ -94,6 +94,36 @@ this is just a line full of junk that would error out if parsed
 
 copy copytest4 to stdout (header);
 
+-- test multi-line header line feature
+
+create temp table copytest5 (c1 int);
+
+copy copytest5 from stdin (format csv, header 2);
+this is a first header line.
+this is a second header line.
+1
+2
+\.
+copy copytest5 to stdout (header);
+
+truncate copytest5;
+copy copytest5 from stdin (format csv, header 4);
+this is a first header line.
+this is a second header line.
+1
+2
+\.
+select count(*) from copytest5;
+
+truncate copytest5;
+copy copytest5 from stdin (format csv, header 5);
+this is a first header line.
+this is a second header line.
+1
+2
+\.
+select count(*) from copytest5;
+
 -- test copy from with a partitioned table
 create table parted_copytest (
 	a int,
diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql
index 45273557ce040..cef45868db511 100644
--- a/src/test/regress/sql/copy2.sql
+++ b/src/test/regress/sql/copy2.sql
@@ -90,6 +90,9 @@ COPY x to stdout (format BINARY, on_error unsupported);
 COPY x from stdin (log_verbosity unsupported);
 COPY x from stdin with (reject_limit 1);
 COPY x from stdin with (on_error ignore, reject_limit 0);
+COPY x from stdin with (header -1);
+COPY x from stdin with (header 2.5);
+COPY x to stdout with (header 2);
 
 -- too many columns in column list: should fail
 COPY x (a, b, c, d, e, d, c) from stdin;
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 66c5782688a23..e7d1c48e1f20b 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -521,7 +521,6 @@ CopyFormatOptions
 CopyFromRoutine
 CopyFromState
 CopyFromStateData
-CopyHeaderChoice
 CopyInsertMethod
 CopyLogVerbosityChoice
 CopyMethod

From 170673a22f28bd6a1d3fa56e23cd74dcbcb60c17 Mon Sep 17 00:00:00 2001
From: Fujii Masao <fujii@postgresql.org>
Date: Thu, 3 Jul 2025 16:03:19 +0900
Subject: [PATCH 4/4] doc: Remove incorrect note about wal_status in
 pg_replication_slots.

The documentation previously stated that the wal_status column is NULL
if restart_lsn is NULL in the pg_replication_slots view. This is incorrect,
and wal_status can be "lost" even when restart_lsn is NULL.

This commit removes the incorrect description.

Back-patched to all supported versions.

Author: Fujii Masao <masao.fujii@gmail.com>
Reviewed-by: Nisha Moond <nisha.moond412@gmail.com>
Discussion: https://postgr.es/m/c9d23cdc-b5dd-455a-8ee9-f1f24d701d89@oss.nttdata.com
Backpatch-through: 13
---
 doc/src/sgml/system-views.sgml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/doc/src/sgml/system-views.sgml b/doc/src/sgml/system-views.sgml
index 986ae1f543dbd..82825db03bb2f 100644
--- a/doc/src/sgml/system-views.sgml
+++ b/doc/src/sgml/system-views.sgml
@@ -2832,8 +2832,7 @@ SELECT * FROM pg_locks pl LEFT JOIN pg_prepared_xacts ppx
        </itemizedlist>
        The last two states are seen only when
        <xref linkend="guc-max-slot-wal-keep-size"/> is
-       non-negative. If <structfield>restart_lsn</structfield> is NULL, this
-       field is null.
+       non-negative.
       </para></entry>
      </row>