Skip to content

Commit

Permalink
Tighten checks for whitespace in functions that parse identifiers etc.
Browse files Browse the repository at this point in the history
This patch replaces isspace() calls with scanner_isspace() in functions
that are likely to be presented with non-ASCII input.  isspace() has
the small advantage that it will correctly recognize no-break space
in single-byte encodings (such as LATIN1), but it cannot work correctly
for any multibyte character, and depending on the platform it might return
false-positive results for some fragments of multibyte characters.  That's
disastrous for functions that are trying to discard whitespace between
valid strings, as noted in bug #14662 from Justin Muise.  Even treating
no-break space as whitespace is pretty questionable for the usages touched
here, because the core scanner would think it is an identifier character.

Affected functions are parse_ident(), parseNameAndArgTypes() (underlying
regprocedurein() and siblings), SplitIdentifierString() (used for parsing
GUCs and options that are qualified names or lists of names), and
SplitDirectoriesString() (used for parsing GUCs that are lists of
directories).

All the functions adjusted here are parsing SQL identifiers and similar
constructs, so it's reasonable to insist that their definition of
whitespace match the core scanner.  So we can hope that this won't cause
many backwards-compatibility problems.  I've left alone isspace() calls
in places that aren't really expecting any non-ASCII input characters,
such as float8in().

Back-patch to all supported branches.

Discussion: https://postgr.es/m/[email protected]
  • Loading branch information
tglsfdc committed May 24, 2017
1 parent f61bd73 commit 9ae2661
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 14 deletions.
6 changes: 3 additions & 3 deletions src/backend/utils/adt/misc.c
Original file line number Diff line number Diff line change
Expand Up @@ -770,7 +770,7 @@ parse_ident(PG_FUNCTION_ARGS)
nextp = qualname_str;

/* skip leading whitespace */
while (isspace((unsigned char) *nextp))
while (scanner_isspace(*nextp))
nextp++;

for (;;)
Expand Down Expand Up @@ -858,14 +858,14 @@ parse_ident(PG_FUNCTION_ARGS)
text_to_cstring(qualname))));
}

while (isspace((unsigned char) *nextp))
while (scanner_isspace(*nextp))
nextp++;

if (*nextp == '.')
{
after_dot = true;
nextp++;
while (isspace((unsigned char) *nextp))
while (scanner_isspace(*nextp))
nextp++;
}
else if (*nextp == '\0')
Expand Down
7 changes: 4 additions & 3 deletions src/backend/utils/adt/regproc.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include "lib/stringinfo.h"
#include "miscadmin.h"
#include "parser/parse_type.h"
#include "parser/scansup.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/syscache.h"
Expand Down Expand Up @@ -1769,7 +1770,7 @@ parseNameAndArgTypes(const char *string, bool allowNone, List **names,
ptr2 = ptr + strlen(ptr);
while (--ptr2 > ptr)
{
if (!isspace((unsigned char) *ptr2))
if (!scanner_isspace(*ptr2))
break;
}
if (*ptr2 != ')')
Expand All @@ -1786,7 +1787,7 @@ parseNameAndArgTypes(const char *string, bool allowNone, List **names,
for (;;)
{
/* allow leading whitespace */
while (isspace((unsigned char) *ptr))
while (scanner_isspace(*ptr))
ptr++;
if (*ptr == '\0')
{
Expand Down Expand Up @@ -1842,7 +1843,7 @@ parseNameAndArgTypes(const char *string, bool allowNone, List **names,
/* Lop off trailing whitespace */
while (--ptr2 >= typename)
{
if (!isspace((unsigned char) *ptr2))
if (!scanner_isspace(*ptr2))
break;
*ptr2 = '\0';
}
Expand Down
16 changes: 8 additions & 8 deletions src/backend/utils/adt/varlena.c
Original file line number Diff line number Diff line change
Expand Up @@ -3252,7 +3252,7 @@ SplitIdentifierString(char *rawstring, char separator,

*namelist = NIL;

while (isspace((unsigned char) *nextp))
while (scanner_isspace(*nextp))
nextp++; /* skip leading whitespace */

if (*nextp == '\0')
Expand Down Expand Up @@ -3290,7 +3290,7 @@ SplitIdentifierString(char *rawstring, char separator,

curname = nextp;
while (*nextp && *nextp != separator &&
!isspace((unsigned char) *nextp))
!scanner_isspace(*nextp))
nextp++;
endp = nextp;
if (curname == nextp)
Expand All @@ -3312,13 +3312,13 @@ SplitIdentifierString(char *rawstring, char separator,
pfree(downname);
}

while (isspace((unsigned char) *nextp))
while (scanner_isspace(*nextp))
nextp++; /* skip trailing whitespace */

if (*nextp == separator)
{
nextp++;
while (isspace((unsigned char) *nextp))
while (scanner_isspace(*nextp))
nextp++; /* skip leading whitespace for next */
/* we expect another name, so done remains false */
}
Expand Down Expand Up @@ -3377,7 +3377,7 @@ SplitDirectoriesString(char *rawstring, char separator,

*namelist = NIL;

while (isspace((unsigned char) *nextp))
while (scanner_isspace(*nextp))
nextp++; /* skip leading whitespace */

if (*nextp == '\0')
Expand Down Expand Up @@ -3414,21 +3414,21 @@ SplitDirectoriesString(char *rawstring, char separator,
while (*nextp && *nextp != separator)
{
/* trailing whitespace should not be included in name */
if (!isspace((unsigned char) *nextp))
if (!scanner_isspace(*nextp))
endp = nextp + 1;
nextp++;
}
if (curname == endp)
return false; /* empty unquoted name not allowed */
}

while (isspace((unsigned char) *nextp))
while (scanner_isspace(*nextp))
nextp++; /* skip trailing whitespace */

if (*nextp == separator)
{
nextp++;
while (isspace((unsigned char) *nextp))
while (scanner_isspace(*nextp))
nextp++; /* skip leading whitespace for next */
/* we expect another name, so done remains false */
}
Expand Down

0 comments on commit 9ae2661

Please sign in to comment.