Skip to content

Commit ac57319

Browse files
davidbenBoringssl LUCI CQ
authored and
Boringssl LUCI CQ
committed
Rewrite ASN1_STRING_print_ex escaping.
The original implementation uses a table generated by a Perl script, and then relies on some subset of ASN1_STRFLGS_* constants overlapping with CHARTYPE_* constants, while masking off the ones that don't align. Allocating ASN1_STRFLGS_* constants is already complex with the XN_FLAG_* interaction. Avoid the additional CHARTYPE_* interaction by just writing out what it's recognizing in code. If you ignore CHARTYPE_PRINTABLESTRING (which is unused), that table is just recognizing 9 characters anyway. Also this gets charmap.h out of the way so I can clang-format every file in here without having to constantly exclude it. Change-Id: I73f31324e4b8a815887afba459e50ed091a9f999 Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/52729 Reviewed-by: Bob Beck <[email protected]> Commit-Queue: Bob Beck <[email protected]>
1 parent 97c6032 commit ac57319

File tree

3 files changed

+61
-245
lines changed

3 files changed

+61
-245
lines changed

crypto/asn1/a_strex.c

+61-95
Original file line numberDiff line numberDiff line change
@@ -64,96 +64,67 @@
6464
#include <openssl/bio.h>
6565
#include <openssl/mem.h>
6666

67-
#include "charmap.h"
6867
#include "internal.h"
6968

7069

71-
// These flags must be distinct from |ESC_FLAGS| and fit in a byte.
72-
73-
// Character is a valid PrintableString character
74-
#define CHARTYPE_PRINTABLESTRING 0x10
75-
// Character needs escaping if it is the first character
76-
#define CHARTYPE_FIRST_ESC_2253 0x20
77-
// Character needs escaping if it is the last character
78-
#define CHARTYPE_LAST_ESC_2253 0x40
79-
80-
#define CHARTYPE_BS_ESC (ASN1_STRFLGS_ESC_2253 | CHARTYPE_FIRST_ESC_2253 | CHARTYPE_LAST_ESC_2253)
81-
82-
#define ESC_FLAGS (ASN1_STRFLGS_ESC_2253 | \
83-
ASN1_STRFLGS_ESC_QUOTE | \
84-
ASN1_STRFLGS_ESC_CTRL | \
85-
ASN1_STRFLGS_ESC_MSB)
70+
#define ESC_FLAGS \
71+
(ASN1_STRFLGS_ESC_2253 | ASN1_STRFLGS_ESC_QUOTE | ASN1_STRFLGS_ESC_CTRL | \
72+
ASN1_STRFLGS_ESC_MSB)
8673

8774
static int maybe_write(BIO *out, const void *buf, int len)
8875
{
8976
/* If |out| is NULL, ignore the output but report the length. */
9077
return out == NULL || BIO_write(out, buf, len) == len;
9178
}
9279

93-
/*
94-
* This function handles display of strings, one character at a time. It is
95-
* passed an unsigned long for each character because it could come from 2 or
96-
* even 4 byte forms.
97-
*/
98-
99-
#define HEX_SIZE(type) (sizeof(type)*2)
100-
101-
static int do_esc_char(uint32_t c, unsigned char flags, char *do_quotes,
102-
BIO *out)
80+
static int is_control_character(unsigned char c)
10381
{
104-
unsigned char chflgs, chtmp;
105-
char tmphex[HEX_SIZE(uint32_t) + 3];
82+
return c < 32 || c == 127;
83+
}
10684

85+
static int do_esc_char(uint32_t c, unsigned long flags, char *do_quotes,
86+
BIO *out, int is_first, int is_last)
87+
{
88+
/* |c| is a |uint32_t| because, depending on |ASN1_STRFLGS_UTF8_CONVERT|,
89+
* we may be escaping bytes or Unicode codepoints. */
90+
char buf[16]; /* Large enough for "\\W01234567". */
91+
unsigned char u8 = (unsigned char)c;
10792
if (c > 0xffff) {
108-
BIO_snprintf(tmphex, sizeof tmphex, "\\W%08" PRIX32, c);
109-
if (!maybe_write(out, tmphex, 10))
110-
return -1;
111-
return 10;
112-
}
113-
if (c > 0xff) {
114-
BIO_snprintf(tmphex, sizeof tmphex, "\\U%04" PRIX32, c);
115-
if (!maybe_write(out, tmphex, 6))
116-
return -1;
117-
return 6;
118-
}
119-
chtmp = (unsigned char)c;
120-
if (chtmp > 0x7f)
121-
chflgs = flags & ASN1_STRFLGS_ESC_MSB;
122-
else
123-
chflgs = char_type[chtmp] & flags;
124-
if (chflgs & CHARTYPE_BS_ESC) {
125-
/* If we don't escape with quotes, signal we need quotes */
126-
if (chflgs & ASN1_STRFLGS_ESC_QUOTE) {
127-
if (do_quotes)
128-
*do_quotes = 1;
129-
if (!maybe_write(out, &chtmp, 1))
130-
return -1;
131-
return 1;
93+
BIO_snprintf(buf, sizeof(buf), "\\W%08" PRIX32, c);
94+
} else if (c > 0xff) {
95+
BIO_snprintf(buf, sizeof(buf), "\\U%04" PRIX32, c);
96+
} else if ((flags & ASN1_STRFLGS_ESC_MSB) && c > 0x7f) {
97+
BIO_snprintf(buf, sizeof(buf), "\\%02X", c);
98+
} else if ((flags & ASN1_STRFLGS_ESC_CTRL) && is_control_character(c)) {
99+
BIO_snprintf(buf, sizeof(buf), "\\%02X", c);
100+
} else if (flags & ASN1_STRFLGS_ESC_2253) {
101+
/* See RFC 2253, sections 2.4 and 4. */
102+
if (c == '\\' || c == '"') {
103+
/* Quotes and backslashes are always escaped, quoted or not. */
104+
BIO_snprintf(buf, sizeof(buf), "\\%c", (int)c);
105+
} else if (c == ',' || c == '+' || c == '<' || c == '>' || c == ';' ||
106+
(is_first && (c == ' ' || c == '#')) ||
107+
(is_last && (c == ' '))) {
108+
if (flags & ASN1_STRFLGS_ESC_QUOTE) {
109+
/* No need to escape, just tell the caller to quote. */
110+
if (do_quotes != NULL) {
111+
*do_quotes = 1;
112+
}
113+
return maybe_write(out, &u8, 1) ? 1 : -1;
114+
}
115+
BIO_snprintf(buf, sizeof(buf), "\\%c", (int)c);
116+
} else {
117+
return maybe_write(out, &u8, 1) ? 1 : -1;
132118
}
133-
if (!maybe_write(out, "\\", 1))
134-
return -1;
135-
if (!maybe_write(out, &chtmp, 1))
136-
return -1;
137-
return 2;
138-
}
139-
if (chflgs & (ASN1_STRFLGS_ESC_CTRL | ASN1_STRFLGS_ESC_MSB)) {
140-
BIO_snprintf(tmphex, 11, "\\%02X", chtmp);
141-
if (!maybe_write(out, tmphex, 3))
142-
return -1;
143-
return 3;
144-
}
145-
/*
146-
* If we get this far and do any escaping at all must escape the escape
147-
* character itself: backslash.
148-
*/
149-
if (chtmp == '\\' && flags & ESC_FLAGS) {
150-
if (!maybe_write(out, "\\\\", 2))
151-
return -1;
152-
return 2;
119+
} else if ((flags & ESC_FLAGS) && c == '\\') {
120+
/* If any escape flags are set, also escape backslashes. */
121+
BIO_snprintf(buf, sizeof(buf), "\\%c", (int)c);
122+
} else {
123+
return maybe_write(out, &u8, 1) ? 1 : -1;
153124
}
154-
if (!maybe_write(out, &chtmp, 1))
155-
return -1;
156-
return 1;
125+
126+
int len = strlen(buf);
127+
return maybe_write(out, buf, len) ? len : -1;
157128
}
158129

159130
/*
@@ -163,7 +134,7 @@ static int do_esc_char(uint32_t c, unsigned char flags, char *do_quotes,
163134
*/
164135

165136
static int do_buf(const unsigned char *buf, int buflen, int encoding,
166-
int utf8_convert, unsigned char flags, char *quotes, BIO *out)
137+
int utf8_convert, unsigned long flags, char *quotes, BIO *out)
167138
{
168139
/* Reject invalid UCS-4 and UCS-2 lengths without parsing. */
169140
switch (encoding) {
@@ -185,10 +156,7 @@ static int do_buf(const unsigned char *buf, int buflen, int encoding,
185156
const unsigned char *q = buf + buflen;
186157
int outlen = 0;
187158
while (p != q) {
188-
unsigned char orflags = 0;
189-
if (p == buf && flags & ASN1_STRFLGS_ESC_2253) {
190-
orflags = CHARTYPE_FIRST_ESC_2253;
191-
}
159+
const int is_first = p == buf;
192160
/* TODO(davidben): Replace this with |cbs_get_ucs2_be|, etc., to check
193161
* for invalid codepoints. Before doing that, enforce it in the parser,
194162
* https://crbug.com/boringssl/427, so these error cases are not
@@ -224,8 +192,7 @@ static int do_buf(const unsigned char *buf, int buflen, int encoding,
224192
assert(0);
225193
return -1;
226194
}
227-
if (p == q && flags & ASN1_STRFLGS_ESC_2253)
228-
orflags = CHARTYPE_LAST_ESC_2253;
195+
const int is_last = p == q;
229196
if (utf8_convert) {
230197
unsigned char utfbuf[6];
231198
int utflen;
@@ -237,14 +204,15 @@ static int do_buf(const unsigned char *buf, int buflen, int encoding,
237204
* otherwise each character will be > 0x7f and so the
238205
* character will never be escaped on first and last.
239206
*/
240-
int len = do_esc_char(utfbuf[i], flags | orflags, quotes, out);
207+
int len = do_esc_char(utfbuf[i], flags, quotes, out, is_first,
208+
is_last);
241209
if (len < 0) {
242210
return -1;
243211
}
244212
outlen += len;
245213
}
246214
} else {
247-
int len = do_esc_char(c, flags | orflags, quotes, out);
215+
int len = do_esc_char(c, flags, quotes, out, is_first, is_last);
248216
if (len < 0) {
249217
return -1;
250218
}
@@ -281,14 +249,14 @@ static int do_hex_dump(BIO *out, unsigned char *buf, int buflen)
281249
* encoding. This uses the RFC 2253 #01234 format.
282250
*/
283251

284-
static int do_dump(unsigned long lflags, BIO *out, const ASN1_STRING *str)
252+
static int do_dump(unsigned long flags, BIO *out, const ASN1_STRING *str)
285253
{
286254
if (!maybe_write(out, "#", 1)) {
287255
return -1;
288256
}
289257

290258
/* If we don't dump DER encoding just dump content octets */
291-
if (!(lflags & ASN1_STRFLGS_DUMP_DER)) {
259+
if (!(flags & ASN1_STRFLGS_DUMP_DER)) {
292260
int outlen = do_hex_dump(out, str->data, str->length);
293261
if (outlen < 0) {
294262
return -1;
@@ -362,13 +330,11 @@ static int string_type_to_encoding(int type) {
362330
* an error occurred.
363331
*/
364332

365-
int ASN1_STRING_print_ex(BIO *out, const ASN1_STRING *str, unsigned long lflags)
333+
int ASN1_STRING_print_ex(BIO *out, const ASN1_STRING *str, unsigned long flags)
366334
{
367-
/* Keep a copy of escape flags */
368-
unsigned char flags = (unsigned char)(lflags & ESC_FLAGS);
369335
int type = str->type;
370336
int outlen = 0;
371-
if (lflags & ASN1_STRFLGS_SHOW_TYPE) {
337+
if (flags & ASN1_STRFLGS_SHOW_TYPE) {
372338
const char *tagname = ASN1_tag2str(type);
373339
outlen += strlen(tagname);
374340
if (!maybe_write(out, tagname, outlen) || !maybe_write(out, ":", 1))
@@ -378,29 +344,29 @@ int ASN1_STRING_print_ex(BIO *out, const ASN1_STRING *str, unsigned long lflags)
378344

379345
/* Decide what to do with |str|, either dump the contents or display it. */
380346
int encoding;
381-
if (lflags & ASN1_STRFLGS_DUMP_ALL) {
347+
if (flags & ASN1_STRFLGS_DUMP_ALL) {
382348
/* Dump everything. */
383349
encoding = -1;
384-
} else if (lflags & ASN1_STRFLGS_IGNORE_TYPE) {
350+
} else if (flags & ASN1_STRFLGS_IGNORE_TYPE) {
385351
/* Ignore the string type and interpret the contents as Latin-1. */
386352
encoding = MBSTRING_ASC;
387353
} else {
388354
encoding = string_type_to_encoding(type);
389-
if (encoding == -1 && (lflags & ASN1_STRFLGS_DUMP_UNKNOWN) == 0) {
355+
if (encoding == -1 && (flags & ASN1_STRFLGS_DUMP_UNKNOWN) == 0) {
390356
encoding = MBSTRING_ASC;
391357
}
392358
}
393359

394360
if (encoding == -1) {
395-
int len = do_dump(lflags, out, str);
361+
int len = do_dump(flags, out, str);
396362
if (len < 0)
397363
return -1;
398364
outlen += len;
399365
return outlen;
400366
}
401367

402368
int utf8_convert = 0;
403-
if (lflags & ASN1_STRFLGS_UTF8_CONVERT) {
369+
if (flags & ASN1_STRFLGS_UTF8_CONVERT) {
404370
/* If the string is UTF-8, skip decoding and just interpret it as 1 byte
405371
* per character to avoid converting twice.
406372
*

crypto/asn1/charmap.h

-15
This file was deleted.

0 commit comments

Comments
 (0)