From 5c3fa1140ef5115d504d4a35fa810e403a6b61eb Mon Sep 17 00:00:00 2001 From: Sergey Miryanov Date: Sun, 18 May 2025 23:39:21 +0500 Subject: [PATCH 1/8] Fix handling of empty strings in .get_dtext, .get_qp_ctext and .get_qcontent --- Lib/email/_header_value_parser.py | 1 + Lib/test/test_email/test__header_value_parser.py | 9 +++++++++ 2 files changed, 10 insertions(+) diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index 9a51b9437333db..bb1b2899baf5f8 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -1024,6 +1024,7 @@ def _get_ptext_to_endchars(value, endchars): vchars = [] escape = False had_qp = False + pos = 0 for pos in range(len(fragment)): if fragment[pos] == '\\': if escape: diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py index ac12c3b2306f7d..3fac0e848df29b 100644 --- a/Lib/test/test_email/test__header_value_parser.py +++ b/Lib/test/test_email/test__header_value_parser.py @@ -463,6 +463,9 @@ def test_get_qp_ctext_non_printables(self): [errors.NonPrintableDefect], ')') self.assertEqual(ptext.defects[0].non_printables[0], '\x00') + def test_get_qp_ctext_empty(self): + self._test_get_x(parser.get_qp_ctext, '', '', ' ', [], '') + # get_qcontent def test_get_qcontent_only(self): @@ -503,6 +506,9 @@ def test_get_qcontent_non_printables(self): [errors.NonPrintableDefect], '"') self.assertEqual(ptext.defects[0].non_printables[0], '\x00') + def test_get_qcontent_empty(self): + self._test_get_x(parser.get_qcontent, '', '', '', [], '') + # get_atext def test_get_atext_only(self): @@ -1283,6 +1289,9 @@ def test_get_dtext_open_bracket_mid_word(self): self._test_get_x(parser.get_dtext, 'foo[bar', 'foo', 'foo', [], '[bar') + def test_get_dtext_empty(self): + self._test_get_x(parser.get_dtext, '', '', '', [], '') + # get_domain_literal def test_get_domain_literal_only(self): From 61fc40263b37f870ecafaa529e7f4d8a756e266a Mon Sep 17 00:00:00 2001 From: Sergey Miryanov Date: Sun, 18 May 2025 23:41:03 +0500 Subject: [PATCH 2/8] Fix appending of error while parsing domain literal --- Lib/email/_header_value_parser.py | 2 +- Lib/test/test_email/test__header_value_parser.py | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index bb1b2899baf5f8..7c3834f4011da2 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -1574,7 +1574,7 @@ def get_dtext(value): def _check_for_early_dl_end(value, domain_literal): if value: return False - domain_literal.append(errors.InvalidHeaderDefect( + domain_literal.defects.append(errors.InvalidHeaderDefect( "end of input inside domain-literal")) domain_literal.append(ValueTerminal(']', 'domain-literal-end')) return True diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py index 3fac0e848df29b..da4741796e6b98 100644 --- a/Lib/test/test_email/test__header_value_parser.py +++ b/Lib/test/test_email/test__header_value_parser.py @@ -2741,6 +2741,12 @@ def test_parse_valid_message_id(self): ) self.assertEqual(message_id.token_type, 'message-id') + def test_parse_message_id_with_invalid_domain(self): + message_id = parser.parse_message_id(" Date: Sun, 18 May 2025 23:46:38 +0500 Subject: [PATCH 3/8] Add news entry --- .../next/Library/2025-05-18-23-46-21.gh-issue-134152.30HwbX.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2025-05-18-23-46-21.gh-issue-134152.30HwbX.rst diff --git a/Misc/NEWS.d/next/Library/2025-05-18-23-46-21.gh-issue-134152.30HwbX.rst b/Misc/NEWS.d/next/Library/2025-05-18-23-46-21.gh-issue-134152.30HwbX.rst new file mode 100644 index 00000000000000..02a76a9276b7b7 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-05-18-23-46-21.gh-issue-134152.30HwbX.rst @@ -0,0 +1 @@ +Fix parsing of emails message_id with invalid domain. From 5174b653e27dfa3e04e2877b9b65a4c60733d1bf Mon Sep 17 00:00:00 2001 From: Sergey Miryanov Date: Mon, 19 May 2025 10:44:28 +0500 Subject: [PATCH 4/8] Update Misc/NEWS.d/next/Library/2025-05-18-23-46-21.gh-issue-134152.30HwbX.rst MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- .../next/Library/2025-05-18-23-46-21.gh-issue-134152.30HwbX.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2025-05-18-23-46-21.gh-issue-134152.30HwbX.rst b/Misc/NEWS.d/next/Library/2025-05-18-23-46-21.gh-issue-134152.30HwbX.rst index 02a76a9276b7b7..9e248ac854e0d8 100644 --- a/Misc/NEWS.d/next/Library/2025-05-18-23-46-21.gh-issue-134152.30HwbX.rst +++ b/Misc/NEWS.d/next/Library/2025-05-18-23-46-21.gh-issue-134152.30HwbX.rst @@ -1 +1 @@ -Fix parsing of emails message_id with invalid domain. +:mod:`email`: Fix parsing of emails message ID with invalid domain. From cf0a4eace11051c1cecd56c4239ced24011c3f68 Mon Sep 17 00:00:00 2001 From: Sergey Miryanov Date: Mon, 19 May 2025 22:10:22 +0500 Subject: [PATCH 5/8] Revert "Fix handling of empty strings in .get_dtext, .get_qp_ctext and .get_qcontent" This reverts commit 5c3fa1140ef5115d504d4a35fa810e403a6b61eb. --- Lib/email/_header_value_parser.py | 1 - Lib/test/test_email/test__header_value_parser.py | 9 --------- 2 files changed, 10 deletions(-) diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index 7c3834f4011da2..d1e8ec26ad2393 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -1024,7 +1024,6 @@ def _get_ptext_to_endchars(value, endchars): vchars = [] escape = False had_qp = False - pos = 0 for pos in range(len(fragment)): if fragment[pos] == '\\': if escape: diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py index da4741796e6b98..4cf2e891463baa 100644 --- a/Lib/test/test_email/test__header_value_parser.py +++ b/Lib/test/test_email/test__header_value_parser.py @@ -463,9 +463,6 @@ def test_get_qp_ctext_non_printables(self): [errors.NonPrintableDefect], ')') self.assertEqual(ptext.defects[0].non_printables[0], '\x00') - def test_get_qp_ctext_empty(self): - self._test_get_x(parser.get_qp_ctext, '', '', ' ', [], '') - # get_qcontent def test_get_qcontent_only(self): @@ -506,9 +503,6 @@ def test_get_qcontent_non_printables(self): [errors.NonPrintableDefect], '"') self.assertEqual(ptext.defects[0].non_printables[0], '\x00') - def test_get_qcontent_empty(self): - self._test_get_x(parser.get_qcontent, '', '', '', [], '') - # get_atext def test_get_atext_only(self): @@ -1289,9 +1283,6 @@ def test_get_dtext_open_bracket_mid_word(self): self._test_get_x(parser.get_dtext, 'foo[bar', 'foo', 'foo', [], '[bar') - def test_get_dtext_empty(self): - self._test_get_x(parser.get_dtext, '', '', '', [], '') - # get_domain_literal def test_get_domain_literal_only(self): From ab4de0d9fe47d74b1f67fe2614b112cace75b654 Mon Sep 17 00:00:00 2001 From: Sergey Miryanov Date: Mon, 26 May 2025 23:59:42 +0500 Subject: [PATCH 6/8] Add extra test for get_address --- .../test_email/test__header_value_parser.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py index 11891ab04c1ac0..1afaf4a2d0e047 100644 --- a/Lib/test/test_email/test__header_value_parser.py +++ b/Lib/test/test_email/test__header_value_parser.py @@ -2491,6 +2491,24 @@ def test_get_address_quoted_strings_in_atom_list(self): self.assertEqual(address.all_mailboxes[0].domain, 'example.com') self.assertEqual(address.all_mailboxes[0].addr_spec, '"example example"@example.com') + def test_get_address_with_invalid_domain(self): + address = parser.get_address("' on angle-addr") + self.assertEqual(str(address[0].all_defects[1]), "end of input inside domain-literal") + + address = parser.get_address("!an??:=m==fr2@[C") + self.assertEqual(len(address), 2) + self.assertEqual(address[0].token_type, 'address') + self.assertEqual(address[0].all_mailboxes[0].local_part, '=m==fr2') + self.assertEqual(address[0].all_mailboxes[0].domain, '[C]') + self.assertEqual(address[0].all_mailboxes[0].addr_spec, '=m==fr2@[C]') + self.assertEqual(str(address[0].all_defects[0]), "end of header in group") + self.assertEqual(str(address[0].all_defects[1]), "end of input inside domain-literal") # get_address_list From ff5d410d6b22bebf0fe4e6353a1468c61d6ea41a Mon Sep 17 00:00:00 2001 From: Sergey Miryanov Date: Tue, 27 May 2025 00:22:05 +0500 Subject: [PATCH 7/8] Don't lost opening bracket in get_domain_literal + fix tests and news entry --- Lib/email/_header_value_parser.py | 2 +- .../test_email/test__header_value_parser.py | 55 +++++++++++++------ ...-05-18-23-46-21.gh-issue-134152.30HwbX.rst | 2 +- 3 files changed, 39 insertions(+), 20 deletions(-) diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index 91de19c512324a..91243378dc0441 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -1594,9 +1594,9 @@ def get_domain_literal(value): raise errors.HeaderParseError("expected '[' at start of domain-literal " "but found '{}'".format(value)) value = value[1:] + domain_literal.append(ValueTerminal('[', 'domain-literal-start')) if _check_for_early_dl_end(value, domain_literal): return domain_literal, value - domain_literal.append(ValueTerminal('[', 'domain-literal-start')) if value[0] in WSP: token, value = get_fws(value) domain_literal.append(token) diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py index 1afaf4a2d0e047..dd78e1f2c55f08 100644 --- a/Lib/test/test_email/test__header_value_parser.py +++ b/Lib/test/test_email/test__header_value_parser.py @@ -2492,23 +2492,35 @@ def test_get_address_quoted_strings_in_atom_list(self): self.assertEqual(address.all_mailboxes[0].addr_spec, '"example example"@example.com') def test_get_address_with_invalid_domain(self): - address = parser.get_address("' on angle-addr") - self.assertEqual(str(address[0].all_defects[1]), "end of input inside domain-literal") - - address = parser.get_address("!an??:=m==fr2@[C") - self.assertEqual(len(address), 2) - self.assertEqual(address[0].token_type, 'address') - self.assertEqual(address[0].all_mailboxes[0].local_part, '=m==fr2') - self.assertEqual(address[0].all_mailboxes[0].domain, '[C]') - self.assertEqual(address[0].all_mailboxes[0].addr_spec, '=m==fr2@[C]') - self.assertEqual(str(address[0].all_defects[0]), "end of header in group") - self.assertEqual(str(address[0].all_defects[1]), "end of input inside domain-literal") + address = self._test_get_x(parser.get_address, + '', + '', + [errors.InvalidHeaderDefect, # missing trailing '>' on angle-addr + errors.InvalidHeaderDefect, # end of input inside domain-literal + ], + '') + self.assertEqual(address.token_type, 'address') + self.assertEqual(len(address.mailboxes), 0) + self.assertEqual(len(address.all_mailboxes), 1) + self.assertEqual(address.all_mailboxes[0].domain, '[]') + self.assertEqual(address.all_mailboxes[0].local_part, 'T') + self.assertEqual(address[0].token_type, 'invalid-mailbox') + + address = self._test_get_x(parser.get_address, + '!an??:=m==fr2@[C', + '!an??:=m==fr2@[C];', + '!an??:=m==fr2@[C];', + [errors.InvalidHeaderDefect, # end of header in group + errors.InvalidHeaderDefect, # end of input inside domain-literal + ], + '') + self.assertEqual(address.token_type, 'address') + self.assertEqual(len(address.mailboxes), 0) + self.assertEqual(len(address.all_mailboxes), 1) + self.assertEqual(address.all_mailboxes[0].domain, '[C]') + self.assertEqual(address.all_mailboxes[0].local_part, '=m==fr2') + self.assertEqual(address[0].token_type, 'group') # get_address_list @@ -2784,7 +2796,14 @@ def test_parse_valid_message_id(self): self.assertEqual(message_id.token_type, 'message-id') def test_parse_message_id_with_invalid_domain(self): - message_id = parser.parse_message_id("", + "", + [errors.ObsoleteHeaderDefect] + [errors.InvalidHeaderDefect] * 2, + [], + ) self.assertEqual(message_id.token_type, 'message-id') self.assertEqual(str(message_id.all_defects[-1]), "end of input inside domain-literal") diff --git a/Misc/NEWS.d/next/Library/2025-05-18-23-46-21.gh-issue-134152.30HwbX.rst b/Misc/NEWS.d/next/Library/2025-05-18-23-46-21.gh-issue-134152.30HwbX.rst index 9e248ac854e0d8..911a4a59ea6079 100644 --- a/Misc/NEWS.d/next/Library/2025-05-18-23-46-21.gh-issue-134152.30HwbX.rst +++ b/Misc/NEWS.d/next/Library/2025-05-18-23-46-21.gh-issue-134152.30HwbX.rst @@ -1 +1 @@ -:mod:`email`: Fix parsing of emails message ID with invalid domain. +:mod:`email`: Fix parsing of email message ID with invalid domain. From be40930213a2492334fb1344b4da2c1403b3276b Mon Sep 17 00:00:00 2001 From: Sergey Miryanov Date: Tue, 27 May 2025 10:57:38 +0500 Subject: [PATCH 8/8] Tweak new tests for get_address --- Lib/test/test_email/test__header_value_parser.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py index dd78e1f2c55f08..179e236ecdfd7f 100644 --- a/Lib/test/test_email/test__header_value_parser.py +++ b/Lib/test/test_email/test__header_value_parser.py @@ -2505,6 +2505,7 @@ def test_get_address_with_invalid_domain(self): self.assertEqual(len(address.all_mailboxes), 1) self.assertEqual(address.all_mailboxes[0].domain, '[]') self.assertEqual(address.all_mailboxes[0].local_part, 'T') + self.assertEqual(address.all_mailboxes[0].token_type, 'invalid-mailbox') self.assertEqual(address[0].token_type, 'invalid-mailbox') address = self._test_get_x(parser.get_address, @@ -2520,6 +2521,7 @@ def test_get_address_with_invalid_domain(self): self.assertEqual(len(address.all_mailboxes), 1) self.assertEqual(address.all_mailboxes[0].domain, '[C]') self.assertEqual(address.all_mailboxes[0].local_part, '=m==fr2') + self.assertEqual(address.all_mailboxes[0].token_type, 'invalid-mailbox') self.assertEqual(address[0].token_type, 'group') # get_address_list