9
9
10
10
from test import support
11
11
from test .support import os_helper
12
+ from test .support import warnings_helper
12
13
13
14
try :
14
15
import _testcapi
15
- except ImportError as exc :
16
+ except ImportError :
16
17
_testcapi = None
17
18
18
19
try :
@@ -113,7 +114,7 @@ def check_partial(self, input, partialresults):
113
114
q = Queue (b"" )
114
115
r = codecs .getreader (self .encoding )(q )
115
116
result = ""
116
- for (c , partialresult ) in zip (input .encode (self .encoding ), partialresults ):
117
+ for (c , partialresult ) in zip (input .encode (self .encoding ), partialresults , strict = True ):
117
118
q .write (bytes ([c ]))
118
119
result += r .read ()
119
120
self .assertEqual (result , partialresult )
@@ -124,7 +125,7 @@ def check_partial(self, input, partialresults):
124
125
# do the check again, this time using an incremental decoder
125
126
d = codecs .getincrementaldecoder (self .encoding )()
126
127
result = ""
127
- for (c , partialresult ) in zip (input .encode (self .encoding ), partialresults ):
128
+ for (c , partialresult ) in zip (input .encode (self .encoding ), partialresults , strict = True ):
128
129
result += d .decode (bytes ([c ]))
129
130
self .assertEqual (result , partialresult )
130
131
# check that there's nothing left in the buffers
@@ -134,7 +135,7 @@ def check_partial(self, input, partialresults):
134
135
# Check whether the reset method works properly
135
136
d .reset ()
136
137
result = ""
137
- for (c , partialresult ) in zip (input .encode (self .encoding ), partialresults ):
138
+ for (c , partialresult ) in zip (input .encode (self .encoding ), partialresults , strict = True ):
138
139
result += d .decode (bytes ([c ]))
139
140
self .assertEqual (result , partialresult )
140
141
# check that there's nothing left in the buffers
@@ -843,7 +844,7 @@ def test_bug691291(self):
843
844
self .addCleanup (os_helper .unlink , os_helper .TESTFN )
844
845
with open (os_helper .TESTFN , 'wb' ) as fp :
845
846
fp .write (s )
846
- with support .check_warnings (('' , DeprecationWarning )):
847
+ with warnings_helper .check_warnings (('' , DeprecationWarning )):
847
848
reader = codecs .open (os_helper .TESTFN , 'U' , encoding = self .encoding )
848
849
with reader :
849
850
self .assertEqual (reader .read (), s1 )
@@ -1814,6 +1815,22 @@ def test_register(self):
1814
1815
self .assertRaises (TypeError , codecs .register )
1815
1816
self .assertRaises (TypeError , codecs .register , 42 )
1816
1817
1818
def test_unregister(self):
    # Register a mock search function, check that codecs.lookup() consults
    # it, then check that codecs.unregister() stops it being consulted.
    name = "nonexistent_codec_name"
    search_function = mock.Mock()
    codecs.register(search_function)
    # Safety net: if an assertion below fails before the explicit
    # unregister() call, the mock must not stay in the global codec search
    # path and leak into later tests.  Unregistering a function that is no
    # longer registered does nothing, so this is harmless on success.
    self.addCleanup(codecs.unregister, search_function)
    # The mock's return value is not a valid codec entry, so the lookup is
    # expected to be rejected with TypeError after the mock was called.
    self.assertRaises(TypeError, codecs.lookup, name)
    search_function.assert_called_with(name)
    search_function.reset_mock()

    # Once unregistered, the name must be unknown and the mock untouched.
    codecs.unregister(search_function)
    self.assertRaises(LookupError, codecs.lookup, name)
    search_function.assert_not_called()

# TODO: RUSTPYTHON, AttributeError: module '_winapi' has no attribute 'GetACP'
if sys.platform == "win32":
    test_unregister = unittest.expectedFailure(test_unregister)
1833
+
1817
1834
def test_lookup (self ):
1818
1835
self .assertRaises (TypeError , codecs .lookup )
1819
1836
self .assertRaises (LookupError , codecs .lookup , "__spam__" )
@@ -2544,7 +2561,16 @@ def test_unicode_escape(self):
2544
2561
(r"\x5c\x55\x30\x30\x31\x31\x30\x30\x30\x30" , 10 ))
2545
2562
2546
2563
2547
- class UnicodeEscapeTest (unittest .TestCase ):
2564
+ class UnicodeEscapeTest (ReadTest , unittest .TestCase ):
2565
+ encoding = "unicode-escape"
2566
+
2567
+ test_lone_surrogates = None
2568
+
2569
+ # TODO: RUSTPYTHON, TypeError: Expected type 'str', not 'bytes'
2570
+ @unittest .expectedFailure
2571
+ def test_incremental_surrogatepass (self ): # TODO: RUSTPYTHON, remove when this passes
2572
+ super ().test_incremental_surrogatepass () # TODO: RUSTPYTHON, remove when this passes
2573
+
2548
2574
def test_empty (self ):
2549
2575
self .assertEqual (codecs .unicode_escape_encode ("" ), (b"" , 0 ))
2550
2576
self .assertEqual (codecs .unicode_escape_decode (b"" ), ("" , 0 ))
@@ -2631,8 +2657,57 @@ def test_decode_errors(self):
2631
2657
self .assertEqual (decode (br"\U00110000" , "ignore" ), ("" , 10 ))
2632
2658
self .assertEqual (decode (br"\U00110000" , "replace" ), ("\ufffd " , 10 ))
2633
2659
2660
# TODO: RUSTPYTHON, UnicodeDecodeError: ('unicodeescape', b'\\', 0, 1, '\\ at end of string')
@unittest.expectedFailure
def test_partial(self):
    # Feed the unicode-escape encoding of the input one byte at a time.
    # The list holds one entry per encoded byte: the text decoded so far
    # after that byte.  An escape only shows up once its last byte arrived.
    #
    # The input encodes to 32 bytes (4+2+2+2+2+4+6+10) and the list has 32
    # entries; check_partial() zips them with strict=True, so the two
    # lengths must agree exactly.
    self.check_partial(
        "\x00\t\n\r\\\xff\uffff\U00010000",
        [
            '',
            '',
            '',
            '\x00',
            '\x00',
            '\x00\t',
            '\x00\t',
            '\x00\t\n',
            '\x00\t\n',
            '\x00\t\n\r',
            '\x00\t\n\r',
            '\x00\t\n\r\\',
            '\x00\t\n\r\\',
            '\x00\t\n\r\\',
            '\x00\t\n\r\\',
            '\x00\t\n\r\\\xff',
            '\x00\t\n\r\\\xff',
            '\x00\t\n\r\\\xff',
            '\x00\t\n\r\\\xff',
            '\x00\t\n\r\\\xff',
            '\x00\t\n\r\\\xff',
            '\x00\t\n\r\\\xff\uffff',
            '\x00\t\n\r\\\xff\uffff',
            '\x00\t\n\r\\\xff\uffff',
            '\x00\t\n\r\\\xff\uffff',
            '\x00\t\n\r\\\xff\uffff',
            '\x00\t\n\r\\\xff\uffff',
            '\x00\t\n\r\\\xff\uffff',
            '\x00\t\n\r\\\xff\uffff',
            '\x00\t\n\r\\\xff\uffff',
            '\x00\t\n\r\\\xff\uffff',
            '\x00\t\n\r\\\xff\uffff\U00010000',
        ]
    )
2700
+
2701
+ class RawUnicodeEscapeTest (ReadTest , unittest .TestCase ):
2702
+ encoding = "raw-unicode-escape"
2703
+
2704
+ test_lone_surrogates = None
2705
+
2706
+ # TODO: RUSTPYTHON, AssertionError: '\\' != ''
2707
+ @unittest .expectedFailure
2708
+ def test_incremental_surrogatepass (self ): # TODO: RUSTPYTHON, remove when this passes
2709
+ super ().test_incremental_surrogatepass () # TODO: RUSTPYTHON, remove when this passes
2634
2710
2635
- class RawUnicodeEscapeTest (unittest .TestCase ):
2636
2711
def test_empty (self ):
2637
2712
self .assertEqual (codecs .raw_unicode_escape_encode ("" ), (b"" , 0 ))
2638
2713
self .assertEqual (codecs .raw_unicode_escape_decode (b"" ), ("" , 0 ))
@@ -2681,6 +2756,37 @@ def test_decode_errors(self):
2681
2756
self .assertEqual (decode (br"\U00110000" , "ignore" ), ("" , 10 ))
2682
2757
self .assertEqual (decode (br"\U00110000" , "replace" ), ("\ufffd " , 10 ))
2683
2758
2759
# TODO: RUSTPYTHON, AssertionError: '\x00\t\n\r\\' != '\x00\t\n\r'
@unittest.expectedFailure
def test_partial(self):
    # Feed the raw-unicode-escape encoding of the input one byte at a time.
    # The list holds one entry per encoded byte: the text decoded so far
    # after that byte.
    #
    # The input encodes to 22 bytes (1+1+1+1+1+1+6+10) and the list has 22
    # entries; check_partial() zips them with strict=True, so the two
    # lengths must agree exactly.  The lone backslash byte (5th entry)
    # yields no new text until the next byte shows it does not start a
    # \u or \U escape.
    self.check_partial(
        "\x00\t\n\r\\\xff\uffff\U00010000",
        [
            '\x00',
            '\x00\t',
            '\x00\t\n',
            '\x00\t\n\r',
            '\x00\t\n\r',
            '\x00\t\n\r\\\xff',
            '\x00\t\n\r\\\xff',
            '\x00\t\n\r\\\xff',
            '\x00\t\n\r\\\xff',
            '\x00\t\n\r\\\xff',
            '\x00\t\n\r\\\xff',
            '\x00\t\n\r\\\xff\uffff',
            '\x00\t\n\r\\\xff\uffff',
            '\x00\t\n\r\\\xff\uffff',
            '\x00\t\n\r\\\xff\uffff',
            '\x00\t\n\r\\\xff\uffff',
            '\x00\t\n\r\\\xff\uffff',
            '\x00\t\n\r\\\xff\uffff',
            '\x00\t\n\r\\\xff\uffff',
            '\x00\t\n\r\\\xff\uffff',
            '\x00\t\n\r\\\xff\uffff',
            '\x00\t\n\r\\\xff\uffff\U00010000',
        ]
    )
2789
+
2684
2790
2685
2791
class EscapeEncodeTest (unittest .TestCase ):
2686
2792
@@ -2889,7 +2995,7 @@ def test_buffer_api_usage(self):
2889
2995
view_decoded = codecs .decode (view , encoding )
2890
2996
self .assertEqual (view_decoded , data )
2891
2997
2892
- def test_text_to_binary_blacklists_binary_transforms (self ):
2998
+ def test_text_to_binary_denylists_binary_transforms (self ):
2893
2999
# Check binary -> binary codecs give a good error for str input
2894
3000
bad_input = "bad input type"
2895
3001
for encoding in bytes_transform_encodings :
@@ -2901,14 +3007,14 @@ def test_text_to_binary_blacklists_binary_transforms(self):
2901
3007
bad_input .encode (encoding )
2902
3008
self .assertIsNone (failure .exception .__cause__ )
2903
3009
2904
- def test_text_to_binary_blacklists_text_transforms (self ):
3010
def test_text_to_binary_denylists_text_transforms(self):
    # str.encode() must refuse a str -> str codec and point the caller at
    # codecs.encode() instead.
    expected_error = (
        r"^'rot_13' is not a text encoding; "
        r"use codecs.encode\(\) to handle arbitrary codecs"
    )
    text = "just an example message"
    with self.assertRaisesRegex(LookupError, expected_error):
        text.encode("rot_13")
2910
3016
2911
- def test_binary_to_text_blacklists_binary_transforms (self ):
3017
+ def test_binary_to_text_denylists_binary_transforms (self ):
2912
3018
# Check bytes.decode and bytearray.decode give a good error
2913
3019
# message for binary -> binary codecs
2914
3020
data = b"encode first to ensure we meet any format restrictions"
@@ -2923,7 +3029,7 @@ def test_binary_to_text_blacklists_binary_transforms(self):
2923
3029
with self .assertRaisesRegex (LookupError , msg ):
2924
3030
bytearray (encoded_data ).decode (encoding )
2925
3031
2926
- def test_binary_to_text_blacklists_text_transforms (self ):
3032
+ def test_binary_to_text_denylists_text_transforms (self ):
2927
3033
# Check str -> str codec gives a good error for binary input
2928
3034
for bad_input in (b"immutable" , bytearray (b"mutable" )):
2929
3035
with self .subTest (bad_input = bad_input ):
@@ -2991,29 +3097,14 @@ def test_uu_invalid(self):
2991
3097
2992
3098
def _get_test_codec(codec_name):
    # Codec search function used by the tests: hand back whatever is stored
    # in _TEST_CODECS under codec_name.  _TEST_CODECS is defined elsewhere
    # in this file (presumably a dict, so unknown names yield None).
    codec_info = _TEST_CODECS.get(codec_name)
    return codec_info
2994
- codecs .register (_get_test_codec ) # Returns None, not usable as a decorator
2995
-
2996
- try :
2997
- # Issue #22166: Also need to clear the internal cache in CPython
2998
- from _codecs import _forget_codec
2999
- except ImportError :
3000
- def _forget_codec (codec_name ):
3001
- pass
3002
3100
3003
3101
3004
3102
class ExceptionChainingTest (unittest .TestCase ):
3005
3103
3006
3104
def setUp (self ):
3007
- # There's no way to unregister a codec search function, so we just
3008
- # ensure we render this one fairly harmless after the test
3009
- # case finishes by using the test case repr as the codec name
3010
- # The codecs module normalizes codec names, although this doesn't
3011
- # appear to be formally documented...
3012
- # We also make sure we use a truly unique id for the custom codec
3013
- # to avoid issues with the codec cache when running these tests
3014
- # multiple times (e.g. when hunting for refleaks)
3015
- unique_id = repr (self ) + str (id (self ))
3016
- self .codec_name = encodings .normalize_encoding (unique_id ).lower ()
3105
+ self .codec_name = 'exception_chaining_test'
3106
+ codecs .register (_get_test_codec )
3107
+ self .addCleanup (codecs .unregister , _get_test_codec )
3017
3108
3018
3109
# We store the object to raise on the instance because of a bad
3019
3110
# interaction between the codec caching (which means we can't
@@ -3028,10 +3119,6 @@ def tearDown(self):
3028
3119
_TEST_CODECS .pop (self .codec_name , None )
3029
3120
# Issue #22166: Also pop from caches to avoid appearance of ref leaks
3030
3121
encodings ._cache .pop (self .codec_name , None )
3031
- try :
3032
- _forget_codec (self .codec_name )
3033
- except KeyError :
3034
- pass
3035
3122
3036
3123
def set_codec (self , encode , decode ):
3037
3124
codec_info = codecs .CodecInfo (encode , decode ,
@@ -3710,5 +3797,46 @@ def test_rot13_func(self):
3710
3797
'To be, or not to be, that is the question' )
3711
3798
3712
3799
3800
class CodecNameNormalizationTest(unittest.TestCase):
    """Test codec name normalization"""

    # TODO: RUSTPYTHON, AssertionError: Tuples differ: (1, 2, 3, 4) != (None, None, None, None)
    @unittest.expectedFailure
    def test_codecs_lookup(self):
        # The search function recognises only the exact spelling "aaa_8",
        # so a FOUND result shows that codecs.lookup() handed it exactly
        # that name; anything else comes back as NOT_FOUND.
        FOUND = (1, 2, 3, 4)
        NOT_FOUND = (None, None, None, None)
        def search_function(encoding):
            if encoding == "aaa_8":
                return FOUND
            else:
                return NOT_FOUND

        codecs.register(search_function)
        # Remove the search function again even if an assertion fails.
        self.addCleanup(codecs.unregister, search_function)
        self.assertEqual(FOUND, codecs.lookup('aaa_8'))
        self.assertEqual(FOUND, codecs.lookup('AAA-8'))
        self.assertEqual(FOUND, codecs.lookup('AAA---8'))
        self.assertEqual(FOUND, codecs.lookup('AAA 8'))
        self.assertEqual(FOUND, codecs.lookup('aaa\xe9\u20ac-8'))
        self.assertEqual(NOT_FOUND, codecs.lookup('AAA.8'))
        self.assertEqual(NOT_FOUND, codecs.lookup('AAA...8'))
        self.assertEqual(NOT_FOUND, codecs.lookup('BBB-8'))
        self.assertEqual(NOT_FOUND, codecs.lookup('BBB.8'))
        self.assertEqual(NOT_FOUND, codecs.lookup('a\xe9\u20ac-8'))

    # TODO: RUSTPYTHON, AssertionError
    @unittest.expectedFailure
    def test_encodings_normalize_encoding(self):
        # encodings.normalize_encoding() ignores non-ASCII characters.
        normalize = encodings.normalize_encoding
        self.assertEqual(normalize('utf_8'), 'utf_8')
        self.assertEqual(normalize('utf\xE9\u20AC\U0010ffff-8'), 'utf_8')
        self.assertEqual(normalize('utf 8'), 'utf_8')
        # encodings.normalize_encoding() doesn't convert
        # characters to lower case.
        self.assertEqual(normalize('UTF 8'), 'UTF_8')
        self.assertEqual(normalize('utf.8'), 'utf.8')
        self.assertEqual(normalize('utf...8'), 'utf...8')
+
3840
+
3713
3841
if __name__ == "__main__" :
3714
3842
unittest .main ()
0 commit comments