sthagen
diff --git a/Lib/test/test_importlib/test_locks.py b/Lib/test/test_importlib/test_locks.py
Lines changed: 1 addition & 0 deletions
diff --git a/Lib/test/test_threading.py b/Lib/test/test_threading.py
Lines changed: 5 additions & 0 deletions
diff --git a/Lib/test/test_utf8_mode.py b/Lib/test/test_utf8_mode.py
Lines changed: 271 additions & 0 deletions
diff --git a/Lib/test/test_utf8source.py b/Lib/test/test_utf8source.py
Lines changed: 47 additions & 0 deletions
@@ -96,6 +96,7 @@ def test_deadlock(self):
         self.assertGreaterEqual(nb_deadlocks, 1)
         self.assertEqual(results.count((True, True)), len(results) - nb_deadlocks)
 
+    @unittest.skip("TODO: RUSTPYTHON, flaky test")
     def test_no_deadlock(self):
         results = self.run_deadlock_avoidance_test(False)
         self.assertEqual(results.count((True, False)), 0)
 
@@ -1329,6 +1329,11 @@ class PyRLockTests(lock_tests.RLockTests):
 class CRLockTests(lock_tests.RLockTests):
     locktype = staticmethod(threading._CRLock)
 
+    # TODO: RUSTPYTHON
+    # This override exists only to carry the skip marker; the body just
+    # delegates to the implementation inherited from the parent class.
+    @unittest.skip("TODO: RUSTPYTHON, flaky test")
+    def test_different_thread(self):
+        super().test_different_thread()
+
     # TODO: RUSTPYTHON
     @unittest.expectedFailure
     def test_release_save_unacquired(self):
 
@@ -0,0 +1,271 @@
+"""
+Test the implementation of the PEP 540: the UTF-8 Mode.
+"""
+
+import locale
+import sys
+import textwrap
+import unittest
+from test import support
+from test.support.script_helper import assert_python_ok, assert_python_failure
+
+
+# True when sys.platform reports Windows.
+MS_WINDOWS = (sys.platform == 'win32')
+# Locale names that the tests below treat as the POSIX locale.
+POSIX_LOCALES = ('C', 'POSIX')
+# True when sys.platform reports VxWorks.
+VXWORKS = (sys.platform == "vxworks")
+
+class UTF8ModeTests(unittest.TestCase):
+    # Baseline keyword arguments that get_output() merges with the ones
+    # supplied by each test before launching the child interpreter; a test
+    # overrides an entry by passing the same key explicitly.
+    DEFAULT_ENV = {
+        'PYTHONUTF8': '',
+        'PYTHONLEGACYWINDOWSFSENCODING': '',
+        'PYTHONCOERCECLOCALE': '0',
+    }
+
+    def posix_locale(self):
+        """Return True if the current LC_CTYPE setting is in POSIX_LOCALES."""
+        # Passing None as the locale queries the setting without changing it.
+        current = locale.setlocale(locale.LC_CTYPE, None)
+        return current in POSIX_LOCALES
+
+    def get_output(self, *args, failure=False, **kw):
+        """Run a child interpreter and return one of its output streams as text.
+
+        *args* are forwarded unchanged to the script helper.  *kw* is layered
+        on top of DEFAULT_ENV, so explicit keywords win over the defaults.
+
+        When *failure* is false the child goes through assert_python_ok() and
+        element 1 of its result is returned; when true it goes through
+        assert_python_failure() and element 2 is returned.  Per the
+        test.support.script_helper documentation those elements are stdout
+        and stderr respectively.  The bytes are decoded with the default
+        codec and trailing "\n" / "\r" characters are removed.
+        """
+        env = dict(self.DEFAULT_ENV, **kw)
+        if failure:
+            runner, index = assert_python_failure, 2
+        else:
+            runner, index = assert_python_ok, 1
+        raw = runner(*args, **env)[index]
+        return raw.decode().rstrip("\n\r")
+
+    @unittest.skipIf(MS_WINDOWS, 'Windows has no POSIX locale')
+    def test_posix_locale(self):
+        # With LC_ALL set to each POSIX locale name, the child must print
+        # sys.flags.utf8_mode as 1.
+        script = 'import sys; print(sys.flags.utf8_mode)'
+
+        for posix_loc in POSIX_LOCALES:
+            with self.subTest(LC_ALL=posix_loc):
+                flag = self.get_output('-c', script, LC_ALL=posix_loc)
+                self.assertEqual(flag, '1')
+
+    # TODO: RUSTPYTHON
+    @unittest.expectedFailure
+    def test_xoption(self):
+        # Each -X utf8 spelling must be reflected in sys.flags.utf8_mode.
+        script = 'import sys; print(sys.flags.utf8_mode)'
+
+        # 'utf8=1' is an undocumented but accepted syntax.
+        spellings = (
+            ('utf8', '1'),
+            ('utf8=1', '1'),
+            ('utf8=0', '0'),
+        )
+        for xoption, expected_flag in spellings:
+            flag = self.get_output('-X', xoption, '-c', script)
+            self.assertEqual(flag, expected_flag)
+
+        if MS_WINDOWS:
+            # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 Mode
+            # and has the priority over -X utf8
+            flag = self.get_output('-X', 'utf8', '-c', script,
+                                   PYTHONLEGACYWINDOWSFSENCODING='1')
+            self.assertEqual(flag, '0')
+
+    # TODO: RUSTPYTHON
+    @unittest.expectedFailure
+    def test_env_var(self):
+        # Exercise PYTHONUTF8 on its own and combined with -X utf8, -E and
+        # PYTHONLEGACYWINDOWSFSENCODING.
+        script = 'import sys; print(sys.flags.utf8_mode)'
+
+        # The child is expected to echo back the value given in PYTHONUTF8.
+        for value in ('1', '0'):
+            flag = self.get_output('-c', script, PYTHONUTF8=value)
+            self.assertEqual(flag, value)
+
+        # -X utf8 has the priority over PYTHONUTF8
+        flag = self.get_output('-X', 'utf8=0', '-c', script, PYTHONUTF8='1')
+        self.assertEqual(flag, '0')
+
+        if MS_WINDOWS:
+            # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 mode
+            # and has the priority over PYTHONUTF8
+            flag = self.get_output('-X', 'utf8', '-c', script, PYTHONUTF8='1',
+                                   PYTHONLEGACYWINDOWSFSENCODING='1')
+            self.assertEqual(flag, '0')
+
+        # Cannot test with the POSIX locale, since the POSIX locale enables
+        # the UTF-8 mode
+        if not self.posix_locale():
+            # PYTHONUTF8 should be ignored if -E is used
+            flag = self.get_output('-E', '-c', script, PYTHONUTF8='1')
+            self.assertEqual(flag, '0')
+
+        # invalid mode
+        error_text = self.get_output('-c', script, PYTHONUTF8='xxx',
+                                     failure=True)
+        self.assertIn('invalid PYTHONUTF8 environment variable value',
+                      error_text.rstrip())
+
+    def test_filesystemencoding(self):
+        # The child prints "<filesystem encoding>/<error handler>"; under
+        # -X utf8 the expected handler depends on the platform.
+        script = textwrap.dedent('''
+            import sys
+            print("{}/{}".format(sys.getfilesystemencoding(),
+                                 sys.getfilesystemencodeerrors()))
+        ''')
+
+        expected = ('utf-8/surrogatepass' if MS_WINDOWS
+                    else 'utf-8/surrogateescape')
+        reported = self.get_output('-X', 'utf8', '-c', script)
+        self.assertEqual(reported, expected)
+
+        if MS_WINDOWS:
+            # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 mode
+            # and has the priority over -X utf8 and PYTHONUTF8
+            reported = self.get_output('-X', 'utf8', '-c', script,
+                                       PYTHONUTF8='strict',
+                                       PYTHONLEGACYWINDOWSFSENCODING='1')
+            self.assertEqual(reported, 'mbcs/replace')
+
+    # TODO: RUSTPYTHON
+    # Only on Windows is the test marked as an expected failure.
+    if MS_WINDOWS:
+        test_filesystemencoding = unittest.expectedFailure(test_filesystemencoding)
+
+    # TODO: RUSTPYTHON
+    @unittest.expectedFailure
+    def test_stdio(self):
+        # The child reports encoding/errors of the three standard streams;
+        # run it under -X utf8 with several PYTHONIOENCODING values.
+        script = textwrap.dedent('''
+            import sys
+            print(f"stdin: {sys.stdin.encoding}/{sys.stdin.errors}")
+            print(f"stdout: {sys.stdout.encoding}/{sys.stdout.errors}")
+            print(f"stderr: {sys.stderr.encoding}/{sys.stderr.errors}")
+        ''')
+
+        # (PYTHONIOENCODING value, expected output lines), checked in order.
+        scenarios = (
+            ('',
+             ['stdin: utf-8/surrogateescape',
+              'stdout: utf-8/surrogateescape',
+              'stderr: utf-8/backslashreplace']),
+            # PYTHONIOENCODING has the priority over PYTHONUTF8
+            ("latin1",
+             ['stdin: iso8859-1/strict',
+              'stdout: iso8859-1/strict',
+              'stderr: iso8859-1/backslashreplace']),
+            (":namereplace",
+             ['stdin: utf-8/namereplace',
+              'stdout: utf-8/namereplace',
+              'stderr: utf-8/backslashreplace']),
+        )
+        for io_encoding, expected_lines in scenarios:
+            reported = self.get_output('-X', 'utf8', '-c', script,
+                                       PYTHONIOENCODING=io_encoding)
+            self.assertEqual(reported.splitlines(), expected_lines)
+
+    # TODO: RUSTPYTHON
+    @unittest.expectedFailure
+    def test_io(self):
+        # With PYTHONUTF8=1, a plain open() in the child must report
+        # UTF-8/strict as its encoding and error handler.
+        script = textwrap.dedent('''
+            import sys
+            filename = sys.argv[1]
+            with open(filename) as fp:
+                print(f"{fp.encoding}/{fp.errors}")
+        ''')
+
+        # This test file itself is the file the child opens.
+        reported = self.get_output('-c', script, __file__, PYTHONUTF8='1')
+        self.assertEqual(reported, 'UTF-8/strict')
+
+    def _check_io_encoding(self, module, encoding=None, errors=None):
+        """Check what *module*'s open() reports in a child run with PYTHONUTF8=1.
+
+        The child imports open() from *module*, opens this test file with
+        whichever of *encoding* / *errors* is truthy passed explicitly, and
+        prints "<encoding>/<errors>" of the resulting file object.  A value
+        that was not passed is expected to be reported as 'UTF-8' (encoding)
+        or 'strict' (errors).
+        """
+        # Only the explicitly requested settings become keyword arguments.
+        requested = (('encoding', encoding), ('errors', errors))
+        open_kwargs = ', '.join(
+            f'{name}={value!r}' for name, value in requested if value
+        )
+
+        template = textwrap.dedent('''
+            import sys
+            from %s import open
+            filename = sys.argv[1]
+            with open(filename, %s) as fp:
+                print(f"{fp.encoding}/{fp.errors}")
+        ''')
+        script = template % (module, open_kwargs)
+        reported = self.get_output('-c', script, __file__,
+                                   PYTHONUTF8='1')
+
+        expected_encoding = encoding or 'UTF-8'
+        expected_errors = errors or 'strict'
+        self.assertEqual(reported, f'{expected_encoding}/{expected_errors}')
+
+    def check_io_encoding(self, module):
+        """Run _check_io_encoding() for *module* with three argument sets.
+
+        The sets are, in order: encoding only, errors only, then both.
+        """
+        combinations = (
+            {'encoding': "latin1"},
+            {'errors': "namereplace"},
+            {'encoding': "latin1", 'errors': "namereplace"},
+        )
+        for open_kwargs in combinations:
+            self._check_io_encoding(module, **open_kwargs)
+
+    # TODO: RUSTPYTHON
+    # Runs the shared open() checks with open() imported from the 'io' module.
+    @unittest.expectedFailure
+    def test_io_encoding(self):
+        self.check_io_encoding('io')
+
+    # Runs the shared open() checks with open() imported from the '_pyio'
+    # module.
+    def test_pyio_encoding(self):
+        self.check_io_encoding('_pyio')
+
+    def test_locale_getpreferredencoding(self):
+        # Under -X utf8 both getpreferredencoding(False) and
+        # getpreferredencoding(True) must print UTF-8, with and without
+        # LC_ALL set to a POSIX locale name.
+        script = 'import locale; print(locale.getpreferredencoding(False), locale.getpreferredencoding(True))'
+        expected = 'UTF-8 UTF-8'
+
+        reported = self.get_output('-X', 'utf8', '-c', script)
+        self.assertEqual(reported, expected)
+
+        for posix_loc in POSIX_LOCALES:
+            with self.subTest(LC_ALL=posix_loc):
+                reported = self.get_output('-X', 'utf8', '-c', script,
+                                           LC_ALL=posix_loc)
+                self.assertEqual(reported, expected)
+
+    # TODO: RUSTPYTHON
+    @unittest.expectedFailure
+    @unittest.skipIf(MS_WINDOWS, 'test specific to Unix')
+    def test_cmd_line(self):
+        # Pass a non-ASCII argument as raw UTF-8 bytes and check the ascii()
+        # repr of sys.argv[1:] that the child prints back.
+        raw_arg = 'h\xe9\u20ac'.encode('utf-8')
+        as_utf8 = raw_arg.decode('utf-8')
+        as_ascii = raw_arg.decode('ascii', 'surrogateescape')
+        script = 'import locale, sys; print("%s:%s" % (locale.getpreferredencoding(), ascii(sys.argv[1:])))'
+
+        def check(utf8_opt, expected, **kw):
+            output = self.get_output('-X', utf8_opt, '-c', script, raw_arg,
+                                     **kw)
+            # Compare only the text after the first ':' (the argv repr).
+            argv_repr = output.partition(':')[2].rstrip()
+            self.assertEqual(argv_repr, ascii(expected), output)
+
+        check('utf8', [as_utf8])
+        for posix_loc in POSIX_LOCALES:
+            with self.subTest(LC_ALL=posix_loc):
+                check('utf8', [as_utf8], LC_ALL=posix_loc)
+
+        # With utf8=0 the expected decoding of the argument is
+        # platform-dependent.
+        if sys.platform == 'darwin' or support.is_android or VXWORKS:
+            expected_arg = as_utf8
+        elif sys.platform.startswith("aix"):
+            expected_arg = raw_arg.decode('iso-8859-1')
+        else:
+            expected_arg = as_ascii
+        for posix_loc in POSIX_LOCALES:
+            with self.subTest(LC_ALL=posix_loc):
+                check('utf8=0', [expected_arg], LC_ALL=posix_loc)
+
+    def test_optim_level(self):
+        # CPython: check that Py_Main() doesn't increment Py_OptimizeFlag
+        # twice when -X utf8 requires to parse the configuration twice (when
+        # the encoding changes after reading the configuration, the
+        # configuration is read again with the new encoding).
+        script = 'import sys; print(sys.flags.optimize)'
+        for opt_flag, expected_level in (('-O', '1'), ('-OO', '2')):
+            level = self.get_output('-X', 'utf8', opt_flag, '-c', script)
+            self.assertEqual(level, expected_level)
+
+        # -E combined with -X utf8 must still be reported exactly once.
+        script = 'import sys; print(sys.flags.ignore_environment)'
+        ignore_env = self.get_output('-X', 'utf8', '-E', '-c', script)
+        self.assertEqual(ignore_env, '1')
+
+
+# Run the tests when this file is executed directly as a script.
+if __name__ == "__main__":
+    unittest.main()
@@ -0,0 +1,47 @@
+# This file is marked as binary in the CVS, to prevent MacCVS from recoding it.
+
+import unittest
+
+class PEP3120Test(unittest.TestCase):
+    # Checks related to source files being decoded as UTF-8 (PEP 3120).
+
+    def test_pep3120(self):
+        # Non-ASCII literals in this file must encode to the expected
+        # UTF-8 byte sequences.
+        cyrillic_utf8 = b'\xd0\x9f\xd0\xb8\xd1\x82\xd0\xbe\xd0\xbd'
+        self.assertEqual("Питон".encode("utf-8"), cyrillic_utf8)
+
+        # A backslash followed by a non-ASCII letter keeps the backslash.
+        backslash_utf8 = b'\\\xd0\x9f'
+        self.assertEqual("\П".encode("utf-8"), backslash_utf8)
+
+    # TODO: RUSTPYTHON
+    @unittest.expectedFailure # "badsyntax_pep3120.py" may make the WASM CI fail
+    def test_badsyntax(self):
+        # Importing the helper module must raise a SyntaxError whose
+        # message mentions utf-8.
+        try:
+            import test.badsyntax_pep3120
+        except SyntaxError as exc:
+            message = str(exc).lower()
+            self.assertTrue('utf-8' in message)
+        else:
+            self.fail("expected exception didn't occur")
+
+
+class BuiltinCompileTests(unittest.TestCase):
+
+    # TODO: RUSTPYTHON
+    @unittest.expectedFailure
+    # Issue 3574.
+    def test_latin1(self):
+        # Allow compile() to read Latin-1 source.
+        latin1_source = '# coding: Latin-1\nu = "Ç"\n'.encode("Latin-1")
+        try:
+            compiled = compile(latin1_source, '<dummy>', 'exec')
+        except SyntaxError:
+            self.fail("compile() cannot handle Latin-1 source")
+        # Execute the compiled module and check the decoded literal.
+        namespace = {}
+        exec(compiled, namespace)
+        self.assertEqual('Ç', namespace['u'])
+
+
+# Run the tests when this file is executed directly as a script.
+if __name__ == "__main__":
+    unittest.main()