Merge pull request RustPython#2419 from ishigoya/master

youknowone · web-flow · commit dd8d3c1bfd33 · 2021-01-29T14:20:38.000+09:00
remaining test_unicode* from v3.8.7
diff --git a/Lib/test/test_unicode_file.py b/Lib/test/test_unicode_file.py
@@ -0,0 +1,141 @@
+# Test some Unicode file name semantics
+# We don't test many operations on files other than
+# that their names can be used with Unicode characters.
+import os, glob, time, shutil
+import unicodedata
+
+import unittest
+from test.support import (run_unittest, rmtree, change_cwd,
+    TESTFN_ENCODING, TESTFN_UNICODE, TESTFN_UNENCODABLE, create_empty_file)
+
+if not os.path.supports_unicode_filenames:  # probe the encoding instead
+    try:
+        TESTFN_UNICODE.encode(TESTFN_ENCODING)
+    except (UnicodeError, TypeError):
+        # TypeError: the file system encoding is None.  UnicodeError: the
+        # file name cannot be encoded in the file system encoding.
+        raise unittest.SkipTest("No Unicode filesystem semantics on this platform.")
+
+def remove_if_exists(filename):  # used before a test (re)creates filename
+    if os.path.exists(filename):
+        os.unlink(filename)
+
+class TestUnicodeFiles(unittest.TestCase):
+    # The '_do_' methods are the actual tests.  They generally assume the
+    # file already exists etc.
+
+    # Do all the tests we can given only a single filename.  The file should
+    # exist.
+    def _do_single(self, filename):
+        self.assertTrue(os.path.exists(filename))
+        self.assertTrue(os.path.isfile(filename))
+        self.assertTrue(os.access(filename, os.R_OK))
+        self.assertTrue(os.path.exists(os.path.abspath(filename)))
+        self.assertTrue(os.path.isfile(os.path.abspath(filename)))
+        self.assertTrue(os.access(os.path.abspath(filename), os.R_OK))
+        os.chmod(filename, 0o777)
+        os.utime(filename, None)
+        os.utime(filename, (time.time(), time.time()))
+        # Copy/rename etc tests using the same filename
+        self._do_copyish(filename, filename)
+        # Filename should appear in glob output
+        self.assertEqual(os.path.abspath(filename),
+                         os.path.abspath(glob.glob(glob.escape(filename))[0]))
+        # basename should appear in listdir.
+        path, base = os.path.split(os.path.abspath(filename))
+        file_list = os.listdir(path)
+        # Normalize the unicode strings, as round-tripping the name via the OS
+        # may return a different (but equivalent) value.
+        base = unicodedata.normalize("NFD", base)
+        file_list = [unicodedata.normalize("NFD", f) for f in file_list]
+
+        self.assertIn(base, file_list)
+
+    # Tests that copy, move, etc one file to another.
+    def _do_copyish(self, filename1, filename2):
+        # Should be able to rename the file using either name.
+        self.assertTrue(os.path.isfile(filename1)) # must exist.
+        os.rename(filename1, filename2 + ".new")
+        self.assertFalse(os.path.isfile(filename2))
+        self.assertTrue(os.path.isfile(filename1 + '.new'))
+        os.rename(filename1 + ".new", filename2)
+        self.assertFalse(os.path.isfile(filename1 + '.new'))
+        self.assertTrue(os.path.isfile(filename2))
+
+        shutil.copy(filename1, filename2 + ".new")
+        os.unlink(filename1 + ".new") # remove using equiv name.
+        # And a couple of moves, one using each name.
+        shutil.move(filename1, filename2 + ".new")
+        self.assertFalse(os.path.exists(filename2))
+        self.assertTrue(os.path.exists(filename1 + '.new'))
+        shutil.move(filename1 + ".new", filename2)
+        self.assertFalse(os.path.exists(filename2 + '.new'))
+        self.assertTrue(os.path.exists(filename1))
+        # Note - due to the implementation of shutil.move,
+        # it tries a rename first.  This only fails on Windows when on
+        # different file systems - and this test can't ensure that.
+        # So we test the shutil.copy2 function, which is the thing most
+        # likely to fail.
+        shutil.copy2(filename1, filename2 + ".new")
+        self.assertTrue(os.path.isfile(filename1 + '.new'))
+        os.unlink(filename1 + ".new")
+        self.assertFalse(os.path.exists(filename2 + '.new'))
+
+    # Make a directory as make_name, enter it as chdir_name, check the cwd.
+    def _do_directory(self, make_name, chdir_name):
+        if os.path.isdir(make_name):
+            rmtree(make_name)
+        os.mkdir(make_name)
+        try:
+            with change_cwd(chdir_name):
+                # Compare in NFD, as the OS may hand the name back in a
+                # different (but equivalent) form.
+                cwd_result = unicodedata.normalize("NFD", os.getcwd())
+                name_result = unicodedata.normalize("NFD", make_name)
+
+                self.assertEqual(os.path.basename(cwd_result), name_result)
+        finally:
+            os.rmdir(make_name)
+
+    # The '_test' functions are 'entry points with params' - ie, what the
+    # top-level 'test' functions would be if they could take params
+    def _test_single(self, filename):
+        remove_if_exists(filename)
+        create_empty_file(filename)
+        try:
+            self._do_single(filename)
+        finally:
+            os.unlink(filename)
+        self.assertFalse(os.path.exists(filename))
+        # and again with os.open.
+        f = os.open(filename, os.O_CREAT)
+        os.close(f)
+        try:
+            self._do_single(filename)
+        finally:
+            os.unlink(filename)
+
+    # The 'test' functions are unittest entry points, and simply call our
+    # _test functions with each of the filename combinations we wish to test
+    @unittest.skip("TODO: RUSTPYTHON")
+    def test_single_files(self):
+        self._test_single(TESTFN_UNICODE)
+        if TESTFN_UNENCODABLE is not None:
+            self._test_single(TESTFN_UNENCODABLE)
+
+    def test_directories(self):
+        # For all 'equivalent' combinations:
+        #  Make dir with encoded, chdir with unicode, checkdir with encoded
+        #  (or unicode/encoded/unicode, etc)
+        ext = ".dir"
+        self._do_directory(TESTFN_UNICODE+ext, TESTFN_UNICODE+ext)
+        # Our directory name that can't use a non-unicode name.
+        if TESTFN_UNENCODABLE is not None:
+            self._do_directory(TESTFN_UNENCODABLE+ext,
+                               TESTFN_UNENCODABLE+ext)
+
+def test_main():
+    run_unittest(__name__)  # pass this module's name to run_unittest
+
+if __name__ == "__main__":
+    test_main()  # allow running this file directly as a script
diff --git a/Lib/test/test_unicode_file_functions.py b/Lib/test/test_unicode_file_functions.py
@@ -0,0 +1,198 @@
+# Test the Unicode versions of normal file functions
+# open, os.open, os.stat, os.listdir, os.rename, os.remove, os.mkdir, os.chdir, os.rmdir
+import os
+import sys
+import unittest
+import warnings
+from unicodedata import normalize
+from test import support
+
+filenames = [
+    '1_abc',
+    '2_ascii',
+    '3_Gr\xfc\xdf-Gott',  # Latin-1 range (U+00FC, U+00DF)
+    '4_\u0393\u03b5\u03b9\u03ac-\u03c3\u03b1\u03c2',  # Greek
+    '5_\u0417\u0434\u0440\u0430\u0432\u0441\u0442\u0432\u0443\u0439\u0442\u0435',  # Cyrillic
+    '6_\u306b\u307d\u3093',  # Hiragana
+    '7_\u05d4\u05e9\u05e7\u05e6\u05e5\u05e1',  # Hebrew
+    '8_\u66e8\u66e9\u66eb',  # CJK ideographs
+    '9_\u66e8\u05e9\u3093\u0434\u0393\xdf',  # mix of the scripts above
+    # Specific code points: fn, NFC(fn) and NFKC(fn) all different
+    '10_\u1fee\u1ffd',
+    ]
+
+# Mac OS X decomposes Unicode names, using Normal Form D.
+# http://developer.apple.com/mac/library/qa/qa2001/qa1173.html
+# "However, most volume formats do not follow the exact specification for
+# these normal forms.  For example, HFS Plus uses a variant of Normal Form D
+# in which U+2000 through U+2FFF, U+F900 through U+FAFF, and U+2F800 through
+# U+2FAFF are not decomposed."
+if sys.platform != 'darwin':  # not added on macOS, which decomposes names
+    filenames.extend([
+        # Specific code points: NFC(fn), NFD(fn), NFKC(fn) and NFKD(fn) all different
+        '11_\u0385\u03d3\u03d4',
+        '12_\u00a8\u0301\u03d2\u0301\u03d2\u0308', # == NFD('\u0385\u03d3\u03d4')
+        '13_\u0020\u0308\u0301\u038e\u03ab',       # == NFKC('\u0385\u03d3\u03d4')
+        '14_\u1e9b\u1fc1\u1fcd\u1fce\u1fcf\u1fdd\u1fde\u1fdf\u1fed',
+
+        # Specific code points: fn, NFC(fn) and NFKC(fn) all different
+        '15_\u1fee\u1ffd\ufad1',
+        '16_\u2000\u2000\u2000A',  # U+2000 EN QUAD
+        '17_\u2001\u2001\u2001A',  # U+2001 EM QUAD
+        '18_\u2003\u2003\u2003A',  # == NFC('\u2001\u2001\u2001A')
+        '19_\u0020\u0020\u0020A',  # '\u0020' == ' ' == NFKC('\u2000') ==
+                                   #  NFKC('\u2001') == NFKC('\u2003')
+    ])
+
+
+# Is it Unicode-friendly?
+if not os.path.supports_unicode_filenames:
+    fsencoding = sys.getfilesystemencoding()
+    try:
+        for name in filenames:
+            name.encode(fsencoding)  # raises on the first unencodable name
+    except UnicodeEncodeError:
+        raise unittest.SkipTest("only NT+ and systems with "
+                                "Unicode-friendly filesystem encoding")
+
+
+class UnicodeFileTests(unittest.TestCase):
+    files = set(filenames)  # bare names; setUp() rebinds self.files to paths
+    normal_form = None      # subclasses set a form that norm() applies
+
+    def setUp(self):
+        try:
+            os.mkdir(support.TESTFN)
+        except FileExistsError:
+            pass  # an already existing TESTFN is reused
+        self.addCleanup(support.rmtree, support.TESTFN)
+
+        files = set()
+        for name in self.files:
+            name = os.path.join(support.TESTFN, self.norm(name))
+            with open(name, 'wb') as f:
+                f.write((name+'\n').encode("utf-8"))
+            os.stat(name)
+            files.add(name)
+        self.files = files
+
+    def norm(self, s):  # apply normal_form to s, or return s unchanged
+        if self.normal_form:
+            return normalize(self.normal_form, s)
+        return s
+
+    def _apply_failure(self, fn, filename,
+                       expected_exception=FileNotFoundError,
+                       check_filename=True):
+        with self.assertRaises(expected_exception) as c:
+            fn(filename)
+        exc_filename = c.exception.filename
+        if check_filename:
+            self.assertEqual(exc_filename, filename, "Function '%s(%a)' failed"
+                             " with bad filename in the exception: %a" %
+                             (fn.__name__, filename, exc_filename))
+
+    @unittest.skip("TODO: RUSTPYTHON")
+    def test_failures(self):
+        # Pass non-existing Unicode filenames all over the place.
+        for name in self.files:
+            name = "not_" + name
+            self._apply_failure(open, name)
+            self._apply_failure(os.stat, name)
+            self._apply_failure(os.chdir, name)
+            self._apply_failure(os.rmdir, name)
+            self._apply_failure(os.remove, name)
+            self._apply_failure(os.listdir, name)
+
+    if sys.platform == 'win32':
+        # Windows may raise either exception here. Issue #13366.
+        _listdir_failure = NotADirectoryError, FileNotFoundError
+    else:
+        _listdir_failure = NotADirectoryError
+
+    @unittest.skip("TODO: RUSTPYTHON")
+    def test_open(self):
+        for name in self.files:
+            with open(name, 'wb') as f:
+                f.write((name+'\n').encode("utf-8"))
+            os.stat(name)
+            # os.listdir() on a regular file has to fail.
+            self._apply_failure(os.listdir, name, self._listdir_failure)
+
+    # Skip the test on darwin, because darwin does normalize the filename to
+    # NFD (a variant of Unicode NFD form). Normalizing the filename to NFC,
+    # NFKC or NFKD in Python is useless, because darwin will normalize it
+    # later and so open(), os.stat(), etc. don't raise any exception.
+    @unittest.skip("TODO: RUSTPYTHON")
+    @unittest.skipIf(sys.platform == 'darwin', 'irrelevant test on Mac OS X')
+    def test_normalize(self):
+        files = set(self.files)
+        others = set()
+        for nf in ('NFC', 'NFD', 'NFKC', 'NFKD'):
+            others |= {normalize(nf, file) for file in files}
+        others -= files
+        for name in others:
+            self._apply_failure(open, name)
+            self._apply_failure(os.stat, name)
+            self._apply_failure(os.chdir, name)
+            self._apply_failure(os.rmdir, name)
+            self._apply_failure(os.remove, name)
+            self._apply_failure(os.listdir, name)
+
+    # Skip the test on darwin, because darwin uses a normalization different
+    # than Python NFD normalization: filenames are different even if we use
+    # Python NFD normalization.
+    @unittest.skipIf(sys.platform == 'darwin', 'irrelevant test on Mac OS X')
+    def test_listdir(self):
+        sf0 = set(self.files)
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore", DeprecationWarning)
+            f1 = os.listdir(support.TESTFN.encode(sys.getfilesystemencoding()))
+        f2 = os.listdir(support.TESTFN)
+        sf2 = {os.path.join(support.TESTFN, f) for f in f2}
+        self.assertEqual(sf0, sf2, "%a != %a" % (sf0, sf2))
+        self.assertEqual(len(f1), len(f2))
+
+    def test_rename(self):
+        for name in self.files:
+            os.rename(name, "tmp")
+            os.rename("tmp", name)
+
+    def test_directory(self):
+        dirname = os.path.join(support.TESTFN, 'Gr\xfc\xdf-\u66e8\u66e9\u66eb')
+        filename = '\xdf-\u66e8\u66e9\u66eb'
+        with support.temp_cwd(dirname):
+            with open(filename, 'wb') as f:
+                f.write((filename + '\n').encode("utf-8"))
+            self.assertTrue(os.access(filename, os.R_OK))
+            os.remove(filename)
+
+
+class UnicodeNFCFileTests(UnicodeFileTests):
+    normal_form = 'NFC'  # inherited tests run on NFC-normalized file names
+
+
+class UnicodeNFDFileTests(UnicodeFileTests):
+    normal_form = 'NFD'  # inherited tests run on NFD-normalized file names
+
+
+class UnicodeNFKCFileTests(UnicodeFileTests):
+    normal_form = 'NFKC'  # inherited tests run on NFKC-normalized file names
+
+
+class UnicodeNFKDFileTests(UnicodeFileTests):
+    normal_form = 'NFKD'  # inherited tests run on NFKD-normalized file names
+
+
+def test_main():
+    support.run_unittest(  # base class plus one subclass per normal form
+        UnicodeFileTests,
+        UnicodeNFCFileTests,
+        UnicodeNFDFileTests,
+        UnicodeNFKCFileTests,
+        UnicodeNFKDFileTests,
+    )
+
+
+if __name__ == "__main__":
+    test_main()  # allow running this file directly as a script
diff --git a/Lib/test/test_unicode_identifiers.py b/Lib/test/test_unicode_identifiers.py
@@ -0,0 +1,34 @@
+import unittest
+
+class PEP3131Test(unittest.TestCase):
+    # Checks for PEP 3131 (non-ASCII identifiers).
+    @unittest.skip("TODO: RUSTPYTHON")
+    def test_valid(self):
+        class T:
+            ä = 1
+            µ = 2 # a compatibility character; read back below as "\u03bc"
+            蟒 = 3
+            x󠄀 = 4 # 'x' plus an invisible U+E0100 (see the getattr below)
+        self.assertEqual(getattr(T, "\xe4"), 1)
+        self.assertEqual(getattr(T, "\u03bc"), 2)
+        self.assertEqual(getattr(T, '\u87d2'), 3)
+        self.assertEqual(getattr(T, 'x\U000E0100'), 4)
+
+    # TODO: RUSTPYTHON
+    @unittest.expectedFailure
+    def test_non_bmp_normalized(self):
+        𝔘𝔫𝔦𝔠𝔬𝔡𝔢 = 1  # non-BMP letters; expected in dir() as "Unicode"
+        self.assertIn("Unicode", dir())
+
+    @unittest.skip("TODO: RUSTPYTHON")
+    def test_invalid(self):
+        try:
+            from test import badsyntax_3131  # expected to raise SyntaxError
+        except SyntaxError as s:
+            self.assertEqual(str(s),
+              "invalid character in identifier (badsyntax_3131.py, line 2)")
+        else:
+            self.fail("expected exception didn't occur")
+
+if __name__ == "__main__":
+    unittest.main()  # run the tests above when executed as a script
diff --git a/Lib/test/test_unicodedata.py b/Lib/test/test_unicodedata.py