# test_unicode_file.py revision aa98058cc44ba20f35c106d20918c6196b737561
# Test some Unicode file name semantics.
# We don't test many operations on files other than
# that their names can be used with Unicode characters.
import errno
import glob
import os
import shutil
import time
import unicodedata
import unittest

from test.test_support import run_unittest, TESTFN_UNICODE
from test.test_support import TESTFN_ENCODING, TESTFN_UNICODE_UNENCODEABLE
# Module-level setup: work out a (unicode, encoded) pair of names for the
# same file, or skip the whole module when no such pair can be found.
try:
    TESTFN_ENCODED = TESTFN_UNICODE.encode(TESTFN_ENCODING)
except (UnicodeError, TypeError):
    # Either the file system encoding is None, or the file name
    # cannot be encoded in the file system encoding.
    raise unittest.SkipTest("No Unicode filesystem semantics on this platform.")

if TESTFN_ENCODED.decode(TESTFN_ENCODING) != TESTFN_UNICODE:
    # The file system encoding does not support Latin-1
    # (which test_support assumes), so try the file system
    # encoding instead.
    import sys
    try:
        TESTFN_UNICODE = unicode("@test-\xe0\xf2", sys.getfilesystemencoding())
        TESTFN_ENCODED = TESTFN_UNICODE.encode(TESTFN_ENCODING)
        if '?' in TESTFN_ENCODED:
            # MBCS will not report the error properly, so turn the
            # replacement character into an explicit error here.
            raise UnicodeError("mbcs encoding problem")
    except (UnicodeError, TypeError):
        raise unittest.SkipTest("Cannot find a suitable filename.")

# The encoded name must round-trip back to the unicode name, otherwise the
# two spellings would not refer to the same file.
if TESTFN_ENCODED.decode(TESTFN_ENCODING) != TESTFN_UNICODE:
    raise unittest.SkipTest("Cannot find a suitable filename.")
33
def remove_if_exists(filename):
    """Delete *filename*, doing nothing if it does not exist.

    The unlink is attempted directly instead of being guarded by
    os.path.exists(): exists() reports False for a dangling symlink, which
    would then be left behind, and a check-then-delete sequence can race
    with something else removing the file in between.

    Raises OSError for any failure other than the path being absent.
    """
    try:
        os.unlink(filename)
    except OSError as exc:
        # ENOENT: the name is not there.  ENOTDIR: a parent component is
        # not a directory, so the name cannot be there either.
        if exc.errno not in (errno.ENOENT, errno.ENOTDIR):
            raise
37
class TestUnicodeFiles(unittest.TestCase):
    """Check that file APIs accept Unicode and encoded names alike.

    The '_do_' methods are the actual tests.  They generally assume the
    file already exists etc.  The '_test_' methods are entry points with
    parameters: they create the fixture, run the '_do_' checks and clean
    up.  The 'test_' methods are the unittest entry points and call the
    '_test_'/'_do_' methods with each filename combination.
    """

    def _cleanup(self, filename):
        """Remove *filename* and the '.new' sibling used by _do_copyish().

        A failure part-way through _do_copyish() can leave the file under
        either name.  Removing whichever one is present keeps a cleanup
        error from replacing the original test failure.
        """
        remove_if_exists(filename)
        remove_if_exists(filename + ".new")

    # Do all the tests we can given only a single filename.  The file should
    # exist.
    def _do_single(self, filename):
        """Run every check that needs only one name for an existing file."""
        abs_name = os.path.abspath(filename)
        self.assertTrue(os.path.exists(filename))
        self.assertTrue(os.path.isfile(filename))
        self.assertTrue(os.access(filename, os.R_OK))
        self.assertTrue(os.path.exists(abs_name))
        self.assertTrue(os.path.isfile(abs_name))
        self.assertTrue(os.access(abs_name, os.R_OK))
        # '0o777' is the octal spelling accepted by Python 2.6+ and 3.x.
        os.chmod(filename, 0o777)
        os.utime(filename, None)
        os.utime(filename, (time.time(), time.time()))
        # Copy/rename etc tests using the same filename
        self._do_copyish(filename, filename)
        # Filename should appear in glob output.  Check for a match first so
        # that a miss is reported as a test failure, not an IndexError.
        matches = glob.glob(filename)
        self.assertTrue(matches,
                        "glob.glob(%r) returned no matches" % (filename,))
        self.assertEqual(abs_name, os.path.abspath(matches[0]))
        # basename should appear in listdir.
        path, base = os.path.split(abs_name)
        if isinstance(base, str):
            base = base.decode(TESTFN_ENCODING)
        file_list = os.listdir(path)
        # listdir() with a unicode arg may or may not return Unicode
        # objects, depending on the platform.
        if file_list and isinstance(file_list[0], str):
            file_list = [f.decode(TESTFN_ENCODING) for f in file_list]

        # Normalize the unicode strings, as round-tripping the name via the OS
        # may return a different (but equivalent) value.
        base = unicodedata.normalize("NFD", base)
        file_list = [unicodedata.normalize("NFD", f) for f in file_list]

        self.assertIn(base, file_list)

    # Do as many "equivalency" tests as we can - ie, check that although we
    # have different types for the filename, they refer to the same file.
    def _do_equivalent(self, filename1, filename2):
        """Check that two differently-typed names refer to the same file."""
        # Note we only check "filename1 against filename2" - we don't bother
        # checking "filename2 against 1", as we assume we are called again with
        # the args reversed.
        self.assertNotEqual(type(filename1), type(filename2),
                    "No point checking equivalent filenames of the same type")
        # stat and lstat should return the same results.
        self.assertEqual(os.stat(filename1),
                         os.stat(filename2))
        self.assertEqual(os.lstat(filename1),
                         os.lstat(filename2))
        # Copy/rename etc tests using equivalent filename
        self._do_copyish(filename1, filename2)

    # Tests that copy, move, etc one file to another.
    def _do_copyish(self, filename1, filename2):
        """Rename, copy and move the file, alternating between both names.

        The file must exist on entry; on success it exists under its
        original name again and the temporary '.new' name is gone.
        """
        # Should be able to rename the file using either name.
        self.assertTrue(os.path.isfile(filename1)) # must exist.
        os.rename(filename1, filename2 + ".new")
        self.assertTrue(os.path.isfile(filename1 + ".new"))
        os.rename(filename1 + ".new", filename2)
        self.assertTrue(os.path.isfile(filename2))

        shutil.copy(filename1, filename2 + ".new")
        os.unlink(filename1 + ".new") # remove using equiv name.
        # And a couple of moves, one using each name.
        shutil.move(filename1, filename2 + ".new")
        self.assertFalse(os.path.exists(filename2))
        shutil.move(filename1 + ".new", filename2)
        self.assertTrue(os.path.exists(filename1))
        # Note - due to the implementation of shutil.move,
        # it tries a rename first.  This only fails on Windows when on
        # different file systems - and this test can't ensure that.
        # So we test the shutil.copy2 function, which is the thing most
        # likely to fail.
        shutil.copy2(filename1, filename2 + ".new")
        os.unlink(filename1 + ".new")

    def _do_directory(self, make_name, chdir_name, encoded):
        """Create a directory under one name and chdir into it via another.

        make_name is passed to os.mkdir()/os.rmdir(), chdir_name to
        os.chdir().  When 'encoded' is true the current directory is read
        with os.getcwd() and, like make_name, decoded with TESTFN_ENCODING
        before comparing; otherwise os.getcwdu() and make_name are compared
        as they are.
        """
        cwd = os.getcwd()
        if os.path.isdir(make_name):
            os.rmdir(make_name)
        os.mkdir(make_name)
        try:
            os.chdir(chdir_name)
            try:
                if not encoded:
                    cwd_result = os.getcwdu()
                    name_result = make_name
                else:
                    cwd_result = os.getcwd().decode(TESTFN_ENCODING)
                    name_result = make_name.decode(TESTFN_ENCODING)

                # Compare normalized forms so that equivalent spellings of
                # the same name compare equal.
                cwd_result = unicodedata.normalize("NFD", cwd_result)
                name_result = unicodedata.normalize("NFD", name_result)

                self.assertEqual(os.path.basename(cwd_result), name_result)
            finally:
                # Leave the directory again so that it can be removed.
                os.chdir(cwd)
        finally:
            os.rmdir(make_name)

    # The '_test' functions 'entry points with params' - ie, what the
    # top-level 'test' functions would be if they could take params
    def _test_single(self, filename):
        """Run _do_single() on a file created with open(), then os.open()."""
        remove_if_exists(filename)
        with open(filename, "w"):
            pass
        try:
            self._do_single(filename)
        finally:
            self._cleanup(filename)
        self.assertFalse(os.path.exists(filename))
        # and again with os.open.
        fd = os.open(filename, os.O_CREAT)
        os.close(fd)
        try:
            self._do_single(filename)
        finally:
            self._cleanup(filename)

    def _test_equivalent(self, filename1, filename2):
        """Create the file as filename1 and compare it against filename2."""
        remove_if_exists(filename1)
        self.assertFalse(os.path.exists(filename2))
        with open(filename1, "w"):
            pass
        try:
            self._do_equivalent(filename1, filename2)
        finally:
            self._cleanup(filename1)

    # The 'test' functions are unittest entry points, and simply call our
    # _test functions with each of the filename combinations we wish to test
    def test_single_files(self):
        self._test_single(TESTFN_ENCODED)
        self._test_single(TESTFN_UNICODE)
        if TESTFN_UNICODE_UNENCODEABLE is not None:
            self._test_single(TESTFN_UNICODE_UNENCODEABLE)

    def test_equivalent_files(self):
        self._test_equivalent(TESTFN_ENCODED, TESTFN_UNICODE)
        self._test_equivalent(TESTFN_UNICODE, TESTFN_ENCODED)

    def test_directories(self):
        # For all 'equivalent' combinations:
        #  Make dir with encoded, chdir with unicode, checkdir with encoded
        #  (or unicode/encoded/unicode, etc)
        ext = ".dir"
        self._do_directory(TESTFN_ENCODED+ext, TESTFN_ENCODED+ext, True)
        self._do_directory(TESTFN_ENCODED+ext, TESTFN_UNICODE+ext, True)
        self._do_directory(TESTFN_UNICODE+ext, TESTFN_ENCODED+ext, False)
        self._do_directory(TESTFN_UNICODE+ext, TESTFN_UNICODE+ext, False)
        # Our directory name that can't use a non-unicode name.
        if TESTFN_UNICODE_UNENCODEABLE is not None:
            self._do_directory(TESTFN_UNICODE_UNENCODEABLE+ext,
                               TESTFN_UNICODE_UNENCODEABLE+ext,
                               False)
195
def test_main():
    """Hand this module's name to test_support's run_unittest()."""
    this_module = __name__
    run_unittest(this_module)
198
# Run the tests when this file is executed directly as a script.
if __name__ == "__main__":
    test_main()
201