-
Notifications
You must be signed in to change notification settings - Fork 134
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Accept unicode arguments at a csv.writer (fixes #2632).
The CPython csv.writer accepts unicode strings and encodes them using the current default encoding. This is not documented, but we can easily reproduce the behaviour, which is relied on by some users. A simple test_csv_jy is added for UTF-8 default. We hide sys.setdefaultencoding again after use since this otherwise causes test_site to fail. The same fault is corrected, where it was latent in test_unicode_jy.
- Loading branch information
Showing
4 changed files
with
146 additions
and
41 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
# -*- coding: utf-8 -*- | ||
# Copyright (C) 2017 Jython Developers | ||
|
||
# Additional csv module unit tests for Jython | ||
|
||
import csv | ||
import io | ||
import sys | ||
from tempfile import TemporaryFile | ||
from test import test_support | ||
import unittest | ||
|
||
# This test has been adapted from Python 3 test_csv.TestUnicode. In Python 3, | ||
# the csv module supports Unicode directly. In Python 2, it does not, except | ||
# that it is transparent to byte data. Many tools, however, accept UTF-8 | ||
# encoded text in a CSV file. | ||
# | ||
class EncodingContext(object):
    """Context manager to set the default encoding and restore it afterwards.

    Use like this:

        with EncodingContext("utf-8"):
            self.assertEqual("'caf\xc3\xa9'", u"'caf\xe9'")

    The process-wide default encoding is changed only while the ``with``
    block is active. Constructing an instance has no side effects, so an
    instance that is never entered cannot leak a changed encoding.
    """

    def __init__(self, encoding):
        """Record the encoding to apply on entry.

        encoding -- name of the codec to install as the default encoding.
        """
        self.encoding = encoding
        # Available straight after construction; refreshed again on entry.
        self.original_encoding = sys.getdefaultencoding()

    def __enter__(self):
        """Install the requested default encoding and return self."""
        if not hasattr(sys, "setdefaultencoding"):
            # The setter is normally hidden; reloading sys exposes it again.
            reload(sys)
        # Capture the value in force right now, so that exit restores
        # exactly what this "with" block replaced.
        self.original_encoding = sys.getdefaultencoding()
        sys.setdefaultencoding(self.encoding)
        return self

    def __exit__(self, *ignore_exc):
        """Restore the previous default encoding; exceptions propagate."""
        sys.setdefaultencoding(self.original_encoding)
|
||
class TestUnicode(unittest.TestCase):
    """Non-ASCII data passed through csv as UTF-8 bytes or as unicode."""

    names = [u"Martin von Löwis",
             u"Marc André Lemburg",
             u"Guido van Rossum",
             u"François Pinard",
             u"稲田直樹"]

    def test_decode_read(self):
        # Reading: csv hands back byte strings and the caller decodes them
        with TemporaryFile("w+b") as stream:
            stream.write((u",".join(self.names) + u"\r\n").encode('utf-8'))
            stream.seek(0)
            decoded_rows = []
            for record in csv.reader(stream):
                # Each cell is a byte string holding UTF-8
                decoded_rows.append([cell.decode('utf-8') for cell in record])
            self.assertEqual(decoded_rows, [self.names])

    def test_encode_write(self):
        # Writing: the caller encodes unicode objects before csv sees them
        csv_text = u",".join(self.names) + u"\r\n"
        with TemporaryFile("w+b") as stream:
            encoded_row = [name.encode('utf-8') for name in self.names]
            csv.writer(stream).writerow(encoded_row)
            # The bytes in the file should be the UTF-8 form of the csv text
            stream.seek(0)
            written = stream.read()
            self.assertEqual(written.decode('utf-8'), csv_text)

    def test_unicode_write(self):
        # Writing: csv.writer receives unicode objects directly and encodes
        # them with the default encoding (an undocumented feature that
        # client code relies upon).
        # See Issue #2632 https://github.com/jythontools/jython/issues/90
        csv_text = u",".join(self.names) + u"\r\n"
        with TemporaryFile("w+b") as stream, EncodingContext('utf-8'):
            csv.writer(stream).writerow(self.names)
            stream.seek(0)
            # decode() without an argument relies on the default encoding,
            # so the comparison has to happen while the context is active
            self.assertEqual(stream.read().decode(), csv_text)
|
||
|
||
def test_main():
    """Run the test cases, then hide sys.setdefaultencoding again if needed.

    The tests expose sys.setdefaultencoding; when it was not visible before
    the run, it is removed afterwards so that later tests see sys unchanged.
    """
    # We'll be enabling sys.setdefaultencoding so remember to disable
    had_set = hasattr(sys, "setdefaultencoding")
    try:
        test_support.run_unittest(
            TestUnicode,
        )
    finally:
        # Only delete what is actually there: if the run aborted before any
        # test exposed the attribute, an unconditional delattr would raise
        # AttributeError here and mask the original failure.
        if not had_set and hasattr(sys, "setdefaultencoding"):
            delattr(sys, "setdefaultencoding")
|
||
# Run the tests when this file is executed as a script.
if __name__ == "__main__":
    test_main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters