Skip to content

Commit

Permalink
BufferedFile.read() now returns byte strings instead of text strings
Browse files Browse the repository at this point in the history
It is the right thing to do since we have no idea what encoding the file
is in, or even if the file is text data. BufferedFile.readline() is
unchanged and returns text strings assuming the file is utf-8 encoded.
This should fix the following issue:
http://comments.gmane.org/gmane.comp.sysutils.backup.obnam/252

Antoine Brenner

Conflicts:
	sites/www/changelog.rst
  • Loading branch information
antoine-gymglish authored and bitprophet committed Apr 17, 2014
1 parent 9b2388c commit 3fce8ab
Show file tree
Hide file tree
Showing 5 changed files with 54 additions and 33 deletions.
21 changes: 15 additions & 6 deletions paramiko/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,10 +124,15 @@ def read(self, size=None):
file first). If the ``size`` argument is negative or omitted, read all
the remaining data in the file.
`'b' mode flag is ignored (self.FLAG_BINARY in self._flags), because
SSH treats all files as binary, since we have no idea what encoding
the file is in, or even if the file is text data.
:param int size: maximum number of bytes to read
:return:
data read from the file (as a `str`), or an empty string if EOF was
encountered immediately
data read from the file (as bytes ), or an empty string
if EOF was encountered immediately
"""
if self._closed:
raise IOError('File is closed')
Expand All @@ -148,12 +153,12 @@ def read(self, size=None):
result += new_data
self._realpos += len(new_data)
self._pos += len(new_data)
return result if self._flags & self.FLAG_BINARY else u(result)
return result
if size <= len(self._rbuffer):
result = self._rbuffer[:size]
self._rbuffer = self._rbuffer[size:]
self._pos += len(result)
return result if self._flags & self.FLAG_BINARY else u(result)
return result
while len(self._rbuffer) < size:
read_size = size - len(self._rbuffer)
if self._flags & self.FLAG_BUFFERED:
Expand All @@ -169,7 +174,7 @@ def read(self, size=None):
result = self._rbuffer[:size]
self._rbuffer = self._rbuffer[size:]
self._pos += len(result)
return result if self._flags & self.FLAG_BINARY else u(result)
return result

def readline(self, size=None):
"""
Expand All @@ -186,8 +191,12 @@ def readline(self, size=None):
:param int size: maximum length of returned string.
:return:
next line of the file (`str`), or an empty string if the end of the
next line of the file, or an empty string if the end of the
file has been reached.
If the file was opened in binary 'b' mode: bytes are returned
Else: the encoding of the file is assumed to be utf-8 (default
encoding used by paramiko.py3compat.u) and character strings
(`str`) are returned
"""
# it's almost silly how complex this function is.
if self._closed:
Expand Down
7 changes: 7 additions & 0 deletions sites/www/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,13 @@
Changelog
=========

* :bug: BufferedFile.read() now returns byte strings instead of text strings
It is the right thing to do since we have no idea what encoding the file
is in, or even if the file is text data. BufferedFile.readline() is
unchanged and returns text strings assuming the file is utf-8 encoded.
This should fix the following issue:
http://comments.gmane.org/gmane.comp.sysutils.backup.obnam/252
Thanks Antoine Brenner
* :bug:`-` Added self.args for exception classes. Used for unpickling. Related
to (`Fabric #986 <https://github.com/fabric/fabric/issues/986>`_, `Fabric
#714 <https://github.com/fabric/fabric/issues/714>`_). Thanks to Alex
Expand Down
45 changes: 25 additions & 20 deletions tests/test_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ class BufferedFileTest (unittest.TestCase):
def test_1_simple(self):
f = LoopbackFile('r')
try:
f.write('hi')
f.write(b'hi')
self.assertTrue(False, 'no exception on write to read-only file')
except:
pass
Expand All @@ -69,7 +69,7 @@ def test_1_simple(self):

def test_2_readline(self):
f = LoopbackFile('r+U')
f.write('First line.\nSecond line.\r\nThird line.\nFinal line non-terminated.')
f.write(b'First line.\nSecond line.\r\nThird line.\nFinal line non-terminated.')
self.assertEqual(f.readline(), 'First line.\n')
# universal newline mode should convert this linefeed:
self.assertEqual(f.readline(), 'Second line.\n')
Expand All @@ -93,9 +93,9 @@ def test_3_lf(self):
try to trick the linefeed detector.
"""
f = LoopbackFile('r+U')
f.write('First line.\r')
f.write(b'First line.\r')
self.assertEqual(f.readline(), 'First line.\n')
f.write('\nSecond.\r\n')
f.write(b'\nSecond.\r\n')
self.assertEqual(f.readline(), 'Second.\n')
f.close()
self.assertEqual(f.newlines, crlf)
Expand All @@ -105,7 +105,7 @@ def test_4_write(self):
verify that write buffering is on.
"""
f = LoopbackFile('r+', 1)
f.write('Complete line.\nIncomplete line.')
f.write(b'Complete line.\nIncomplete line.')
self.assertEqual(f.readline(), 'Complete line.\n')
self.assertEqual(f.readline(), '')
f.write('..\n')
Expand All @@ -118,35 +118,40 @@ def test_5_flush(self):
"""
f = LoopbackFile('r+', 512)
f.write('Not\nquite\n512 bytes.\n')
self.assertEqual(f.read(1), '')
self.assertEqual(f.read(1), b'')
f.flush()
self.assertEqual(f.read(5), 'Not\nq')
self.assertEqual(f.read(10), 'uite\n512 b')
self.assertEqual(f.read(9), 'ytes.\n')
self.assertEqual(f.read(3), '')
self.assertEqual(f.read(5), b'Not\nq')
self.assertEqual(f.read(10), b'uite\n512 b')
self.assertEqual(f.read(9), b'ytes.\n')
self.assertEqual(f.read(3), b'')
f.close()

def test_6_buffering(self):
"""
verify that flushing happens automatically on buffer crossing.
"""
f = LoopbackFile('r+', 16)
f.write('Too small.')
self.assertEqual(f.read(4), '')
f.write(' ')
self.assertEqual(f.read(4), '')
f.write('Enough.')
self.assertEqual(f.read(20), 'Too small. Enough.')
f.write(b'Too small.')
self.assertEqual(f.read(4), b'')
f.write(b' ')
self.assertEqual(f.read(4), b'')
f.write(b'Enough.')
self.assertEqual(f.read(20), b'Too small. Enough.')
f.close()

def test_7_read_all(self):
"""
verify that read(-1) returns everything left in the file.
"""
f = LoopbackFile('r+', 16)
f.write('The first thing you need to do is open your eyes. ')
f.write('Then, you need to close them again.\n')
f.write(b'The first thing you need to do is open your eyes. ')
f.write(b'Then, you need to close them again.\n')
s = f.read(-1)
self.assertEqual(s, 'The first thing you need to do is open your eyes. Then, you ' +
'need to close them again.\n')
self.assertEqual(s, b'The first thing you need to do is open your eyes. Then, you ' +
b'need to close them again.\n')
f.close()

if __name__ == '__main__':
from unittest import main
main()

10 changes: 5 additions & 5 deletions tests/test_sftp.py
Original file line number Diff line number Diff line change
Expand Up @@ -405,7 +405,7 @@ def test_B_write_seek(self):
self.assertEqual(sftp.stat(FOLDER + '/testing.txt').st_size, 13)
with sftp.open(FOLDER + '/testing.txt', 'r') as f:
data = f.read(20)
self.assertEqual(data, 'hello kiddy.\n')
self.assertEqual(data, b'hello kiddy.\n')
finally:
sftp.remove(FOLDER + '/testing.txt')

Expand Down Expand Up @@ -466,8 +466,8 @@ def test_D_flush_seek(self):
f.write('?\n')

with sftp.open(FOLDER + '/happy.txt', 'r') as f:
self.assertEqual(f.readline(), 'full line?\n')
self.assertEqual(f.read(7), 'partial')
self.assertEqual(f.readline(), u'full line?\n')
self.assertEqual(f.read(7), b'partial')
finally:
try:
sftp.remove(FOLDER + '/happy.txt')
Expand Down Expand Up @@ -662,8 +662,8 @@ def test_N_put_without_confirm(self):

fd, localname = mkstemp()
os.close(fd)
text = 'All I wanted was a plastic bunny rabbit.\n'
with open(localname, 'w') as f:
text = b'All I wanted was a plastic bunny rabbit.\n'
with open(localname, 'wb') as f:
f.write(text)
saved_progress = []

Expand Down
4 changes: 2 additions & 2 deletions tests/test_sftp_big.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def test_2_big_file(self):
write a 1MB file with no buffering.
"""
sftp = get_sftp()
kblob = (1024 * 'x')
kblob = (1024 * b'x')
start = time.time()
try:
with sftp.open('%s/hongry.txt' % FOLDER, 'w') as f:
Expand Down Expand Up @@ -231,7 +231,7 @@ def test_6_lots_of_prefetching(self):
without using it, to verify that paramiko doesn't get confused.
"""
sftp = get_sftp()
kblob = (1024 * 'x')
kblob = (1024 * b'x')
try:
with sftp.open('%s/hongry.txt' % FOLDER, 'w') as f:
f.set_pipelined(True)
Expand Down

0 comments on commit 3fce8ab

Please sign in to comment.