Skip to content

Commit 5594660

Browse files
committed
Fix bytes decode/encoding tests
1 parent c5116b7 commit 5594660

File tree

5 files changed

+45
-54
lines changed

5 files changed

+45
-54
lines changed

Lib/_codecs.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -915,7 +915,7 @@ def PyUnicode_DecodeUTF16Stateful(s, size, errors, byteorder='native', final=Tru
915915
p = []
916916
if byteorder == 'native':
917917
if (size >= 2):
918-
bom = (ord(s[ihi]) << 8) | ord(s[ilo])
918+
bom = (s[ihi] << 8) | s[ilo]
919919
#ifdef BYTEORDER_IS_LITTLE_ENDIAN
920920
if sys.byteorder == 'little':
921921
if (bom == 0xFEFF):
@@ -962,11 +962,11 @@ def PyUnicode_DecodeUTF16Stateful(s, size, errors, byteorder='native', final=Tru
962962
# /* The remaining input chars are ignored if the callback
963963
## chooses to skip the input */
964964

965-
ch = (ord(s[q+ihi]) << 8) | ord(s[q+ilo])
965+
ch = (s[q+ihi] << 8) | s[q+ilo]
966966
q += 2
967967

968968
if (ch < 0xD800 or ch > 0xDFFF):
969-
p += chr(ch)
969+
p.append(chr(ch))
970970
continue
971971

972972
#/* UTF-16 code pair: */
@@ -977,15 +977,14 @@ def PyUnicode_DecodeUTF16Stateful(s, size, errors, byteorder='native', final=Tru
977977
unicode_call_errorhandler(errors, 'utf-16', errmsg, s, startinpos, endinpos, True)
978978

979979
if (0xD800 <= ch and ch <= 0xDBFF):
980-
ch2 = (ord(s[q+ihi]) << 8) | ord(s[q+ilo])
980+
ch2 = (s[q+ihi] << 8) | s[q+ilo]
981981
q += 2
982982
if (0xDC00 <= ch2 and ch2 <= 0xDFFF):
983983
#ifndef Py_UNICODE_WIDE
984984
if sys.maxunicode < 65536:
985-
p += chr(ch)
986-
p += chr(ch2)
985+
p += [chr(ch), chr(ch2)]
987986
else:
988-
p += chr((((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000)
987+
p.append(chr((((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000))
989988
#endif
990989
continue
991990

Lib/test/test_bytes.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -300,8 +300,6 @@ def test_extended_getslice(self):
300300
for step in indices[1:]:
301301
self.assertEqual(b[start:stop:step], self.type2test(L[start:stop:step]))
302302

303-
# TODO: RUSTPYTHON
304-
@unittest.expectedFailure
305303
def test_encoding(self):
306304
sample = "Hello world\n\u1234\u5678\u9abc"
307305
for enc in ("utf-8", "utf-16"):
@@ -311,8 +309,6 @@ def test_encoding(self):
311309
b = self.type2test(sample, "latin-1", "ignore")
312310
self.assertEqual(b, self.type2test(sample[:-3], "utf-8"))
313311

314-
# TODO: RUSTPYTHON
315-
@unittest.expectedFailure
316312
def test_decode(self):
317313
sample = "Hello world\n\u1234\u5678\u9abc"
318314
for enc in ("utf-8", "utf-16"):

vm/src/bytesinner.rs

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1290,3 +1290,29 @@ impl<'s> PyCommonString<'s, u8> for [u8] {
12901290
splited
12911291
}
12921292
}
1293+
1294+
/// Argument bundle for the `decode` method shared by `bytes` and `bytearray`.
///
/// Both fields may be passed positionally or by keyword and default to `None`
/// when omitted; they are destructured and forwarded by `bytes_decode`.
#[derive(FromArgs)]
1295+
pub struct DecodeArgs {
1296+
// Name of the codec to use; `None` when the caller did not supply one.
#[pyarg(positional_or_keyword, default = "None")]
1297+
encoding: Option<PyStringRef>,
1298+
// Name of the error-handling scheme; `None` when the caller did not supply one.
#[pyarg(positional_or_keyword, default = "None")]
1299+
errors: Option<PyStringRef>,
1300+
}
1301+
1302+
pub fn bytes_decode(
1303+
zelf: PyObjectRef,
1304+
args: DecodeArgs,
1305+
vm: &VirtualMachine,
1306+
) -> PyResult<PyStringRef> {
1307+
let DecodeArgs { encoding, errors } = args;
1308+
vm.decode(zelf, encoding.clone(), errors)?
1309+
.downcast::<PyString>()
1310+
.map_err(|obj| {
1311+
vm.new_type_error(format!(
1312+
"'{}' decoder returned '{}' instead of 'str'; use codecs.encode() to \
1313+
encode arbitrary types",
1314+
encoding.as_ref().map_or("utf-8", |s| s.borrow_value()),
1315+
obj.lease_class().name,
1316+
))
1317+
})
1318+
}

vm/src/obj/objbytearray.rs

Lines changed: 6 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -7,17 +7,17 @@ use std::mem::size_of;
77
use super::objint::PyIntRef;
88
use super::objiter;
99
use super::objsequence::SequenceIndex;
10-
use super::objstr::{PyString, PyStringRef};
10+
use super::objstr::PyStringRef;
1111
use super::objtype::PyClassRef;
1212
use crate::bytesinner::{
13-
ByteInnerFindOptions, ByteInnerNewOptions, ByteInnerPaddingOptions, ByteInnerSplitOptions,
14-
ByteInnerTranslateOptions, ByteOr, PyBytesInner,
13+
bytes_decode, ByteInnerFindOptions, ByteInnerNewOptions, ByteInnerPaddingOptions,
14+
ByteInnerSplitOptions, ByteInnerTranslateOptions, ByteOr, DecodeArgs, PyBytesInner,
1515
};
1616
use crate::common::cell::{PyRwLock, PyRwLockReadGuard, PyRwLockWriteGuard};
1717
use crate::function::{OptionalArg, OptionalOption};
1818
use crate::pyobject::{
1919
BorrowValue, Either, PyClassImpl, PyComparisonValue, PyContext, PyIterable, PyObjectRef, PyRef,
20-
PyResult, PyValue, TryFromObject, TypeProtocol,
20+
PyResult, PyValue, TryFromObject,
2121
};
2222
use crate::pystr::{self, PyCommonString};
2323
use crate::vm::VirtualMachine;
@@ -582,23 +582,8 @@ impl PyByteArray {
582582
}
583583

584584
#[pymethod]
585-
fn decode(
586-
zelf: PyRef<Self>,
587-
encoding: OptionalArg<PyStringRef>,
588-
errors: OptionalArg<PyStringRef>,
589-
vm: &VirtualMachine,
590-
) -> PyResult<PyStringRef> {
591-
let encoding = encoding.into_option();
592-
vm.decode(zelf.into_object(), encoding.clone(), errors.into_option())?
593-
.downcast::<PyString>()
594-
.map_err(|obj| {
595-
vm.new_type_error(format!(
596-
"'{}' decoder returned '{}' instead of 'str'; use codecs.encode() to \
597-
encode arbitrary types",
598-
encoding.as_ref().map_or("utf-8", |s| s.borrow_value()),
599-
obj.lease_class().name,
600-
))
601-
})
585+
fn decode(zelf: PyRef<Self>, args: DecodeArgs, vm: &VirtualMachine) -> PyResult<PyStringRef> {
586+
bytes_decode(zelf.into_object(), args, vm)
602587
}
603588
}
604589

vm/src/obj/objbytes.rs

Lines changed: 7 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -6,18 +6,18 @@ use std::ops::Deref;
66
use super::objint::PyIntRef;
77
use super::objiter;
88
use super::objsequence::SequenceIndex;
9-
use super::objstr::{PyString, PyStringRef};
9+
use super::objstr::PyStringRef;
1010
use super::objtype::PyClassRef;
1111
use crate::bytesinner::{
12-
ByteInnerFindOptions, ByteInnerNewOptions, ByteInnerPaddingOptions, ByteInnerSplitOptions,
13-
ByteInnerTranslateOptions, PyBytesInner,
12+
bytes_decode, ByteInnerFindOptions, ByteInnerNewOptions, ByteInnerPaddingOptions,
13+
ByteInnerSplitOptions, ByteInnerTranslateOptions, DecodeArgs, PyBytesInner,
1414
};
1515
use crate::function::{OptionalArg, OptionalOption};
1616
use crate::pyobject::{
1717
BorrowValue, Either, IntoPyObject,
1818
PyArithmaticValue::{self, *},
1919
PyClassImpl, PyComparisonValue, PyContext, PyIterable, PyObjectRef, PyRef, PyResult, PyValue,
20-
TryFromObject, TypeProtocol,
20+
TryFromObject,
2121
};
2222
use crate::pystr::{self, PyCommonString};
2323
use crate::vm::VirtualMachine;
@@ -467,24 +467,9 @@ impl PyBytes {
467467
/// For a list of possible encodings,
468468
/// see https://docs.python.org/3/library/codecs.html#standard-encodings
469469
/// currently, only 'utf-8' and 'ascii' are implemented
470-
#[pymethod(name = "decode")]
471-
fn decode(
472-
zelf: PyRef<Self>,
473-
encoding: OptionalArg<PyStringRef>,
474-
errors: OptionalArg<PyStringRef>,
475-
vm: &VirtualMachine,
476-
) -> PyResult<PyStringRef> {
477-
let encoding = encoding.into_option();
478-
vm.decode(zelf.into_object(), encoding.clone(), errors.into_option())?
479-
.downcast::<PyString>()
480-
.map_err(|obj| {
481-
vm.new_type_error(format!(
482-
"'{}' decoder returned '{}' instead of 'str'; use codecs.encode() to \
483-
encode arbitrary types",
484-
encoding.as_ref().map_or("utf-8", |s| s.borrow_value()),
485-
obj.lease_class().name,
486-
))
487-
})
470+
#[pymethod]
471+
fn decode(zelf: PyRef<Self>, args: DecodeArgs, vm: &VirtualMachine) -> PyResult<PyStringRef> {
472+
bytes_decode(zelf.into_object(), args, vm)
488473
}
489474
}
490475

0 commit comments

Comments
 (0)