Skip to content

Commit 2d9b1d0

Browse files
authored
Implement bytes.hex() with optional sep
1 parent 708da57 commit 2d9b1d0

File tree

5 files changed

+141
-41
lines changed

5 files changed

+141
-41
lines changed

Lib/test/test_bytes.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -424,8 +424,6 @@ def test_hex(self):
424424
self.assertEqual(self.type2test(b"\x1a\x2b\x30").hex(), '1a2b30')
425425
self.assertEqual(memoryview(b"\x1a\x2b\x30").hex(), '1a2b30')
426426

427-
# TODO: RUSTPYTHON
428-
@unittest.expectedFailure
429427
def test_hex_separator_basics(self):
430428
three_bytes = self.type2test(b'\xb9\x01\xef')
431429
self.assertEqual(three_bytes.hex(), 'b901ef')
@@ -467,8 +465,6 @@ def test_hex_separator_five_bytes(self):
467465
five_bytes = self.type2test(range(90,95))
468466
self.assertEqual(five_bytes.hex(), '5a5b5c5d5e')
469467

470-
# TODO: RUSTPYTHON
471-
@unittest.expectedFailure
472468
def test_hex_separator_six_bytes(self):
473469
six_bytes = self.type2test(x*3 for x in range(1, 7))
474470
self.assertEqual(six_bytes.hex(), '0306090c0f12')

vm/src/bytesinner.rs

Lines changed: 105 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,27 +3,25 @@ use itertools::Itertools;
33
use num_bigint::BigInt;
44
use num_traits::ToPrimitive;
55

6+
use crate::anystr::{self, AnyStr, AnyStrContainer, AnyStrWrapper};
67
use crate::byteslike::try_bytes_like;
78
use crate::function::{OptionalArg, OptionalOption};
89
use crate::obj::objbytearray::PyByteArray;
9-
use crate::obj::objbytes::PyBytes;
10+
use crate::obj::objbytes::{PyBytes, PyBytesRef};
1011
use crate::obj::objint::{self, PyInt, PyIntRef};
1112
use crate::obj::objlist::PyList;
1213
use crate::obj::objmemory::PyMemoryView;
1314
use crate::obj::objsingletons::PyNoneRef;
1415
use crate::obj::objslice::PySliceRef;
1516
use crate::obj::objstr::{self, PyStr, PyStrRef};
17+
use crate::obj::objtuple::PyTuple;
1618
use crate::pyobject::{
1719
BorrowValue, Either, PyComparisonValue, PyIterable, PyIterator, PyObjectRef, PyResult,
1820
TryFromObject, TypeProtocol,
1921
};
2022
use crate::sliceable::{PySliceableSequence, PySliceableSequenceMut, SequenceIndex};
2123
use crate::slots::PyComparisonOp;
2224
use crate::vm::VirtualMachine;
23-
use crate::{
24-
anystr::{self, AnyStr, AnyStrContainer, AnyStrWrapper},
25-
obj::objtuple::PyTuple,
26-
};
2725
use rustpython_common::hash;
2826

2927
#[derive(Debug, Default, Clone)]
@@ -549,11 +547,13 @@ impl PyBytesInner {
549547
new
550548
}
551549

552-
pub fn hex(&self) -> String {
553-
self.elements
554-
.iter()
555-
.map(|x| format!("{:02x}", x))
556-
.collect::<String>()
550+
pub fn hex(
551+
&self,
552+
sep: OptionalArg<Either<PyStrRef, PyBytesRef>>,
553+
bytes_per_sep: OptionalArg<isize>,
554+
vm: &VirtualMachine,
555+
) -> PyResult<String> {
556+
bytes_to_hex(self.elements.as_slice(), sep, bytes_per_sep, vm)
557557
}
558558

559559
pub fn fromhex(string: &str, vm: &VirtualMachine) -> PyResult<Vec<u8>> {
@@ -1235,3 +1235,98 @@ pub fn bytes_decode(
12351235
))
12361236
})
12371237
}
1238+
1239+
fn hex_impl_no_sep(bytes: &[u8]) -> String {
1240+
let mut buf: Vec<u8> = vec![0; bytes.len() * 2];
1241+
hex::encode_to_slice(bytes, buf.as_mut_slice()).unwrap();
1242+
unsafe { String::from_utf8_unchecked(buf) }
1243+
}
1244+
1245+
/// Hex-encodes `bytes`, inserting the single byte `sep` between groups of bytes.
///
/// The group size is the magnitude of `bytes_per_sep`, capped at `bytes.len()`.
/// A positive `bytes_per_sep` counts groups from the right end of the input, so
/// a short group (if any) comes first; a negative value counts from the left, so
/// a short group (if any) comes last. A `bytes_per_sep` of zero produces no
/// separators at all, and empty input produces an empty string.
///
/// # Panics
///
/// Panics if `sep` is not an ASCII byte, because the output buffer is converted
/// to a `String` without UTF-8 validation.
fn hex_impl(bytes: &[u8], sep: u8, bytes_per_sep: isize) -> String {
    // Checked unconditionally: this is what makes the unchecked conversion at
    // the end of the function sound regardless of what the caller passed.
    assert!(sep.is_ascii(), "hex separator must be an ASCII byte");

    let len = bytes.len();
    if len == 0 {
        // Nothing to encode; returning early also keeps `len - 1` below from
        // underflowing.
        return String::new();
    }

    // `wrapping_abs` leaves `isize::MIN` unchanged and the cast then yields its
    // true magnitude, whereas plain negation would overflow. A group size of
    // zero is treated as "one group spanning everything" to avoid dividing by
    // zero.
    let group = match bytes_per_sep.wrapping_abs() as usize {
        0 => len,
        n => n.min(len),
    };
    // Number of separators, i.e. number of groups minus one.
    let seps = (len - 1) / group;
    // Size of the leading group: a full group when counting from the left,
    // otherwise whatever is left over after the full groups on the right.
    let first = if bytes_per_sep < 0 {
        group
    } else {
        len - seps * group
    };
    let (head, tail) = bytes.split_at(first);

    let mut buf = vec![0u8; len * 2 + seps];
    hex::encode_to_slice(head, &mut buf[..first * 2])
        .expect("output slice is exactly twice the input length");
    let mut pos = first * 2;
    // Each remaining group is preceded by one separator. When counting from the
    // left, the final chunk may be shorter than `group`.
    for chunk in tail.chunks(group) {
        buf[pos] = sep;
        pos += 1;
        let end = pos + chunk.len() * 2;
        hex::encode_to_slice(chunk, &mut buf[pos..end])
            .expect("output slice is exactly twice the input length");
        pos = end;
    }

    // SAFETY: `buf` holds only ASCII hex digits written by `encode_to_slice`
    // and copies of `sep`, which was asserted to be ASCII above, so it is
    // valid UTF-8.
    unsafe { String::from_utf8_unchecked(buf) }
}
1290+
1291+
pub fn bytes_to_hex(
1292+
bytes: &[u8],
1293+
sep: OptionalArg<Either<PyStrRef, PyBytesRef>>,
1294+
bytes_per_sep: OptionalArg<isize>,
1295+
vm: &VirtualMachine,
1296+
) -> PyResult<String> {
1297+
if bytes.is_empty() {
1298+
return Ok("".to_owned());
1299+
}
1300+
1301+
if let OptionalArg::Present(sep) = sep {
1302+
let bytes_per_sep = bytes_per_sep.unwrap_or(1);
1303+
if bytes_per_sep == 0 {
1304+
return Ok(hex_impl_no_sep(bytes));
1305+
}
1306+
1307+
let s_guard;
1308+
let b_guard;
1309+
let sep = match &sep {
1310+
Either::A(s) => {
1311+
s_guard = s.borrow_value();
1312+
s_guard.as_bytes()
1313+
}
1314+
Either::B(bytes) => {
1315+
b_guard = bytes.borrow_value();
1316+
b_guard
1317+
}
1318+
};
1319+
1320+
if sep.len() != 1 {
1321+
return Err(vm.new_value_error("sep must be length 1.".to_owned()));
1322+
}
1323+
let sep = sep[0];
1324+
if sep > 127 {
1325+
return Err(vm.new_value_error("sep must be ASCII.".to_owned()));
1326+
}
1327+
1328+
Ok(hex_impl(bytes, sep, bytes_per_sep))
1329+
} else {
1330+
Ok(hex_impl_no_sep(bytes))
1331+
}
1332+
}

vm/src/obj/objbytearray.rs

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,32 @@
11
//! Implementation of the python bytearray object.
2-
use bstr::ByteSlice;
3-
use crossbeam_utils::atomic::AtomicCell;
4-
use rustpython_common::borrow::{BorrowedValue, BorrowedValueMut};
5-
use std::mem::size_of;
6-
7-
use super::objint::PyIntRef;
8-
use super::objiter;
9-
use super::objstr::PyStrRef;
10-
use super::objtype::PyTypeRef;
112
use crate::anystr::{self, AnyStr};
123
use crate::bytesinner::{
134
bytes_decode, ByteInnerFindOptions, ByteInnerNewOptions, ByteInnerPaddingOptions,
145
ByteInnerSplitOptions, ByteInnerTranslateOptions, DecodeArgs, PyBytesInner,
156
};
167
use crate::byteslike::PyBytesLike;
8+
use crate::common::borrow::{BorrowedValue, BorrowedValueMut};
179
use crate::common::lock::{
1810
PyRwLock, PyRwLockReadGuard, PyRwLockUpgradableReadGuard, PyRwLockWriteGuard,
1911
};
2012
use crate::function::{OptionalArg, OptionalOption};
21-
use crate::obj::objbytes::PyBytes;
13+
use crate::obj::objbytes::{PyBytes, PyBytesRef};
14+
use crate::obj::objint::PyIntRef;
15+
use crate::obj::objiter;
2216
use crate::obj::objmemory::{Buffer, BufferOptions};
17+
use crate::obj::objstr::PyStrRef;
2318
use crate::obj::objtuple::PyTupleRef;
19+
use crate::obj::objtype::PyTypeRef;
2420
use crate::pyobject::{
2521
BorrowValue, Either, IdProtocol, IntoPyObject, PyClassImpl, PyComparisonValue, PyContext,
2622
PyIterable, PyObjectRef, PyRef, PyResult, PyValue,
2723
};
2824
use crate::sliceable::SequenceIndex;
2925
use crate::slots::{BufferProtocol, Comparable, Hashable, PyComparisonOp, Unhashable};
3026
use crate::vm::VirtualMachine;
27+
use bstr::ByteSlice;
28+
use crossbeam_utils::atomic::AtomicCell;
29+
use std::mem::size_of;
3130

3231
/// "bytearray(iterable_of_ints) -> bytearray\n\
3332
/// bytearray(string, encoding[, errors]) -> bytearray\n\
@@ -249,8 +248,13 @@ impl PyByteArray {
249248
}
250249

251250
#[pymethod(name = "hex")]
252-
fn hex(&self) -> String {
253-
self.borrow_value().hex()
251+
fn hex(
252+
&self,
253+
sep: OptionalArg<Either<PyStrRef, PyBytesRef>>,
254+
bytes_per_sep: OptionalArg<isize>,
255+
vm: &VirtualMachine,
256+
) -> PyResult<String> {
257+
self.borrow_value().hex(sep, bytes_per_sep, vm)
254258
}
255259

256260
#[pymethod]

vm/src/obj/objbytes.rs

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -221,11 +221,14 @@ impl PyBytes {
221221
self.inner.swapcase().into()
222222
}
223223

224-
// TODO: Changed in version 3.8: bytes.hex() now supports optional sep and
225-
// bytes_per_sep parameters to insert separators between bytes in the hex output.
226224
#[pymethod(name = "hex")]
227-
pub(crate) fn hex(&self) -> String {
228-
self.inner.hex()
225+
pub(crate) fn hex(
226+
&self,
227+
sep: OptionalArg<Either<PyStrRef, PyBytesRef>>,
228+
bytes_per_sep: OptionalArg<isize>,
229+
vm: &VirtualMachine,
230+
) -> PyResult<String> {
231+
self.inner.hex(sep, bytes_per_sep, vm)
229232
}
230233

231234
#[pymethod]

vm/src/obj/objmemory.rs

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,17 @@
11
use std::{fmt::Debug, ops::Deref};
22

3+
use crate::bytesinner::bytes_to_hex;
34
use crate::common::borrow::{BorrowedValue, BorrowedValueMut};
45
use crate::common::hash::PyHash;
6+
use crate::function::OptionalArg;
57
use crate::obj::objbytes::{PyBytes, PyBytesRef};
68
use crate::obj::objlist::{PyList, PyListRef};
79
use crate::obj::objslice::PySliceRef;
8-
use crate::obj::objstr::PyStr;
10+
use crate::obj::objstr::{PyStr, PyStrRef};
911
use crate::obj::objtype::PyTypeRef;
1012
use crate::pyobject::{
11-
IdProtocol, IntoPyObject, PyClassImpl, PyComparisonValue, PyContext, PyObjectRef, PyRef,
12-
PyResult, PyThreadingConstraint, PyValue, TypeProtocol,
13+
Either, IdProtocol, IntoPyObject, PyClassImpl, PyComparisonValue, PyContext, PyObjectRef,
14+
PyRef, PyResult, PyThreadingConstraint, PyValue, TypeProtocol,
1315
};
1416
use crate::sliceable::{convert_slice, saturate_range, wrap_index, SequenceIndex};
1517
use crate::slots::{BufferProtocol, Comparable, Hashable, PyComparisonOp};
@@ -592,10 +594,13 @@ impl PyMemoryView {
592594
}
593595
}
594596

595-
// TODO: Changed in version 3.8: memoryview.hex() now supports optional sep and bytes_per_sep
596-
// parameters to insert separators between bytes in the hex output.
597597
#[pymethod]
598-
fn hex(zelf: PyRef<Self>, vm: &VirtualMachine) -> PyResult<String> {
598+
fn hex(
599+
zelf: PyRef<Self>,
600+
sep: OptionalArg<Either<PyStrRef, PyBytesRef>>,
601+
bytes_per_sep: OptionalArg<isize>,
602+
vm: &VirtualMachine,
603+
) -> PyResult<String> {
599604
zelf.try_not_released(vm)?;
600605
let guard;
601606
let vec;
@@ -609,11 +614,8 @@ impl PyMemoryView {
609614
vec.as_slice()
610615
}
611616
};
612-
let s = bytes
613-
.iter()
614-
.map(|x| format!("{:02x}", x))
615-
.collect::<String>();
616-
Ok(s)
617+
618+
bytes_to_hex(bytes, sep, bytes_per_sep, vm)
617619
}
618620

619621
fn eq(zelf: &PyRef<Self>, other: &PyObjectRef, vm: &VirtualMachine) -> PyResult<bool> {

0 commit comments

Comments
 (0)