Skip to content

Commit 9c57ae4

Browse files
committed
support bytes creation from hex and ascii
1 parent d7275c7 commit 9c57ae4

File tree

2 files changed

+107
-1
lines changed

2 files changed

+107
-1
lines changed

parser/src/lexer.rs

Lines changed: 103 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -542,7 +542,7 @@ where
542542
let tok = if is_bytes {
543543
if string_content.is_ascii() {
544544
Tok::Bytes {
545-
value: string_content.as_bytes().to_vec(),
545+
value: self.lex_byte(string_content)?,
546546
}
547547
} else {
548548
return Err(LexicalError::StringError);
@@ -1105,6 +1105,84 @@ where
11051105
let tok_end = self.get_pos();
11061106
Ok((tok_start, ty, tok_end))
11071107
}
1108+
1109+
fn lex_byte(&self, s: String) -> Result<Vec<u8>, LexicalError> {
1110+
let mut res = vec![];
1111+
let mut escape = false; //flag if previous was \
1112+
let mut hex_on = false; // hex mode on or off
1113+
let mut hex_value = String::new();
1114+
1115+
for c in s.chars() {
1116+
match c {
1117+
'\\' => {
1118+
if escape {
1119+
res.push(92);
1120+
escape = false;
1121+
} else {
1122+
escape = true;
1123+
}
1124+
}
1125+
1126+
'x' => {
1127+
if escape {
1128+
hex_on = true;
1129+
} else {
1130+
res.push(120);
1131+
}
1132+
escape = false;
1133+
}
1134+
't' => {
1135+
if escape {
1136+
res.push(9);
1137+
} else {
1138+
res.push(116);
1139+
}
1140+
escape = false;
1141+
}
1142+
'n' => {
1143+
if escape {
1144+
res.push(10);
1145+
} else {
1146+
res.push(110)
1147+
}
1148+
escape = false;
1149+
}
1150+
'r' => {
1151+
if escape {
1152+
res.push(13);
1153+
} else {
1154+
res.push(114)
1155+
}
1156+
escape = false;
1157+
}
1158+
x => {
1159+
if hex_on {
1160+
if x.is_ascii_hexdigit() {
1161+
if hex_value.is_empty() {
1162+
hex_value.push(x);
1163+
continue;
1164+
} else {
1165+
hex_value.push(x);
1166+
res.push(u8::from_str_radix(&hex_value, 16).unwrap());
1167+
hex_on = false;
1168+
hex_value.clear();
1169+
}
1170+
} else {
1171+
return Err(LexicalError::StringError);
1172+
}
1173+
} else {
1174+
if escape {
1175+
res.push(92);
1176+
}
1177+
res.push(x as u8);
1178+
}
1179+
escape = false;
1180+
}
1181+
}
1182+
}
1183+
1184+
Ok(res)
1185+
}
11081186
}
11091187

11101188
/* Implement iterator pattern for the get_tok function.
@@ -1520,4 +1598,28 @@ mod tests {
15201598
test_string_continuation_mac_eol: MAC_EOL,
15211599
test_string_continuation_unix_eol: UNIX_EOL,
15221600
}
1601+
1602+
#[test]
fn test_byte() {
    // Each case pairs a bytes literal (as source text) with the byte values the
    // lexer is expected to produce for it.
    let every_byte = (0..=255).collect::<Vec<u8>>();
    let cases: Vec<(&str, Vec<u8>)> = vec![
        // All 256 byte values inside a single-quoted literal.
        (
            r##"b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff'"##,
            every_byte.clone(),
        ),
        // The same 256 byte values inside a double-quoted literal.
        (
            r##"b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff""##,
            every_byte,
        ),
        // An unrecognised escape (`\X`) keeps its backslash instead of escaping.
        (
            r##"b"omkmok\Xaa""##,
            vec![111, 109, 107, 109, 111, 107, 92, 88, 97, 97],
        ),
    ];

    for (literal, expected) in cases {
        let source = String::from(literal);
        let tokens = lex_source(&source);
        assert_eq!(tokens, vec![Tok::Bytes { value: expected }]);
    }
}
15231625
}

tests/snippets/bytes.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@
1010
with assertRaises(TypeError):
1111
bytes("bla")
1212

13+
assert b"\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff" == bytes(range(0,256))
14+
assert b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff' == bytes(range(0,256))
15+
assert b"omkmok\Xaa" == bytes([111, 109, 107, 109, 111, 107, 92, 88, 97, 97])
16+
1317

1418
a = b"abcd"
1519
b = b"ab"

0 commit comments

Comments
 (0)