Skip to content

Commit da63324

Browse files
authored
Merge pull request RustPython#2403 from RustPython/coolreader18/bincode-libmap
Serialize entire frozen hashmap to bytes
2 parents dd8d3c1 + 58503cb commit da63324

File tree

6 files changed

+152
-52
lines changed

6 files changed

+152
-52
lines changed

bytecode/src/lib.rs

Lines changed: 130 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1075,7 +1075,7 @@ impl<C: Constant> fmt::Debug for CodeObject<C> {
10751075
}
10761076
}
10771077

1078-
#[derive(Serialize, Deserialize)]
1078+
#[derive(Serialize, Deserialize, Debug)]
10791079
pub struct FrozenModule<C: Constant = ConstantData> {
10801080
#[serde(bound(
10811081
deserialize = "C: serde::Deserialize<'de>, C::Name: serde::Deserialize<'de>",
@@ -1084,3 +1084,132 @@ pub struct FrozenModule<C: Constant = ConstantData> {
10841084
pub code: CodeObject<C>,
10851085
pub package: bool,
10861086
}
1087+
1088+
pub mod frozen_lib {
1089+
use super::*;
1090+
use bincode::{options, Options};
1091+
use std::convert::TryInto;
1092+
use std::io;
1093+
1094+
/// Decompresses `bytes` and returns an iterator over the frozen modules stored in it.
///
/// The input is expected to be what `encode_lib` produces: a size-prepended LZ4 block
/// wrapping a bincode sequence (`<len:u64>` followed by `len` entries).
///
/// # Panics
///
/// Panics if LZ4 decompression fails, if the leading `u64` entry count cannot be
/// deserialized, or if that count does not fit in a `usize`.
pub fn decode_lib(bytes: &[u8]) -> FrozenModulesIter {
1095+
let data = lz4_flex::decompress_size_prepended(bytes).unwrap();
1096+
// the reader owns the decompressed buffer so the returned iterator borrows nothing
let r = VecReader { data, pos: 0 };
1097+
let mut de = bincode::Deserializer::with_bincode_read(r, options());
1098+
// read the sequence length up front; the entries themselves are decoded lazily by the iterator
let len = u64::deserialize(&mut de).unwrap().try_into().unwrap();
1099+
FrozenModulesIter { len, de }
1100+
}
1101+
1102+
/// Iterator over the `(String, FrozenModule)` entries of an encoded frozen library.
///
/// Created by `decode_lib`; each entry is deserialized only when it is requested.
pub struct FrozenModulesIter {
1103+
// number of entries that have not been yielded yet
len: usize,
1104+
// ideally this could be a SeqAccess, but I think that would require existential types
1105+
// deserializer positioned at the next undecoded entry
de: bincode::Deserializer<VecReader, bincode::DefaultOptions>,
1106+
}
1107+
1108+
impl Iterator for FrozenModulesIter {
1109+
type Item = (String, FrozenModule);
1110+
1111+
/// Deserializes and returns the next entry, or `None` once all `len` entries
/// have been yielded.
///
/// # Panics
///
/// Panics if the next entry cannot be deserialized.
fn next(&mut self) -> Option<Self::Item> {
1112+
// manually mimic bincode's seq encoding, which is <len:u64> <element*len>
1113+
// This probably won't change (bincode doesn't require padding or anything), but
1114+
// it's not guaranteed by semver as far as I can tell
1115+
if self.len > 0 {
1116+
let entry = Deserialize::deserialize(&mut self.de).unwrap();
1117+
self.len -= 1;
1118+
Some(entry)
1119+
} else {
1120+
None
1121+
}
1122+
}
1123+
1124+
/// Returns the exact number of remaining entries as both lower and upper bound.
fn size_hint(&self) -> (usize, Option<usize>) {
1125+
(self.len, Some(self.len))
1126+
}
1127+
}
1128+
1129+
// `size_hint` reports the exact remaining count, so the default `len()` is accurate.
impl ExactSizeIterator for FrozenModulesIter {}
1130+
1131+
/// Serializes `(name, module)` pairs into the compressed byte format read by `decode_lib`.
///
/// The pairs are written as a single bincode sequence, which is then LZ4-compressed
/// with its size prepended.
///
/// The iterator must be `ExactSizeIterator + Clone` because `SerializeLib` requires
/// both bounds: it serializes through `&self` and therefore iterates over a clone.
///
/// # Panics
///
/// Panics if bincode serialization fails.
pub fn encode_lib<'a, I>(lib: I) -> Vec<u8>
1132+
where
1133+
I: IntoIterator<Item = (&'a str, &'a FrozenModule)>,
1134+
I::IntoIter: ExactSizeIterator + Clone,
1135+
{
1136+
let iter = lib.into_iter();
1137+
let data = options().serialize(&SerializeLib { iter }).unwrap();
1138+
lz4_flex::compress_prepend_size(&data)
1139+
}
1140+
1141+
/// Wrapper that lets an iterator of `(name, module)` pairs be serialized as a
/// serde sequence without first collecting it into a container.
struct SerializeLib<I> {
1142+
// the entries to serialize; cloned on each `serialize` call
iter: I,
1143+
}
1144+
1145+
impl<'a, I> Serialize for SerializeLib<I>
1146+
where
1147+
I: ExactSizeIterator<Item = (&'a str, &'a FrozenModule)> + Clone,
1148+
{
1149+
/// Serializes every item of the wrapped iterator as one sequence.
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
1150+
where
1151+
S: serde::Serializer,
1152+
{
1153+
// `serialize` only has `&self`, so consume a clone of the iterator rather than the original
serializer.collect_seq(self.iter.clone())
1154+
}
1155+
}
1156+
1157+
/// Owned version of bincode::de::read::SliceReader<'a>
///
/// Holds the whole buffer plus a cursor, so the reader can be stored without
/// borrowing from an external slice.
1158+
struct VecReader {
1159+
// the complete byte buffer being read
data: Vec<u8>,
1160+
// index into `data` of the next unread byte
pos: usize,
1161+
}
1162+
1163+
impl io::Read for VecReader {
1164+
/// Copies unread bytes into `buf` by delegating to the `Read` impl of the
/// remaining slice, then advances the cursor by the number of bytes read.
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
1165+
let mut subslice = &self.data[self.pos..];
1166+
let n = io::Read::read(&mut subslice, buf)?;
1167+
self.pos += n;
1168+
Ok(n)
1169+
}
1170+
/// Fills `buf` with the next `buf.len()` unread bytes and advances the cursor.
///
/// Returns an `UnexpectedEof` error, leaving the cursor and `buf` untouched,
/// if fewer than `buf.len()` bytes remain.
fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> {
1171+
self.get_byte_slice(buf.len())
1172+
.map(|data| buf.copy_from_slice(data))
1173+
}
1174+
}
1175+
1176+
impl VecReader {
1177+
/// Returns the next `length` unread bytes as a slice and advances the cursor past them.
///
/// Returns an `UnexpectedEof` error, without moving the cursor, if fewer than
/// `length` bytes remain.
#[inline(always)]
1178+
fn get_byte_slice(&mut self, length: usize) -> io::Result<&[u8]> {
1179+
let subslice = &self.data[self.pos..];
1180+
// `get` returns `None` instead of panicking when `length` exceeds what is left
match subslice.get(..length) {
1181+
Some(ret) => {
1182+
self.pos += length;
1183+
Ok(ret)
1184+
}
1185+
None => Err(io::ErrorKind::UnexpectedEof.into()),
1186+
}
1187+
}
1188+
}
1189+
1190+
impl<'storage> bincode::BincodeRead<'storage> for VecReader {
1191+
/// Reads the next `length` bytes, validates them as UTF-8 and passes the
/// resulting `&str` to `visitor.visit_str`.
///
/// Returns `InvalidUtf8Encoding` if the bytes are not valid UTF-8; the bytes
/// have already been consumed at that point. Running out of data surfaces as
/// the converted I/O error from `get_byte_slice`.
fn forward_read_str<V>(&mut self, length: usize, visitor: V) -> bincode::Result<V::Value>
1192+
where
1193+
V: serde::de::Visitor<'storage>,
1194+
{
1195+
let bytes = self.get_byte_slice(length)?;
1196+
match ::std::str::from_utf8(bytes) {
1197+
// the slice borrows from `self`, not from `'storage`, hence `visit_str`
Ok(s) => visitor.visit_str(s),
1198+
Err(e) => Err(bincode::ErrorKind::InvalidUtf8Encoding(e).into()),
1199+
}
1200+
}
1201+
1202+
/// Reads the next `length` bytes and returns them as a newly allocated `Vec<u8>`.
fn get_byte_buffer(&mut self, length: usize) -> bincode::Result<Vec<u8>> {
1203+
self.get_byte_slice(length)
1204+
.map(|x| x.to_vec())
1205+
.map_err(Into::into)
1206+
}
1207+
1208+
/// Reads the next `length` bytes and passes them to `visitor.visit_bytes`.
fn forward_read_bytes<V>(&mut self, length: usize, visitor: V) -> bincode::Result<V::Value>
1209+
where
1210+
V: serde::de::Visitor<'storage>,
1211+
{
1212+
visitor.visit_bytes(self.get_byte_slice(length)?)
1213+
}
1214+
}
1215+
}

derive/src/compile_bytecode.rs

Lines changed: 3 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -330,30 +330,11 @@ pub fn impl_py_freeze(input: TokenStream2) -> Result<TokenStream2, Diagnostic> {
330330
let crate_name = args.crate_name;
331331
let code_map = args.source.compile(args.mode, args.module_name)?;
332332

333-
let modules_len = code_map.len();
334-
335-
let modules = code_map
336-
.into_iter()
337-
.map(|(module_name, FrozenModule { code, package })| {
338-
let module_name = LitStr::new(&module_name, Span::call_site());
339-
let bytes = code.to_bytes();
340-
let bytes = LitByteStr::new(&bytes, Span::call_site());
341-
quote! {
342-
m.insert(#module_name.into(), #crate_name::FrozenModule {
343-
code: #crate_name::CodeObject::from_bytes(
344-
#bytes
345-
).expect("Deserializing CodeObject failed"),
346-
package: #package,
347-
});
348-
}
349-
});
333+
let data = rustpython_bytecode::frozen_lib::encode_lib(code_map.iter().map(|(k, v)| (&**k, v)));
334+
let bytes = LitByteStr::new(&data, Span::call_site());
350335

351336
let output = quote! {
352-
{
353-
let mut m = ::std::collections::HashMap::with_capacity(#modules_len);
354-
#(#modules)*
355-
m
356-
}
337+
#crate_name::frozen_lib::decode_lib(#bytes)
357338
};
358339

359340
Ok(output)

examples/freeze/main.rs

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
use std::collections::HashMap;
2-
31
use rustpython_vm as vm;
42

53
fn main() -> vm::pyobject::PyResult<()> {
@@ -11,13 +9,9 @@ fn run(vm: &vm::VirtualMachine) -> vm::pyobject::PyResult<()> {
119

1210
// the file parameter is relative to the directory where the crate's Cargo.toml is located, see $CARGO_MANIFEST_DIR:
1311
// https://doc.rust-lang.org/cargo/reference/environment-variables.html#environment-variables-cargo-sets-for-crates
14-
let modules: HashMap<String, vm::bytecode::FrozenModule> =
15-
vm::py_freeze!(file = "examples/freeze/freeze.py");
12+
let module = vm::py_compile!(file = "examples/freeze/freeze.py");
1613

17-
let res = vm.run_code_obj(
18-
vm.new_code_object(modules.get("frozen").unwrap().code.clone()),
19-
scope,
20-
);
14+
let res = vm.run_code_obj(vm.new_code_object(module), scope);
2115

2216
if let Err(err) = res {
2317
vm::exceptions::print_exception(&vm, err);

vm/pylib-crate/src/lib.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
pub const LIB_PATH: &str = concat!(env!("CARGO_MANIFEST_DIR"), "/Lib");
66

77
#[cfg(feature = "compiled-bytecode")]
8-
use {rustpython_bytecode::FrozenModule, std::collections::HashMap};
8+
use rustpython_bytecode::FrozenModule;
99
#[cfg(feature = "compiled-bytecode")]
10-
pub fn frozen_stdlib() -> HashMap<String, FrozenModule> {
10+
pub fn frozen_stdlib() -> impl Iterator<Item = (String, FrozenModule)> {
1111
rustpython_derive::py_freeze!(dir = "Lib", crate_name = "rustpython_bytecode")
1212
}

vm/src/frozen.rs

Lines changed: 14 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
use crate::builtins::code;
22
use crate::bytecode;
33
use crate::VirtualMachine;
4-
use std::collections::HashMap;
54

65
pub fn map_frozen<'a>(
76
vm: &'a VirtualMachine,
@@ -19,18 +18,19 @@ pub fn map_frozen<'a>(
1918
})
2019
}
2120

22-
pub fn get_module_inits(
23-
vm: &VirtualMachine,
24-
) -> HashMap<String, code::FrozenModule, ahash::RandomState> {
25-
let mut modules = HashMap::default();
26-
21+
pub fn get_module_inits() -> impl Iterator<Item = (String, bytecode::FrozenModule)> {
22+
let iter = std::iter::empty();
2723
macro_rules! ext_modules {
28-
($($t:tt)*) => {
29-
modules.extend(map_frozen(vm, py_freeze!($($t)*)));
24+
($iter:ident, ($modules:expr)) => {
25+
let $iter = $iter.chain($modules);
26+
};
27+
($iter:ident, $($t:tt)*) => {
28+
ext_modules!($iter, (py_freeze!($($t)*)))
3029
};
3130
}
3231

3332
ext_modules!(
33+
iter,
3434
source = "initialized = True; print(\"Hello world!\")\n",
3535
module_name = "__hello__",
3636
);
@@ -39,19 +39,15 @@ pub fn get_module_inits(
3939
// in theory be implemented in Rust, but are easiest to do in Python for one reason or another.
4040
// Includes _importlib_bootstrap and _importlib_bootstrap_external
4141
// For Windows: did you forget to run `powershell scripts\symlinks-to-hardlinks.ps1`?
42-
ext_modules!(dir = "Lib/python_builtins/");
42+
ext_modules!(iter, dir = "Lib/python_builtins/");
4343

4444
#[cfg(not(feature = "freeze-stdlib"))]
45-
{
46-
// core stdlib Python modules that the vm calls into, but are still used in Python
47-
// application code, e.g. copyreg
48-
ext_modules!(dir = "Lib/core_modules/");
49-
}
45+
// core stdlib Python modules that the vm calls into, but are still used in Python
46+
// application code, e.g. copyreg
47+
ext_modules!(iter, dir = "Lib/core_modules/");
5048
// if we're on freeze-stdlib, the core stdlib modules will be included anyway
5149
#[cfg(feature = "freeze-stdlib")]
52-
{
53-
modules.extend(map_frozen(vm, rustpython_pylib::frozen_stdlib()));
54-
}
50+
ext_modules!(iter, (rustpython_pylib::frozen_stdlib()));
5551

56-
modules
52+
iter
5753
}

vm/src/vm.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -282,7 +282,7 @@ impl VirtualMachine {
282282
initialized: false,
283283
};
284284

285-
let frozen = frozen::get_module_inits(&vm);
285+
let frozen = frozen::map_frozen(&vm, frozen::get_module_inits()).collect();
286286
PyRc::get_mut(&mut vm.state).unwrap().frozen = frozen;
287287

288288
module::init_module_dict(

0 commit comments

Comments
 (0)