Skip to content

Commit 456bc80

Browse files
committed
Rework frozen modules and directly deserialize to CodeObject<Literal>
1 parent 22bc2d2 commit 456bc80

File tree

12 files changed

+198
-116
lines changed

12 files changed

+198
-116
lines changed

compiler/core/src/bytecode.rs

Lines changed: 106 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
//! Implement python as a virtual machine with bytecodes. This module
22
//! implements bytecode structure.
33
4-
use crate::marshal::MarshalError;
54
use crate::{marshal, Location};
65
use bitflags::bitflags;
76
use itertools::Itertools;
@@ -46,6 +45,19 @@ pub trait ConstantBag: Sized + Copy {
4645
fn make_name(&self, name: &str) -> <Self::Constant as Constant>::Name;
4746
}
4847

48+
/// Conversion into a [`ConstantBag`], so that APIs can accept any value that
/// can produce a bag rather than only a bag itself.
pub trait AsBag {
49+
/// The concrete bag type returned by [`AsBag::as_bag`].
type Bag: ConstantBag;
50+
// clippy's `wrong_self_convention` expects `as_*` methods to borrow `self`;
// this one takes `self` by value, so the lint is silenced here.
#[allow(clippy::wrong_self_convention)]
51+
/// Consume `self` and return the corresponding bag.
fn as_bag(self) -> Self::Bag;
52+
}
53+
54+
// Blanket impl: every `ConstantBag` is its own `AsBag`, and the conversion is
// the identity.
impl<Bag: ConstantBag> AsBag for Bag {
55+
type Bag = Self;
56+
fn as_bag(self) -> Self {
57+
self
58+
}
59+
}
60+
4961
#[derive(Clone, Copy)]
5062
pub struct BasicBag;
5163

@@ -1077,27 +1089,6 @@ impl<C: Constant> CodeObject<C> {
10771089
}
10781090
}
10791091

1080-
impl CodeObject<ConstantData> {
1081-
/// Load a code object from bytes
1082-
pub fn from_bytes(data: &[u8]) -> Result<Self, MarshalError> {
1083-
use lz4_flex::block::DecompressError;
1084-
let raw_bincode = lz4_flex::decompress_size_prepended(data).map_err(|e| match e {
1085-
DecompressError::OutputTooSmall { .. } | DecompressError::ExpectedAnotherByte => {
1086-
MarshalError::Eof
1087-
}
1088-
_ => MarshalError::InvalidBytecode,
1089-
})?;
1090-
marshal::deserialize_code(&mut &raw_bincode[..], BasicBag)
1091-
}
1092-
1093-
/// Serialize this bytecode to bytes.
1094-
pub fn to_bytes(&self) -> Vec<u8> {
1095-
let mut data = Vec::new();
1096-
marshal::serialize_code(&mut data, self);
1097-
lz4_flex::compress_prepend_size(&data)
1098-
}
1099-
}
1100-
11011092
impl<C: Constant> fmt::Display for CodeObject<C> {
11021093
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
11031094
self.display_inner(f, false, 1)?;
@@ -1483,32 +1474,81 @@ impl<C: Constant> fmt::Debug for CodeObject<C> {
14831474
}
14841475
}
14851476

1486-
/// A frozen module. Holds a code object and whether it is part of a package
1487-
#[derive(Debug)]
1488-
pub struct FrozenModule {
1489-
pub code: CodeObject<ConstantData>,
1490-
pub package: bool,
1491-
}
1492-
14931477
pub mod frozen_lib {
14941478
use super::*;
1495-
use marshal::{Read, Write};
1479+
use marshal::{Read, ReadBorrowed, Write};
14961480

1497-
/// Decode a library to an iterable of frozen modules
1498-
pub fn decode_lib(bytes: &[u8]) -> FrozenModulesIter {
1499-
let data = lz4_flex::decompress_size_prepended(bytes).unwrap();
1500-
let mut data = marshal::Cursor { data, position: 0 };
1501-
let remaining = data.read_u32().unwrap();
1502-
FrozenModulesIter { remaining, data }
1481+
/// A frozen module. Holds a frozen code object and whether it is part of a package
1482+
#[derive(Copy, Clone)]
1483+
pub struct FrozenModule<B = &'static [u8]> {
1484+
pub code: FrozenCodeObject<B>,
1485+
pub package: bool,
15031486
}
15041487

1505-
pub struct FrozenModulesIter {
1488+
/// A code object kept in its encoded byte form, generic over the storage `B`
/// that holds those bytes.
#[derive(Copy, Clone)]
1489+
pub struct FrozenCodeObject<B> {
1490+
/// The encoded code object bytes.
pub bytes: B,
1491+
}
1492+
1493+
impl<B: AsRef<[u8]>> FrozenCodeObject<B> {
1494+
/// Decode a frozen codeobject
1495+
#[inline]
1496+
pub fn decode<Bag: AsBag>(
1497+
&self,
1498+
bag: Bag,
1499+
) -> CodeObject<<Bag::Bag as ConstantBag>::Constant> {
1500+
Self::_decode(self.bytes.as_ref(), bag.as_bag())
1501+
}
1502+
fn _decode<Bag: ConstantBag>(data: &[u8], bag: Bag) -> CodeObject<Bag::Constant> {
1503+
let decompressed = lz4_flex::decompress_size_prepended(data)
1504+
.expect("deserialize frozen CodeObject failed");
1505+
marshal::deserialize_code(&mut &decompressed[..], bag)
1506+
.expect("deserializing frozen CodeObject failed")
1507+
}
1508+
}
1509+
1510+
impl FrozenCodeObject<Vec<u8>> {
1511+
/// Encode `code` into an owned frozen code object: the code object is
/// serialized with `marshal::serialize_code` and the result is
/// lz4-compressed with its size prepended.
pub fn encode<C: Constant>(code: &CodeObject<C>) -> Self {
1512+
let mut data = Vec::new();
1513+
marshal::serialize_code(&mut data, code);
1514+
let bytes = lz4_flex::compress_prepend_size(&data);
1515+
FrozenCodeObject { bytes }
1516+
}
1517+
}
1518+
1519+
/// An encoded library of frozen modules, wrapping the byte storage `B`.
///
/// The type is `#[repr(transparent)]` over its single field, so it has the
/// same layout as `B`.
#[repr(transparent)]
1520+
pub struct FrozenLib<B: ?Sized = [u8]> {
1521+
/// The encoded library bytes.
pub bytes: B,
1522+
}
1523+
1524+
impl<B: AsRef<[u8]> + ?Sized> FrozenLib<B> {
1525+
/// Reinterpret a reference to the byte storage as a reference to a
/// `FrozenLib` over that storage, without copying.
pub const fn from_ref(b: &B) -> &FrozenLib<B> {
1526+
// SAFETY: `FrozenLib<B>` is `#[repr(transparent)]` with `bytes: B` as its
// only field, so `*const B` and `*const FrozenLib<B>` point to values of
// identical layout; the returned reference keeps the lifetime of `b`.
unsafe { &*(b as *const B as *const FrozenLib<B>) }
1527+
}
1528+
1529+
/// Decode a library to an iterable of frozen modules.
///
/// Reads the leading `u32` entry count; the iterator then walks the
/// remaining bytes.
///
/// # Panics
///
/// Panics if the entry count cannot be read from the start of the data.
1530+
pub fn decode(&self) -> FrozenModulesIter<'_> {
1531+
let mut data = self.bytes.as_ref();
1532+
let remaining = data.read_u32().unwrap();
1533+
FrozenModulesIter { remaining, data }
1534+
}
1535+
}
1536+
1537+
// Lets a `&FrozenLib` be used directly in a `for` loop; iteration is exactly
// `FrozenLib::decode`, yielding names and modules that borrow from the library.
impl<'a, B: AsRef<[u8]> + ?Sized> IntoIterator for &'a FrozenLib<B> {
1538+
type Item = (&'a str, FrozenModule<&'a [u8]>);
1539+
type IntoIter = FrozenModulesIter<'a>;
1540+
fn into_iter(self) -> Self::IntoIter {
1541+
self.decode()
1542+
}
1543+
}
1544+
1545+
pub struct FrozenModulesIter<'a> {
15061546
remaining: u32,
1507-
data: marshal::Cursor<Vec<u8>>,
1547+
data: &'a [u8],
15081548
}
15091549

1510-
impl Iterator for FrozenModulesIter {
1511-
type Item = (String, FrozenModule);
1550+
impl<'a> Iterator for FrozenModulesIter<'a> {
1551+
type Item = (&'a str, FrozenModule<&'a [u8]>);
15121552

15131553
fn next(&mut self) -> Option<Self::Item> {
15141554
if self.remaining > 0 {
@@ -1524,42 +1564,47 @@ pub mod frozen_lib {
15241564
(self.remaining as usize, Some(self.remaining as usize))
15251565
}
15261566
}
1527-
impl ExactSizeIterator for FrozenModulesIter {}
1567+
impl ExactSizeIterator for FrozenModulesIter<'_> {}
15281568

1529-
fn read_entry(rdr: &mut impl Read) -> Result<(String, FrozenModule), marshal::MarshalError> {
1569+
fn read_entry<'a>(
1570+
rdr: &mut &'a [u8],
1571+
) -> Result<(&'a str, FrozenModule<&'a [u8]>), marshal::MarshalError> {
15301572
let len = rdr.read_u32()?;
1531-
let name = rdr.read_str(len)?.to_owned();
1532-
let code = marshal::deserialize_code(rdr, BasicBag)?;
1573+
let name = rdr.read_str_borrow(len)?;
1574+
let len = rdr.read_u32()?;
1575+
let code_slice = rdr.read_slice_borrow(len)?;
1576+
let code = FrozenCodeObject { bytes: code_slice };
15331577
let package = rdr.read_u8()? != 0;
15341578
Ok((name, FrozenModule { code, package }))
15351579
}
15361580

1537-
/// Encode the given iterator of frozen modules into a compressed vector of bytes
1538-
pub fn encode_lib<'a, I>(lib: I) -> Vec<u8>
1539-
where
1540-
I: IntoIterator<Item = (&'a str, &'a FrozenModule)>,
1541-
I::IntoIter: ExactSizeIterator + Clone,
1542-
{
1543-
let iter = lib.into_iter();
1544-
let mut data = Vec::new();
1545-
write_lib(&mut data, iter);
1546-
lz4_flex::compress_prepend_size(&data)
1581+
impl FrozenLib<Vec<u8>> {
1582+
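/// Encode the given iterator of frozen modules into a vector of bytes.
/// The library container itself is not compressed; each module's code
/// bytes are written exactly as supplied.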
1583+
pub fn encode<'a, I, B: AsRef<[u8]>>(lib: I) -> FrozenLib<Vec<u8>>
1584+
where
1585+
I: IntoIterator<Item = (&'a str, FrozenModule<B>)>,
1586+
I::IntoIter: ExactSizeIterator + Clone,
1587+
{
1588+
let iter = lib.into_iter();
1589+
let mut bytes = Vec::new();
1590+
write_lib(&mut bytes, iter);
1591+
Self { bytes }
1592+
}
15471593
}
15481594

1549-
fn write_lib<'a>(
1550-
buf: &mut impl Write,
1551-
lib: impl ExactSizeIterator<Item = (&'a str, &'a FrozenModule)>,
1595+
fn write_lib<'a, B: AsRef<[u8]>>(
1596+
buf: &mut Vec<u8>,
1597+
lib: impl ExactSizeIterator<Item = (&'a str, FrozenModule<B>)>,
15521598
) {
15531599
marshal::write_len(buf, lib.len());
15541600
for (name, module) in lib {
15551601
write_entry(buf, name, module);
15561602
}
15571603
}
15581604

1559-
fn write_entry(buf: &mut impl Write, name: &str, module: &FrozenModule) {
1560-
marshal::write_len(buf, name.len());
1561-
buf.write_slice(name.as_bytes());
1562-
marshal::serialize_code(buf, &module.code);
1605+
fn write_entry(buf: &mut Vec<u8>, name: &str, module: FrozenModule<impl AsRef<[u8]>>) {
1606+
marshal::write_vec(buf, name.as_bytes());
1607+
marshal::write_vec(buf, module.code.bytes.as_ref());
15631608
buf.write_u8(module.package as u8);
15641609
}
15651610
}

compiler/core/src/marshal.rs

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -130,8 +130,21 @@ pub trait Read {
130130
}
131131
}
132132

133+
/// Extension of [`Read`] for readers whose output can borrow from the
/// underlying data for the lifetime `'a`, instead of from the `&mut self`
/// borrow of the call.
pub(crate) trait ReadBorrowed<'a>: Read {
134+
/// Read the next `n` bytes and return them as a slice living for `'a`.
fn read_slice_borrow(&mut self, n: u32) -> Result<&'a [u8]>;
135+
/// Read the next `len` bytes and return them as a `&'a str`.
///
/// Returns an error if the bytes cannot be read or are not valid UTF-8.
fn read_str_borrow(&mut self, len: u32) -> Result<&'a str> {
136+
Ok(std::str::from_utf8(self.read_slice_borrow(len)?)?)
137+
}
138+
}
139+
133140
impl Read for &[u8] {
134141
fn read_slice(&mut self, n: u32) -> Result<&[u8]> {
142+
self.read_slice_borrow(n)
143+
}
144+
}
145+
146+
impl<'a> ReadBorrowed<'a> for &'a [u8] {
147+
fn read_slice_borrow(&mut self, n: u32) -> Result<&'a [u8]> {
135148
let data = self.get(..n as usize).ok_or(MarshalError::Eof)?;
136149
*self = &self[n as usize..];
137150
Ok(data)
@@ -474,6 +487,11 @@ pub(crate) fn write_len<W: Write>(buf: &mut W, len: usize) {
474487
buf.write_u32(len);
475488
}
476489

490+
/// Write `slice` as a length-prefixed byte string: first its length via
/// `write_len`, then the raw bytes.
pub(crate) fn write_vec<W: Write>(buf: &mut W, slice: &[u8]) {
491+
write_len(buf, slice.len());
492+
buf.write_slice(slice);
493+
}
494+
477495
pub fn serialize_value<W: Write, D: Dumpable>(
478496
buf: &mut W,
479497
constant: DumpableValue<'_, D>,
@@ -501,13 +519,11 @@ pub fn serialize_value<W: Write, D: Dumpable>(
501519
}
502520
DumpableValue::Str(s) => {
503521
buf.write_u8(Type::Unicode as u8);
504-
write_len(buf, s.len());
505-
buf.write_slice(s.as_bytes());
522+
write_vec(buf, s.as_bytes());
506523
}
507524
DumpableValue::Bytes(b) => {
508525
buf.write_u8(Type::Bytes as u8);
509-
write_len(buf, b.len());
510-
buf.write_slice(b);
526+
write_vec(buf, b);
511527
}
512528
DumpableValue::Code(c) => {
513529
buf.write_u8(Type::Code as u8);
@@ -580,14 +596,12 @@ pub fn serialize_code<W: Write, C: Constant>(buf: &mut W, code: &CodeObject<C>)
580596
buf.write_u32(code.arg_count);
581597
buf.write_u32(code.kwonlyarg_count);
582598

583-
write_len(buf, code.source_path.as_ref().len());
584-
buf.write_slice(code.source_path.as_ref().as_bytes());
599+
write_vec(buf, code.source_path.as_ref().as_bytes());
585600

586601
buf.write_u32(code.first_line_number);
587602
buf.write_u32(code.max_stackdepth);
588603

589-
write_len(buf, code.obj_name.as_ref().len());
590-
buf.write_slice(code.obj_name.as_ref().as_bytes());
604+
write_vec(buf, code.obj_name.as_ref().as_bytes());
591605

592606
let cell2arg = code.cell2arg.as_deref().unwrap_or(&[]);
593607
write_len(buf, cell2arg.len());
@@ -603,8 +617,7 @@ pub fn serialize_code<W: Write, C: Constant>(buf: &mut W, code: &CodeObject<C>)
603617
let mut write_names = |names: &[C::Name]| {
604618
write_len(buf, names.len());
605619
for name in names {
606-
write_len(buf, name.as_ref().len());
607-
buf.write_slice(name.as_ref().as_bytes());
620+
write_vec(buf, name.as_ref().as_bytes());
608621
}
609622
};
610623

derive-impl/src/compile_bytecode.rs

Lines changed: 22 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ use crate::{extract_spans, Diagnostic};
1717
use once_cell::sync::Lazy;
1818
use proc_macro2::{Span, TokenStream};
1919
use quote::quote;
20-
use rustpython_compiler_core::{CodeObject, FrozenModule, Mode};
20+
use rustpython_compiler_core::{frozen_lib, CodeObject, Mode};
2121
use std::{
2222
collections::HashMap,
2323
env, fs,
@@ -44,6 +44,11 @@ enum CompilationSourceKind {
4444
Dir(PathBuf),
4545
}
4646

47+
/// A module compiled by the macro, held as a `CodeObject` until it is
/// encoded into its frozen form.
struct CompiledModule {
48+
/// The compiled code of the module.
code: CodeObject,
49+
/// Whether the module is a package (set from `is_init` when compiling a
/// directory, `false` for a single source).
package: bool,
50+
}
51+
4752
struct CompilationSource {
4853
kind: CompilationSourceKind,
4954
span: (Span, Span),
@@ -80,7 +85,7 @@ impl CompilationSource {
8085
mode: Mode,
8186
module_name: String,
8287
compiler: &dyn Compiler,
83-
) -> Result<HashMap<String, FrozenModule>, Diagnostic> {
88+
) -> Result<HashMap<String, CompiledModule>, Diagnostic> {
8489
match &self.kind {
8590
CompilationSourceKind::Dir(rel_path) => self.compile_dir(
8691
&CARGO_MANIFEST_DIR.join(rel_path),
@@ -89,7 +94,7 @@ impl CompilationSource {
8994
compiler,
9095
),
9196
_ => Ok(hashmap! {
92-
module_name.clone() => FrozenModule {
97+
module_name.clone() => CompiledModule {
9398
code: self.compile_single(mode, module_name, compiler)?,
9499
package: false,
95100
},
@@ -131,7 +136,7 @@ impl CompilationSource {
131136
parent: String,
132137
mode: Mode,
133138
compiler: &dyn Compiler,
134-
) -> Result<HashMap<String, FrozenModule>, Diagnostic> {
139+
) -> Result<HashMap<String, CompiledModule>, Diagnostic> {
135140
let mut code_map = HashMap::new();
136141
let paths = fs::read_dir(path)
137142
.or_else(|e| {
@@ -217,7 +222,7 @@ impl CompilationSource {
217222

218223
code_map.insert(
219224
module_name,
220-
FrozenModule {
225+
CompiledModule {
221226
code,
222227
package: is_init,
223228
},
@@ -369,12 +374,11 @@ pub fn impl_py_compile(
369374
.source
370375
.compile_single(args.mode, args.module_name, compiler)?;
371376

372-
let bytes = code.to_bytes();
373-
let bytes = LitByteStr::new(&bytes, Span::call_site());
377+
let frozen = frozen_lib::FrozenCodeObject::encode(&code);
378+
let bytes = LitByteStr::new(&frozen.bytes, Span::call_site());
374379

375380
let output = quote! {
376-
#crate_name::CodeObject::from_bytes(#bytes)
377-
.expect("Deserializing CodeObject failed")
381+
#crate_name::frozen_lib::FrozenCodeObject { bytes: &#bytes[..] }
378382
};
379383

380384
Ok(output)
@@ -390,12 +394,17 @@ pub fn impl_py_freeze(
390394
let crate_name = args.crate_name;
391395
let code_map = args.source.compile(args.mode, args.module_name, compiler)?;
392396

393-
let data =
394-
rustpython_compiler_core::frozen_lib::encode_lib(code_map.iter().map(|(k, v)| (&**k, v)));
395-
let bytes = LitByteStr::new(&data, Span::call_site());
397+
let data = frozen_lib::FrozenLib::encode(code_map.iter().map(|(k, v)| {
398+
let v = frozen_lib::FrozenModule {
399+
code: frozen_lib::FrozenCodeObject::encode(&v.code),
400+
package: v.package,
401+
};
402+
(&**k, v)
403+
}));
404+
let bytes = LitByteStr::new(&data.bytes, Span::call_site());
396405

397406
let output = quote! {
398-
#crate_name::frozen_lib::decode_lib(#bytes)
407+
#crate_name::frozen_lib::FrozenLib::from_ref(#bytes)
399408
};
400409

401410
Ok(output)

0 commit comments

Comments
 (0)