From 2ff414b18d0c1d94284eff2011109ba610d045d9 Mon Sep 17 00:00:00 2001 From: Justin Ridgewell Date: Fri, 4 Nov 2022 15:42:18 -0400 Subject: [PATCH] Implement Ropes for shared string construction (#2525) --- Cargo.lock | 13 +- cli/libturbo.h | 2 +- crates/next-core/src/app_source.rs | 31 +- .../src/next_client_component/with_chunks.rs | 3 +- .../with_client_chunks.rs | 3 +- crates/next-core/src/nodejs/mod.rs | 4 +- crates/turbo-tasks-env/src/dotenv.rs | 2 +- crates/turbo-tasks-fs/Cargo.toml | 1 + .../turbo-tasks-fs/examples/hash_directory.rs | 18 +- crates/turbo-tasks-fs/examples/hash_glob.rs | 18 +- crates/turbo-tasks-fs/src/lib.rs | 76 ++- crates/turbo-tasks-fs/src/rope.rs | 494 ++++++++++++++++++ .../src/deterministic_hash.rs | 17 +- crates/turbopack-cli-utils/src/issue.rs | 2 +- crates/turbopack-core/Cargo.toml | 1 - crates/turbopack-core/src/code_builder.rs | 141 +++-- crates/turbopack-core/src/introspect/asset.rs | 4 +- crates/turbopack-core/src/source_map.rs | 100 ++-- crates/turbopack-core/src/source_pos.rs | 86 ++- crates/turbopack-css/src/module_asset.rs | 4 +- crates/turbopack-css/src/parse.rs | 12 +- crates/turbopack-dev-server/src/lib.rs | 9 +- crates/turbopack-dev-server/src/source/mod.rs | 3 +- .../turbopack-ecmascript/src/chunk/loader.rs | 13 +- crates/turbopack-ecmascript/src/chunk/mod.rs | 47 +- .../src/chunk/source_map.rs | 8 +- .../src/chunk_group_files_asset.rs | 2 +- crates/turbopack-ecmascript/src/lib.rs | 5 +- crates/turbopack-ecmascript/src/parse.rs | 14 +- .../src/resolve/node_native_binding.rs | 4 +- crates/turbopack-env/src/asset.rs | 2 +- crates/turbopack-json/src/lib.rs | 2 +- crates/turbopack-static/src/lib.rs | 3 +- crates/turbopack-tests/tests/snapshot.rs | 6 +- shim/src/ffi.rs | 12 +- 35 files changed, 898 insertions(+), 264 deletions(-) create mode 100644 crates/turbo-tasks-fs/src/rope.rs diff --git a/Cargo.lock b/Cargo.lock index a2f41a7b82800..0d0a918897676 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -144,17 +144,6 @@ 
dependencies = [ "syn 1.0.99", ] -[[package]] -name = "async-recursion" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2cda8f4bcc10624c4e85bc66b3f452cca98cfa5ca002dc83a16aad2367641bea" -dependencies = [ - "proc-macro2 1.0.43", - "quote 1.0.21", - "syn 1.0.99", -] - [[package]] name = "async-stream" version = "0.3.3" @@ -5159,6 +5148,7 @@ version = "0.1.0" dependencies = [ "anyhow", "bitflags", + "bytes", "concurrent-queue", "futures", "futures-retry", @@ -5306,7 +5296,6 @@ name = "turbopack-core" version = "0.1.0" dependencies = [ "anyhow", - "async-recursion", "async-trait", "browserslist-rs", "futures", diff --git a/cli/libturbo.h b/cli/libturbo.h index 2df3868ced39e..7768fc503b44b 100644 --- a/cli/libturbo.h +++ b/cli/libturbo.h @@ -1,6 +1,6 @@ /* Code generated by cmd/cgo; DO NOT EDIT. */ -/* package github.com/vercel/turborepo/cli/cmd/turbo */ +/* package github.com/vercel/turbo/cli/cmd/turbo */ #line 1 "cgo-builtin-export-prolog" diff --git a/crates/next-core/src/app_source.rs b/crates/next-core/src/app_source.rs index 1df2a576fb112..91be00d48fc45 100644 --- a/crates/next-core/src/app_source.rs +++ b/crates/next-core/src/app_source.rs @@ -1,4 +1,4 @@ -use std::{collections::HashMap, fmt::Write}; +use std::{collections::HashMap, io::Write}; use anyhow::{anyhow, Context, Result}; use turbo_tasks::{ @@ -7,8 +7,8 @@ use turbo_tasks::{ }; use turbo_tasks_env::ProcessEnvVc; use turbo_tasks_fs::{ - DirectoryContent, DirectoryEntry, File, FileContent, FileContentVc, FileSystemEntryType, - FileSystemPathVc, + rope::RopeBuilder, DirectoryContent, DirectoryEntry, File, FileContent, FileContentVc, + FileSystemEntryType, FileSystemPathVc, }; use turbopack::{ ecmascript::EcmascriptInputTransform, @@ -494,15 +494,17 @@ impl NodeEntry for AppRenderer { .into_iter() .try_join() .await?; - let mut result = - "import IPC, { Ipc } from \"@vercel/turbopack-next/internal/ipc\";\n".to_string(); + let mut result = RopeBuilder::from( 
+ "import IPC, { Ipc } from \"@vercel/turbopack-next/internal/ipc\";\n", + ); + for (_, import) in segments.iter() { if let Some((p, identifier, chunks_identifier)) = import { + result += r#"("TURBOPACK {{ transition: next-layout-entry; chunking-type: parallel }}"); +"#; writeln!( result, - r#"("TURBOPACK {{ transition: next-layout-entry; chunking-type: parallel }}"); -import {}, {{ chunks as {} }} from {}; -"#, + "import {}, {{ chunks as {} }} from {};\n", identifier, chunks_identifier, stringify_str(p) @@ -518,7 +520,8 @@ import BOOTSTRAP from {}; stringify_str(&page) )?; } - result.push_str("const LAYOUT_INFO = ["); + + result += "const LAYOUT_INFO = ["; for (segment_str_lit, import) in segments.iter() { if let Some((_, identifier, chunks_identifier)) = import { writeln!( @@ -530,13 +533,15 @@ import BOOTSTRAP from {}; writeln!(result, " {{ segment: {segment_str_lit} }},",)? } } - result.push_str("];\n\n"); + result += "];\n\n"; + let base_code = next_js_file("entry/app-renderer.tsx"); - let mut file = File::from(result); if let FileContent::Content(base_file) = &*base_code.await? 
{ - file.push_content(base_file.content()); + result += base_file.content() } - let asset = VirtualAssetVc::new(path.join("entry"), FileContent::Content(file).into()); + + let file = File::from(result.build()); + let asset = VirtualAssetVc::new(path.join("entry"), file.into()); let (context, intermediate_output_path) = if is_rsc { (self.context, self.intermediate_output_path.join("__rsc__")) } else { diff --git a/crates/next-core/src/next_client_component/with_chunks.rs b/crates/next-core/src/next_client_component/with_chunks.rs index 0ed0e646f1057..90a03e3b11bac 100644 --- a/crates/next-core/src/next_client_component/with_chunks.rs +++ b/crates/next-core/src/next_client_component/with_chunks.rs @@ -129,7 +129,8 @@ const chunks = {}; ", module_id, Value::Array(client_chunks) - ), + ) + .into(), ..Default::default() } .cell()) diff --git a/crates/next-core/src/next_client_component/with_client_chunks.rs b/crates/next-core/src/next_client_component/with_client_chunks.rs index 7490cc856ec53..2c624e3b14fec 100644 --- a/crates/next-core/src/next_client_component/with_client_chunks.rs +++ b/crates/next-core/src/next_client_component/with_client_chunks.rs @@ -136,7 +136,8 @@ const chunks = {}; ", module_id, Value::Array(client_chunks) - ), + ) + .into(), ..Default::default() } .cell()) diff --git a/crates/next-core/src/nodejs/mod.rs b/crates/next-core/src/nodejs/mod.rs index 2f4c6abf2a722..10b7642a57976 100644 --- a/crates/next-core/src/nodejs/mod.rs +++ b/crates/next-core/src/nodejs/mod.rs @@ -569,7 +569,7 @@ async fn run_proxy_operation( Ok(ProxyResult { status, headers, - body, + body: body.into(), }) } @@ -610,7 +610,7 @@ async fn proxy_error( "content-type".to_string(), "text/html; charset=utf-8".to_string(), ], - body: body.into_bytes(), + body: body.into(), } .cell()) } diff --git a/crates/turbo-tasks-env/src/dotenv.rs b/crates/turbo-tasks-env/src/dotenv.rs index 54718a02911ea..b3c1807c156e8 100644 --- a/crates/turbo-tasks-env/src/dotenv.rs +++ 
b/crates/turbo-tasks-env/src/dotenv.rs @@ -53,7 +53,7 @@ impl ProcessEnv for DotenvProcessEnv { // from_read will load parse and evalute the Read, and set variables // into the global env. If a later dotenv defines an already defined // var, it'll be ignored. - res = dotenvy::from_read(f.content()); + res = dotenvy::from_read(f.read()); vars = env::vars().collect(); restore_env(&vars, &initial); diff --git a/crates/turbo-tasks-fs/Cargo.toml b/crates/turbo-tasks-fs/Cargo.toml index 05144768367ff..2713fe952dc02 100644 --- a/crates/turbo-tasks-fs/Cargo.toml +++ b/crates/turbo-tasks-fs/Cargo.toml @@ -11,6 +11,7 @@ bench = false [dependencies] anyhow = "1.0.47" bitflags = "1.3.2" +bytes = "1.1.0" concurrent-queue = "1.2.2" futures = "0.3.24" futures-retry = "0.6.0" diff --git a/crates/turbo-tasks-fs/examples/hash_directory.rs b/crates/turbo-tasks-fs/examples/hash_directory.rs index 9ec77e017dc6a..f88ce41f642b1 100644 --- a/crates/turbo-tasks-fs/examples/hash_directory.rs +++ b/crates/turbo-tasks-fs/examples/hash_directory.rs @@ -5,6 +5,7 @@ use std::{ collections::BTreeMap, env::current_dir, + io::Read, time::{Duration, Instant}, }; @@ -86,7 +87,13 @@ async fn hash_directory(directory: FileSystemPathVc) -> Result { println!("{}: not found", directory.await?.path); } }; - let hash = hash_content(hashes.into_values().collect::>().join(",")); + let hash = hash_content( + &mut hashes + .into_values() + .collect::>() + .join(",") + .as_bytes(), + ); println!("hash_directory({})", dir_path); Ok(hash) } @@ -95,7 +102,7 @@ async fn hash_directory(directory: FileSystemPathVc) -> Result { async fn hash_file(file_path: FileSystemPathVc) -> Result { let content = file_path.read().await?; Ok(match &*content { - FileContent::Content(file) => hash_content(file), + FileContent::Content(file) => hash_content(&mut file.read()), FileContent::NotFound => { // report error StringVc::cell("".to_string()) @@ -103,9 +110,12 @@ async fn hash_file(file_path: FileSystemPathVc) -> Result { }) } 
-fn hash_content(content: impl AsRef<[u8]>) -> StringVc { +fn hash_content(content: &mut R) -> StringVc { let mut hasher = Sha256::new(); - hasher.update(content); + let mut buf = [0; 1024]; + while let Ok(size) = content.read(&mut buf) { + hasher.update(&buf[0..size]); + } let result = format!("{:x}", hasher.finalize()); StringVc::cell(result) diff --git a/crates/turbo-tasks-fs/examples/hash_glob.rs b/crates/turbo-tasks-fs/examples/hash_glob.rs index 5d08abcd7ad8b..624d986131ff6 100644 --- a/crates/turbo-tasks-fs/examples/hash_glob.rs +++ b/crates/turbo-tasks-fs/examples/hash_glob.rs @@ -5,6 +5,7 @@ use std::{ collections::BTreeMap, env::current_dir, + io::Read, time::{Duration, Instant}, }; @@ -79,7 +80,13 @@ async fn hash_glob_result(result: ReadGlobResultVc) -> Result { if hashes.is_empty() { return Ok(empty_string()); } - let hash = hash_content(hashes.into_values().collect::>().join(",")); + let hash = hash_content( + &mut hashes + .into_values() + .collect::>() + .join(",") + .as_bytes(), + ); Ok(hash) } @@ -87,7 +94,7 @@ async fn hash_glob_result(result: ReadGlobResultVc) -> Result { async fn hash_file(file_path: FileSystemPathVc) -> Result { let content = file_path.read().await?; Ok(match &*content { - FileContent::Content(file) => hash_content(file), + FileContent::Content(file) => hash_content(&mut file.read()), FileContent::NotFound => { // report error StringVc::cell("".to_string()) @@ -95,9 +102,12 @@ async fn hash_file(file_path: FileSystemPathVc) -> Result { }) } -fn hash_content(content: impl AsRef<[u8]>) -> StringVc { +fn hash_content(content: &mut R) -> StringVc { let mut hasher = Sha256::new(); - hasher.update(content); + let mut buf = [0; 1024]; + while let Ok(size) = content.read(&mut buf) { + hasher.update(&buf[0..size]); + } let result = format!("{:x}", hasher.finalize()); StringVc::cell(result) diff --git a/crates/turbo-tasks-fs/src/lib.rs b/crates/turbo-tasks-fs/src/lib.rs index 858fd08276f0e..478058fbdf797 100644 --- 
a/crates/turbo-tasks-fs/src/lib.rs +++ b/crates/turbo-tasks-fs/src/lib.rs @@ -11,6 +11,7 @@ pub mod glob; mod invalidator_map; mod read_glob; mod retry; +pub mod rope; pub mod util; use std::{ @@ -38,10 +39,7 @@ use read_glob::read_glob; pub use read_glob::{ReadGlobResult, ReadGlobResultVc}; use serde::{Deserialize, Serialize}; use serde_json::Value; -use tokio::{ - fs, - io::{AsyncReadExt, AsyncWriteExt}, -}; +use tokio::{fs, io::AsyncReadExt}; use turbo_tasks::{ primitives::{BoolVc, StringReadRef, StringVc}, spawn_thread, @@ -51,9 +49,12 @@ use turbo_tasks::{ use turbo_tasks_hash::hash_xxh3_hash64; use util::{join_path, normalize_path, sys_to_unix, unix_to_sys}; -use crate::retry::{retry_blocking, retry_future}; #[cfg(target_family = "windows")] use crate::util::is_windows_raw_path; +use crate::{ + retry::{retry_blocking, retry_future}, + rope::{Rope, RopeReadRef, RopeReader}, +}; #[turbo_tasks::value_trait] pub trait FileSystem: ValueToString { @@ -489,7 +490,7 @@ impl FileSystem for DiskFileSystem { let full_path = full_path_to_write.clone(); async move { let mut f = fs::File::create(&full_path).await?; - f.write_all(&file.content).await?; + tokio::io::copy(&mut file.read(), &mut f).await?; #[cfg(target_family = "unix")] f.set_permissions(file.meta.permissions.into()).await?; Ok::<(), io::Error>(()) @@ -1181,7 +1182,7 @@ pub enum LinkContent { pub struct File { meta: FileMeta, #[turbo_tasks(debug_ignore)] - content: Vec, + content: Rope, } impl File { @@ -1195,12 +1196,20 @@ impl File { Ok(File { meta: metadata.into(), - content: output, + content: Rope::from(output), }) } /// Creates a [File] from raw bytes. fn from_bytes(content: Vec) -> Self { + File { + meta: FileMeta::default(), + content: Rope::from(content), + } + } + + /// Creates a [File] from a rope. 
+ fn from_rope(content: Rope) -> Self { File { meta: FileMeta::default(), content, @@ -1215,13 +1224,18 @@ impl File { self.meta.content_type = Some(content_type); self } + + /// Returns a Read/AsyncRead/Stream/Iterator to access the File's contents. + pub fn read(&self) -> RopeReader { + self.content.read() + } } impl Debug for File { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { f.debug_struct("File") .field("meta", &self.meta) - .field("content (hash)", &hash_xxh3_hash64(self.content.as_slice())) + .field("content (hash)", &hash_xxh3_hash64(&self.content)) .finish() } } @@ -1256,26 +1270,33 @@ impl From<&[u8]> for File { } } -impl File { - pub fn new(meta: FileMeta, content: Vec) -> Self { - Self { meta, content } +impl From for File { + fn from(rope: RopeReadRef) -> Self { + File::from_rope(rope.clone_value()) } +} - pub fn meta(&self) -> &FileMeta { - &self.meta +impl From for File { + fn from(rope: Rope) -> Self { + File::from_rope(rope) } +} - pub fn content(&self) -> &[u8] { - &self.content +impl File { + pub fn new(meta: FileMeta, content: Vec) -> Self { + Self { + meta, + content: Rope::from(content), + } } - pub fn push_content(&mut self, content: &[u8]) { - self.content.extend_from_slice(content); + /// Returns the associated [FileMeta] of this file. + pub fn meta(&self) -> &FileMeta { + &self.meta } -} -impl AsRef<[u8]> for File { - fn as_ref(&self) -> &[u8] { + /// Returns the immutable contents of this file. 
+ pub fn content(&self) -> &Rope { &self.content } } @@ -1359,11 +1380,8 @@ impl FileContent { pub fn parse_json(&self) -> FileJsonContent { match self { - FileContent::Content(file) => match std::str::from_utf8(&file.content) { - Ok(string) => match serde_json::from_str(string) { - Ok(data) => FileJsonContent::Content(data), - Err(_) => FileJsonContent::Unparseable, - }, + FileContent::Content(file) => match serde_json::from_reader(file.read()) { + Ok(data) => FileJsonContent::Content(data), Err(_) => FileJsonContent::Unparseable, }, FileContent::NotFound => FileJsonContent::NotFound, @@ -1372,9 +1390,9 @@ impl FileContent { pub fn parse_json_with_comments(&self) -> FileJsonContent { match self { - FileContent::Content(file) => match std::str::from_utf8(&file.content) { + FileContent::Content(file) => match file.content.to_str() { Ok(string) => match parse_to_serde_value( - string, + &string, &ParseOptions { allow_comments: true, allow_trailing_commas: true, @@ -1395,7 +1413,7 @@ impl FileContent { pub fn lines(&self) -> FileLinesContent { match self { - FileContent::Content(file) => match std::str::from_utf8(&file.content) { + FileContent::Content(file) => match file.content.to_str() { Ok(string) => { let mut bytes_offset = 0; FileLinesContent::Lines( diff --git a/crates/turbo-tasks-fs/src/rope.rs b/crates/turbo-tasks-fs/src/rope.rs new file mode 100644 index 0000000000000..7013f84ad3aac --- /dev/null +++ b/crates/turbo-tasks-fs/src/rope.rs @@ -0,0 +1,494 @@ +use std::{ + borrow::Cow, + cmp::min, + fmt::Debug, + io::{self, BufRead, Read, Result as IoResult, Write}, + mem, ops, + pin::Pin, + sync::Arc, + task::{Context as TaskContext, Poll}, +}; + +use anyhow::{Context, Result}; +use bytes::{Buf, Bytes}; +use futures::Stream; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; +use tokio::io::{AsyncRead, ReadBuf}; +use turbo_tasks_hash::{DeterministicHash, DeterministicHasher}; +use RopeElem::{Local, Shared}; + +static EMPTY_BUF: &[u8] = &[]; + +/// A 
Rope provides an efficient structure for sharing bytes/strings between +/// multiple sources. Cloning a Rope is extremely cheap (Arc and usize), and +/// the contents of one Rope can be shared by just cloning an Arc. +/// +/// Ropes are immutable, in order to construct one see [RopeBuilder]. +#[turbo_tasks::value(shared, serialization = "custom")] +#[derive(Clone, Debug, Default)] +pub struct Rope { + /// Total length of all held bytes. + length: usize, + + /// A shareable container holding the rope's bytes. + data: InnerRope, +} + +/// An Arc container for ropes. This indirection allows for easily sharing the +/// contents between Ropes (and also RopeBuilders/RopeReaders). +#[turbo_tasks::value(shared, serialization = "none", eq = "manual")] +#[derive(Clone, Debug, Default)] +struct InnerRope(#[turbo_tasks(debug_ignore, trace_ignore)] Arc>); + +/// Differentiates the types of stored bytes in a rope. +#[derive(Clone, Debug)] +enum RopeElem { + /// Local bytes are owned directly by this rope. + Local(Bytes), + + /// Shared holds the Arc container of another rope. + Shared(InnerRope), +} + +/// RopeBuilder provides a mutable container to append bytes/strings. This can +/// also append _other_ Rope instances cheaply, allowing efficient sharing of +/// the contents without a full clone of the bytes. +#[derive(Default)] +pub struct RopeBuilder { + /// Total length of all previously committed bytes. + length: usize, + + /// Immutable bytes references that have been appended to this builder. The + /// rope's content is the combination of all these committed bytes. + committed: Vec, + + /// Mutable bytes collection where non-static/non-shared bytes are written. + /// This builds until the next time a static or shared bytes is + /// appended, in which case we split the buffer and commit. Finishing + /// the builder also commits these bytes. 
+ writable: Vec, +} + +impl Rope { + pub fn len(&self) -> usize { + self.length + } + + pub fn is_empty(&self) -> bool { + self.length == 0 + } + + /// Returns a Read/AsyncRead/Stream/Iterator instance over all bytes. + pub fn read(&self) -> RopeReader { + RopeReader::new(&self.data) + } + + /// Returns a String instance of all bytes. + pub fn to_str(&self) -> Result> { + if self.data.len() == 1 { + if let Local(bytes) = &self.data[0] { + let utf8 = std::str::from_utf8(bytes); + return utf8 + .context("failed to convert rope into string") + .map(Cow::Borrowed); + } + } + + let mut read = self.read(); + let mut string = String::with_capacity(self.len()); + let res = read.read_to_string(&mut string); + res.context("failed to convert rope into string")?; + Ok(Cow::Owned(string)) + } +} + +impl> From for Rope { + fn from(bytes: T) -> Self { + let bytes = bytes.into(); + Rope { + length: bytes.len(), + data: InnerRope::from(bytes), + } + } +} + +impl RopeBuilder { + /// Push owned bytes into the Rope. + /// + /// If possible use [push_static_bytes] or `+=` operation instead, as they + /// will create a reference to shared memory instead of cloning the bytes. + pub fn push_bytes(&mut self, bytes: &[u8]) { + self.length += bytes.len(); + self.writable.extend(bytes); + } + + /// Push static lifetime bytes into the Rope. + /// + /// This is more efficient than pushing owned bytes, because the internal + /// data does not need to be copied when the rope is read. + pub fn push_static_bytes(&mut self, bytes: &'static [u8]) { + // If the string is smaller than the cost of a Bytes reference (4 usizes), then + // it's more efficient to own the bytes in a new buffer. We may be able to reuse + // that buffer when more bytes are pushed. + if bytes.len() < mem::size_of::() { + return self.push_bytes(bytes); + } + + // We may have pending bytes from a prior push. 
+ self.finish(); + + self.length += bytes.len(); + self.committed.push(Local(bytes.into())); + } + + /// Concatenate another Rope instance into our builder. + /// + /// This is much more efficient than pushing actual bytes, since we can + /// share the other Rope's references without copying the underlying data. + pub fn concat(&mut self, other: &Rope) { + // We may have pending bytes from a prior push. + self.finish(); + + self.length += other.len(); + self.committed.push(Shared(other.data.clone())); + } + + /// Writes any pending bytes into our committed queue. + /// + /// This may be called multiple times without issue. + pub fn finish(&mut self) { + if !self.writable.is_empty() { + let writable = mem::take(&mut self.writable); + self.committed.push(Local(writable.into())); + } + } + + pub fn len(&self) -> usize { + self.length + } + + pub fn is_empty(&self) -> bool { + self.length == 0 + } + + /// Constructs our final, immutable Rope instance. + pub fn build(mut self) -> Rope { + self.finish(); + Rope { + length: self.length, + data: self.committed.into(), + } + } +} + +impl From<&'static str> for RopeBuilder { + default fn from(bytes: &'static str) -> Self { + let mut r = RopeBuilder::default(); + r.push_static_bytes(bytes.as_bytes()); + r + } +} + +impl From> for RopeBuilder { + fn from(bytes: Vec) -> Self { + RopeBuilder { + length: bytes.len(), + committed: vec![], + writable: bytes, + } + } +} + +impl Write for RopeBuilder { + fn write(&mut self, bytes: &[u8]) -> IoResult { + self.push_bytes(bytes); + Ok(bytes.len()) + } + + fn flush(&mut self) -> IoResult<()> { + self.finish(); + Ok(()) + } +} + +impl ops::AddAssign<&'static str> for RopeBuilder { + /// Pushes a reference to static memory onto the rope. + /// + /// This is more efficient than pushing owned bytes, because the internal + /// data does not need to be copied when the rope is read. 
+ fn add_assign(&mut self, rhs: &'static str) { + self.push_static_bytes(rhs.as_bytes()); + } +} + +impl ops::AddAssign<&Rope> for RopeBuilder { + fn add_assign(&mut self, rhs: &Rope) { + self.concat(rhs); + } +} + +impl DeterministicHash for Rope { + /// Ropes with similar contents hash the same, regardless of their + /// structure. + fn deterministic_hash(&self, state: &mut H) { + state.write_usize(self.len()); + self.data.deterministic_hash(state); + } +} + +impl Serialize for Rope { + /// Ropes are always serialized into contiguous strings, because + /// deserialization won't deduplicate and share the Arcs (being the only + /// possible owner of an individual "shared" data doesn't make sense). + fn serialize(&self, serializer: S) -> Result { + use serde::ser::Error; + let s = self.to_str().map_err(Error::custom)?; + serializer.serialize_str(&s) + } +} + +impl<'de> Deserialize<'de> for Rope { + /// Deserializes strings into a contiguous, immutable Rope. + fn deserialize>(deserializer: D) -> Result { + let bytes = >::deserialize(deserializer)?; + Ok(Rope::from(bytes)) + } +} + +impl DeterministicHash for InnerRope { + /// Ropes with similar contents hash the same, regardless of their + /// structure. Notice the InnerRope does not contain a length (and any + /// shared InnerRopes won't either), so the exact structure isn't + /// relevant at this point. + fn deterministic_hash(&self, state: &mut H) { + for v in self.0.iter() { + v.deterministic_hash(state); + } + } +} + +impl From for InnerRope { + fn from(bytes: Bytes) -> Self { + InnerRope::from(vec![Local(bytes)]) + } +} + +impl From> for InnerRope { + fn from(els: Vec) -> Self { + InnerRope(Arc::new(els)) + } +} + +impl PartialEq for InnerRope { + /// Ropes with similar contents are equal, regardless of their structure. 
+ fn eq(&self, other: &Self) -> bool { + let mut left = RopeReader::new(self); + let mut right = RopeReader::new(other); + + loop { + match (left.fill_buf(), right.fill_buf()) { + // fill_buf should always return Ok, with either some number of bytes or 0 bytes + // when consumed. + (Ok(a), Ok(b)) => { + let len = min(a.len(), b.len()); + + // When one buffer is consumed, both must be consumed. + if len == 0 { + return a.len() == b.len(); + } + + if a[0..len] != b[0..len] { + return false; + } + + left.consume(len); + right.consume(len); + } + + // If an error is ever returned (which shouldn't happen for us) for either/both, + // then we can't prove equality. + _ => return false, + } + } + } +} +impl Eq for InnerRope {} + +impl ops::Deref for InnerRope { + type Target = Arc>; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl DeterministicHash for RopeElem { + /// Ropes with similar contents hash the same, regardless of their + /// structure. Notice the Bytes length is not hashed, and shared InnerRopes + /// do not contain a length. + fn deterministic_hash(&self, state: &mut H) { + match self { + Local(bytes) => state.write_bytes(bytes), + Shared(inner) => inner.deterministic_hash(state), + } + } +} + +/// Implements the Read/AsyncRead/Stream/Iterator trait over a Rope. +pub struct RopeReader { + /// The Rope's tree is kept as a cloned stack, allowing us to accomplish + /// incremental yielding. + stack: Vec, +} + +/// A StackElem holds the current index into either a Bytes or a shared Rope. +/// When the index reaches the end of the associated data, it is removed and we +/// continue onto the next item in the stack. +enum StackElem { + Local(Bytes), + Shared(InnerRope, usize), +} + +impl RopeReader { + fn new(rope: &InnerRope) -> Self { + RopeReader { + stack: vec![StackElem::from(rope)], + } + } + + /// A shared implementation for reading bytes. This takes the basic + /// operations needed for both Read and AsyncRead. 
+ fn read_internal(&mut self, want: usize, buf: &mut ReadBuf<'_>) -> usize { + let mut remaining = want; + + while remaining > 0 { + let mut bytes = match self.next() { + None => break, + Some(b) => b, + }; + + let amount = min(bytes.len(), remaining); + + buf.put_slice(&bytes[0..amount]); + + if amount < bytes.len() { + bytes.advance(amount); + self.stack.push(StackElem::Local(bytes)) + } + remaining -= amount; + } + + want - remaining + } +} + +impl Iterator for RopeReader { + type Item = Bytes; + + /// Iterates the rope's elements recursively until we find the next Local + /// section, returning its Bytes. + fn next(&mut self) -> Option { + loop { + let (inner, mut index) = match self.stack.pop() { + None => return None, + Some(StackElem::Local(b)) => return Some(b), + Some(StackElem::Shared(r, i)) => (r, i), + }; + + let el = inner[index].clone(); + index += 1; + if index < inner.len() { + self.stack.push(StackElem::Shared(inner, index)); + } + + self.stack.push(StackElem::from(el)); + } + } +} + +impl Read for RopeReader { + /// Reads the Rope into the provided buffer. + fn read(&mut self, buf: &mut [u8]) -> io::Result { + Ok(self.read_internal(buf.len(), &mut ReadBuf::new(buf))) + } +} + +impl AsyncRead for RopeReader { + /// Reads the Rope into the provided buffer, asynchronously. + fn poll_read( + self: Pin<&mut Self>, + _cx: &mut TaskContext<'_>, + buf: &mut ReadBuf<'_>, + ) -> Poll> { + let this = self.get_mut(); + this.read_internal(buf.remaining(), buf); + Poll::Ready(Ok(())) + } +} + +impl BufRead for RopeReader { + /// Returns the full buffer without copying any data. The same bytes will + /// continue to be returned until [consume] is called to either consume a + /// partial amount of bytes (in which case the Bytes will advance beyond + /// them) or the full amount of bytes (in which case the next Bytes will be + /// returned). 
+ fn fill_buf(&mut self) -> IoResult<&[u8]> { + let bytes = match self.next() { + None => return Ok(EMPTY_BUF), + Some(b) => b, + }; + + self.stack.push(StackElem::Local(bytes)); + let bytes = match self.stack.last() { + Some(StackElem::Local(b)) => b, + _ => unreachable!(), + }; + Ok(bytes) + } + + /// Consumes some amount of bytes from the current Bytes instance, ensuring + /// those bytes are not returned on the next call to [fill_buf]. + fn consume(&mut self, amt: usize) { + if let Some(StackElem::Local(b)) = self.stack.last_mut() { + if amt == b.len() { + self.stack.pop(); + } else { + b.advance(amt); + } + } + } +} + +impl Stream for RopeReader { + /// The Result item type is required for this to be streamable into a + /// [Hyper::Body]. + type Item = Result; + + /// Returns a "result" of reading the next shared bytes reference. This + /// differs from [Read::read] by not copying any memory. + fn poll_next(self: Pin<&mut Self>, _cx: &mut TaskContext<'_>) -> Poll> { + let this = self.get_mut(); + + let bytes = match this.next() { + None => return Poll::Ready(None), + Some(b) => b, + }; + + Poll::Ready(Some(Ok(bytes))) + } +} + +impl From<&InnerRope> for StackElem { + fn from(rope: &InnerRope) -> Self { + Self::Shared(rope.clone(), 0) + } +} + +impl From for StackElem { + fn from(el: RopeElem) -> Self { + match el { + Local(bytes) => Self::Local(bytes), + Shared(inner) => Self::Shared(inner, 0), + } + } +} diff --git a/crates/turbo-tasks-hash/src/deterministic_hash.rs b/crates/turbo-tasks-hash/src/deterministic_hash.rs index ce7b364cfbf7e..4acc4926137be 100644 --- a/crates/turbo-tasks-hash/src/deterministic_hash.rs +++ b/crates/turbo-tasks-hash/src/deterministic_hash.rs @@ -102,17 +102,17 @@ impl DeterministicHash for &T { } } -impl DeterministicHash for String { +impl DeterministicHash for [u8] { fn deterministic_hash(&self, state: &mut H) { state.write_usize(self.len()); - state.write_bytes(self.as_bytes()); + state.write_bytes(self); } } -impl 
DeterministicHash for [u8] { +impl DeterministicHash for String { fn deterministic_hash(&self, state: &mut H) { state.write_usize(self.len()); - state.write_bytes(self); + state.write_bytes(self.as_bytes()); } } @@ -128,6 +128,15 @@ impl DeterministicHash for Option { } } +impl DeterministicHash for Vec { + fn deterministic_hash(&self, state: &mut H) { + state.write_usize(self.len()); + for v in self { + v.deterministic_hash(state); + } + } +} + /// HasherWrapper allows the DeterministicHasher to be used as a Hasher, for /// standard types that do not allow us to directly access their internals. struct HasherWrapper<'a, D: DeterministicHasher>(&'a mut D); diff --git a/crates/turbopack-cli-utils/src/issue.rs b/crates/turbopack-cli-utils/src/issue.rs index 08dd652b55412..e35f09ba1c703 100644 --- a/crates/turbopack-cli-utils/src/issue.rs +++ b/crates/turbopack-cli-utils/src/issue.rs @@ -444,7 +444,7 @@ impl ConsoleUiVc { let issues = issues .iter_with_shortest_path() - .map(async move |(issue, path)| { + .map(|(issue, path)| async move { // (issue.) 
let plain_issue = issue.into_plain(); let id = plain_issue.internal_hash().await?; diff --git a/crates/turbopack-core/Cargo.toml b/crates/turbopack-core/Cargo.toml index 53e5a8c72ed95..f1aeb0257ad3c 100644 --- a/crates/turbopack-core/Cargo.toml +++ b/crates/turbopack-core/Cargo.toml @@ -11,7 +11,6 @@ bench = false [dependencies] anyhow = "1.0.47" -async-recursion = "1.0.0" async-trait = "0.1.56" browserslist-rs = "=0.11.0" # Keep consistent with preset_env_base through swc_core futures = "0.3.24" diff --git a/crates/turbopack-core/src/code_builder.rs b/crates/turbopack-core/src/code_builder.rs index d59db9bec3c1c..5ec1ae9cb628c 100644 --- a/crates/turbopack-core/src/code_builder.rs +++ b/crates/turbopack-core/src/code_builder.rs @@ -1,11 +1,12 @@ use std::{ - fmt::{Result as FmtResult, Write}, + cmp::min, + io::{BufRead, Result as IoResult, Write}, ops, }; use anyhow::Result; use sourcemap::SourceMapBuilder; -use turbo_tasks::primitives::StringVc; +use turbo_tasks_fs::rope::{Rope, RopeBuilder}; use crate::{ source_map::{GenerateSourceMap, GenerateSourceMapVc, SourceMapSection, SourceMapVc}, @@ -14,58 +15,53 @@ use crate::{ /// Code stores combined output code and the source map of that output code. #[turbo_tasks::value(shared)] -#[derive(Debug, Clone, Default)] +#[derive(Debug, Clone)] pub struct Code { - code: String, + code: Rope, /// A mapping of byte-offset in the code string to an associated source map. mappings: Vec<(usize, Option)>, } -impl Code { - pub fn new() -> Self { - Default::default() - } +/// CodeBuilder provides a mutable container to append source code. +#[derive(Default)] +pub struct CodeBuilder { + code: RopeBuilder, - pub fn source_code(&self) -> &str { + /// A mapping of byte-offset in the code string to an associated source map. 
+ mappings: Vec<(usize, Option)>, +} + +impl Code { + pub fn source_code(&self) -> &Rope { &self.code } - /// Setting breakpoints on synthetic code can cause weird behaviors - /// because Chrome will treat the location as belonging to the previous - /// original code section. By inserting an empty source map when reaching a - /// synthetic section directly after an original section, we tell Chrome - /// that the previous map ended at this point. - fn push_map(&mut self, map: Option) { - if map.is_none() && matches!(self.mappings.last(), None | Some((_, None))) { - // No reason to push an empty map directly after an empty map - return; - } - - debug_assert!( - map.is_some() || !self.mappings.is_empty(), - "the first mapping is never a None" - ); - self.mappings.push((self.code.len(), map)); + /// Tests if any code in this Code contains an associated source map. + pub fn has_source_map(&self) -> bool { + !self.mappings.is_empty() } +} +impl CodeBuilder { /// Pushes synthetic runtime code without an associated source map. This is /// the default concatenation operation, but it's designed to be used /// with the `+=` operator. - fn push_str(&mut self, code: &str) { - self.push_source(code, None); + fn push_static_bytes(&mut self, code: &'static [u8]) { + self.push_map(None); + self.code.push_static_bytes(code); } /// Pushes original user code with an optional source map if one is /// available. If it's not, this is no different than pushing Synthetic /// code. - pub fn push_source(&mut self, code: &str, map: Option) { + pub fn push_source(&mut self, code: &Rope, map: Option) { self.push_map(map); self.code += code; } - /// Copies the Synthetic/Original code of an already constructed CodeBuilder - /// into this instance. + /// Copies the Synthetic/Original code of an already constructed Code into + /// this instance. 
pub fn push_code(&mut self, prebuilt: &Code) { if let Some((index, _)) = prebuilt.mappings.first() { if *index > 0 { @@ -89,30 +85,51 @@ impl Code { self.code += &prebuilt.code; } + /// Setting breakpoints on synthetic code can cause weird behaviors + /// because Chrome will treat the location as belonging to the previous + /// original code section. By inserting an empty source map when reaching a + /// synthetic section directly after an original section, we tell Chrome + /// that the previous map ended at this point. + fn push_map(&mut self, map: Option) { + if map.is_none() && matches!(self.mappings.last(), None | Some((_, None))) { + // No reason to push an empty map directly after an empty map + return; + } + + debug_assert!( + map.is_some() || !self.mappings.is_empty(), + "the first mapping is never a None" + ); + self.mappings.push((self.code.len(), map)); + } + /// Tests if any code in this CodeBuilder contains an associated source map. pub fn has_source_map(&self) -> bool { !self.mappings.is_empty() } -} -impl ops::AddAssign<&str> for Code { - fn add_assign(&mut self, rhs: &str) { - self.push_str(rhs); + pub fn build(self) -> Code { + Code { + code: self.code.build(), + mappings: self.mappings, + } } } -impl Write for Code { - fn write_str(&mut self, s: &str) -> FmtResult { - self.push_str(s); - Ok(()) +impl ops::AddAssign<&'static str> for CodeBuilder { + fn add_assign(&mut self, rhs: &'static str) { + self.push_static_bytes(rhs.as_bytes()); } } -#[turbo_tasks::value_impl] -impl CodeVc { - #[turbo_tasks::function] - pub async fn source_code(self) -> Result { - Ok(StringVc::cell(self.await?.source_code().to_string())) +impl Write for CodeBuilder { + fn write(&mut self, bytes: &[u8]) -> IoResult { + self.push_map(None); + self.code.write(bytes) + } + + fn flush(&mut self) -> IoResult<()> { + self.code.flush() } } @@ -131,21 +148,29 @@ impl GenerateSourceMap for Code { let mut pos = SourcePos::new(); let mut last_byte_pos = 0; - let sections = self - 
.mappings - .iter() - .map(|(byte_pos, map)| { - pos.update(&self.code[last_byte_pos..*byte_pos]); - last_byte_pos = *byte_pos; - - let encoded = match map { - None => empty_map(), - Some(map) => map.generate_source_map(), - }; - - SourceMapSection::new(pos, encoded) - }) - .collect(); + let mut sections = Vec::with_capacity(self.mappings.len()); + let mut read = self.code.read(); + for (byte_pos, map) in &self.mappings { + let mut want = byte_pos - last_byte_pos; + while want > 0 { + let buf = read.fill_buf()?; + debug_assert!(!buf.is_empty()); + + let end = min(want, buf.len()); + pos.update(&buf[0..end]); + + read.consume(end); + want -= end; + } + last_byte_pos = *byte_pos; + + let encoded = match map { + None => empty_map(), + Some(map) => map.generate_source_map(), + }; + + sections.push(SourceMapSection::new(pos, encoded)) + } Ok(SourceMapVc::new_sectioned(sections)) } diff --git a/crates/turbopack-core/src/introspect/asset.rs b/crates/turbopack-core/src/introspect/asset.rs index ab7f1aa8c3b92..44bc7fc0b0a3d 100644 --- a/crates/turbopack-core/src/introspect/asset.rs +++ b/crates/turbopack-core/src/introspect/asset.rs @@ -62,8 +62,8 @@ pub async fn content_to_details(content: AssetContentVc) -> Result { AssetContent::File(file_content) => match &*file_content.await? 
{ FileContent::Content(file) => { let content = file.content(); - match std::str::from_utf8(content) { - Ok(str) => StringVc::cell(str.to_string()), + match content.to_str() { + Ok(str) => StringVc::cell(str.into_owned()), Err(_) => StringVc::cell(format!("{} binary bytes", content.len())), } } diff --git a/crates/turbopack-core/src/source_map.rs b/crates/turbopack-core/src/source_map.rs index b00069b25c9c1..ac618d9a95134 100644 --- a/crates/turbopack-core/src/source_map.rs +++ b/crates/turbopack-core/src/source_map.rs @@ -1,10 +1,10 @@ use std::{io::Write, ops::Deref, sync::Arc}; use anyhow::Result; -use async_recursion::async_recursion; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use sourcemap::SourceMap as CrateMap; -use turbo_tasks::{primitives::BytesVc, TryJoinIterExt}; +use turbo_tasks::TryJoinIterExt; +use turbo_tasks_fs::rope::{Rope, RopeBuilder, RopeVc}; use crate::source_pos::SourcePos; @@ -82,80 +82,84 @@ impl<'a> From> for Token { } } -impl SourceMap { - /// Encoding a SourceMap stringifies it into JSON. - #[async_recursion] - async fn encode(&self, w: &mut W) -> Result<()> { - match self { - SourceMap::Regular(r) => r.0.to_writer(w)?, +impl SourceMapVc { + /// Creates a new SourceMap::Regular Vc out of a sourcemap::SourceMap + /// ("CrateMap") instance. + pub fn new_regular(map: CrateMap) -> Self { + SourceMap::Regular(RegularSourceMap::new(map)).cell() + } + + /// Creates a new SourceMap::Sectioned Vc out of a collection of source map + /// sections. + pub fn new_sectioned(sections: Vec) -> Self { + SourceMap::Sectioned(SectionedSourceMap::new(sections)).cell() + } +} + +#[turbo_tasks::value_impl] +impl SourceMapVc { + /// Stringifies the source map into JSON bytes. 
+ #[turbo_tasks::function] + pub async fn to_rope(self) -> Result { + let this = self.await?; + let rope = match &*this { + SourceMap::Regular(r) => { + let mut bytes = vec![]; + r.0.to_writer(&mut bytes)?; + Rope::from(bytes) + } SourceMap::Sectioned(s) => { + if s.sections.len() == 1 { + let s = &s.sections[0]; + if s.offset == (0, 0) { + return Ok(s.map.to_rope()); + } + } + // My kingdom for a decent dedent macro with interpolation! - write!( - w, - r#"{{ + let mut rope = RopeBuilder::from( + r#"{ "version": 3, - "sections": ["# - )?; + "sections": ["#, + ); let sections = s .sections .iter() - .map(async move |s| Ok((s.offset, s.map.await?))) + .map(|s| async move { Ok((s.offset, s.map.to_rope().await?)) }) .try_join() .await?; let mut first_section = true; - for (offset, map) in sections { + for (offset, section_map) in sections { if !first_section { - write!(w, ",")?; + write!(rope, ",")?; } first_section = false; write!( - w, + rope, r#" {{"offset": {{"line": {}, "column": {}}}, "map": "#, offset.line, offset.column, )?; - map.encode(w).await?; - write!(w, r#"}}"#)?; + rope += &*section_map; + + write!(rope, r#"}}"#)?; } write!( - w, + rope, r#"] }}"# )?; - } - } - Ok(()) - } -} -impl SourceMapVc { - /// Creates a new SourceMap::Regular Vc out of a sourcemap::SourceMap - /// ("CrateMap") instance. - pub fn new_regular(map: CrateMap) -> Self { - SourceMap::Regular(RegularSourceMap::new(map)).cell() - } - - /// Creates a new SourceMap::Sectioned Vc out of a collection of source map - /// sections. - pub fn new_sectioned(sections: Vec) -> Self { - SourceMap::Sectioned(SectionedSourceMap::new(sections)).cell() - } -} - -#[turbo_tasks::value_impl] -impl SourceMapVc { - /// Stringifies the source map into JSON bytes. 
- #[turbo_tasks::function] - pub async fn to_bytes(self) -> Result { - let mut bytes = vec![]; - self.await?.encode(&mut bytes).await?; - Ok(BytesVc::cell(bytes)) + rope.build() + } + }; + Ok(rope.cell()) } /// Traces a generated line/column into an mapping token representing either @@ -267,7 +271,7 @@ impl Serialize for CrateMapWrapper { } impl<'de> Deserialize<'de> for CrateMapWrapper { - fn deserialize>(deserializer: D) -> Result { + fn deserialize>(deserializer: D) -> Result { use serde::de::Error; let bytes = <&[u8]>::deserialize(deserializer)?; let map = CrateMap::from_slice(bytes).map_err(Error::custom)?; diff --git a/crates/turbopack-core/src/source_pos.rs b/crates/turbopack-core/src/source_pos.rs index ea512fd5b4206..0b55d8e7e871e 100644 --- a/crates/turbopack-core/src/source_pos.rs +++ b/crates/turbopack-core/src/source_pos.rs @@ -3,15 +3,9 @@ use turbo_tasks::trace::TraceRawVcs; use turbo_tasks_hash::DeterministicHash; /// LINE FEED (LF), one of the basic JS line terminators. -const U16_LF: u16 = 0x0A; +const U8_LF: u8 = 0x0A; /// CARRIAGE RETURN (CR), one of the basic JS line terminators. -const U16_CR: u16 = 0x0D; -/// LINE SEPARATOR, one of the ES2019 line terminators to make JS a superset of -/// JSON. -const U16_LS: u16 = 0x2028; -/// PARAGRAPH SEPARATOR, one of the ES2019 line terminators to make JS a -/// superset of JSON. -const U16_PS: u16 = 0x2029; +const U8_CR: u8 = 0x0D; #[derive( Default, @@ -49,7 +43,7 @@ impl SourcePos { /// a single terminator), and JSON LINE/PARAGRAPH SEPARATORs. /// /// See https://tc39.es/ecma262/multipage/ecmascript-language-lexical-grammar.html#sec-line-terminators - pub fn update(&mut self, code: &str) { + pub fn update(&mut self, code: &[u8]) { // JS source text is interpreted as UCS-2, which is basically UTF-16 with less // restrictions. We cannot iterate UTF-8 bytes here, 2-byte UTF-8 octets // should count as a 1 char and not 2. 
@@ -57,23 +51,83 @@ impl SourcePos { mut line, mut column, } = self; - let mut u16_chars = code.encode_utf16().peekable(); - while let Some(c) = u16_chars.next() { - match c { - U16_LF | U16_LS | U16_PS => { + + let mut i = 0; + while i < code.len() { + // This is not a UTF-8 validator, but it's likely close enough. It's assumed + // that the input is valid (and if it isn't than what are you doing trying to + // embed it into source code anyways?). The important part is that we process in + // order, and use the first octet's bit pattern to decode the octet length of + // the char. + match code[i] { + U8_LF => { + i += 1; line += 1; column = 0; } - U16_CR => { + U8_CR => { // Count "\r\n" as a single terminator. - u16_chars.next_if(|&c| c == U16_LF); + if code.get(i + 1) == Some(&U8_LF) { + i += 2; + } else { + i += 1; + } line += 1; column = 0; } - _ => column += 1, + + // 1 octet chars do not have the high bit set. If it's not a LF or CR, then it's + // just a regular ASCII. + b if b & 0b10000000 == 0 => { + i += 1; + column += 1; + } + + // 2 octet chars have a leading `110` bit pattern. None are considered line + // terminators. + b if b & 0b11100000 == 0b11000000 => { + // eat this byte and the next. + i += 2; + column += 1; + } + + // 3 octet chars have a leading `1110` bit pattern. Both the LINE/PARAGRAPH + // SEPARATOR exist in 3 octets. + b if b & 0b11110000 == 0b11100000 => { + // The LINE and PARAGRAPH have the bits `11100010 10000000 1010100X`, with the X + // denoting either line or paragraph. + let mut separator = false; + if b == 0b11100010 && code.get(i + 1) == Some(&0b10000000) { + let last = code.get(i + 2).cloned().unwrap_or_default(); + separator = (last & 0b11111110) == 0b10101000 + } + + // eat this byte and the next 2. + i += 3; + if separator { + line += 1; + column = 0; + } else { + column += 1; + } + } + + // 4 octet chars have a leading `11110` pattern, but we don't need to check because + // none of the other patterns matched. 
+ _ => { + // eat this byte and the next 3. + i += 4; + column += 1; + } } } self.line = line; self.column = column; } } + +impl std::cmp::PartialEq<(usize, usize)> for SourcePos { + fn eq(&self, other: &(usize, usize)) -> bool { + &(self.line, self.column) == other + } +} diff --git a/crates/turbopack-css/src/module_asset.rs b/crates/turbopack-css/src/module_asset.rs index 74e554784a109..8ec6ea9e83dd9 100644 --- a/crates/turbopack-css/src/module_asset.rs +++ b/crates/turbopack-css/src/module_asset.rs @@ -193,7 +193,7 @@ impl EcmascriptChunkItem for ModuleChunkItem { } code += "});\n"; EcmascriptChunkItemContent { - inner_code: code, + inner_code: code.into(), // TODO: We generate a minimal map for runtime code so that the filename is // displayed in dev tools. ..Default::default() @@ -201,7 +201,7 @@ impl EcmascriptChunkItem for ModuleChunkItem { } ParseResult::NotFound | ParseResult::Unparseable => { EcmascriptChunkItemContent { - inner_code: "__turbopack_export_value__({});\n".to_string(), + inner_code: "__turbopack_export_value__({});\n".into(), // TODO: We generate a minimal map for runtime code so that the filename is // displayed in dev tools. ..Default::default() diff --git a/crates/turbopack-css/src/parse.rs b/crates/turbopack-css/src/parse.rs index 98d9b68e6923b..e877d40768459 100644 --- a/crates/turbopack-css/src/parse.rs +++ b/crates/turbopack-css/src/parse.rs @@ -60,11 +60,19 @@ pub async fn parse( AssetContent::Redirect { .. } => ParseResult::Unparseable.cell(), AssetContent::File(file) => match &*file.await? { FileContent::NotFound => ParseResult::NotFound.cell(), - FileContent::Content(file) => match String::from_utf8(file.content().to_vec()) { + FileContent::Content(file) => match file.content().to_str() { Err(_err) => ParseResult::Unparseable.cell(), Ok(string) => { let transforms = &*transforms.await?; - parse_content(string, fs_path, fs_path_str, source, ty, transforms).await? 
+ parse_content( + string.into_owned(), + fs_path, + fs_path_str, + source, + ty, + transforms, + ) + .await? } }, }, diff --git a/crates/turbopack-dev-server/src/lib.rs b/crates/turbopack-dev-server/src/lib.rs index e3513c88ae45a..b1fc5e9ceb320 100644 --- a/crates/turbopack-dev-server/src/lib.rs +++ b/crates/turbopack-dev-server/src/lib.rs @@ -130,12 +130,13 @@ async fn process_request_with_content_source( |m| m.to_string(), ); - let bytes = content.content().to_vec(); + let content = content.content(); + let bytes = content.read(); return Ok(Response::builder() .status(200) .header("Content-Type", content_type) - .header("Content-Length", bytes.len().to_string()) - .body(hyper::Body::from(bytes))?); + .header("Content-Length", content.len().to_string()) + .body(hyper::Body::wrap_stream(bytes))?); } } } @@ -152,7 +153,7 @@ async fn process_request_with_content_source( ); } - return Ok(response.body(hyper::Body::from(proxy_result.body.clone()))?); + return Ok(response.body(hyper::Body::wrap_stream(proxy_result.body.read()))?); } ContentSourceResult::NeedData { source, path, vary } => { resolved_source = source.resolve_strongly_consistent().await?; diff --git a/crates/turbopack-dev-server/src/source/mod.rs b/crates/turbopack-dev-server/src/source/mod.rs index 811f1204b18ba..f0e48da4a701d 100644 --- a/crates/turbopack-dev-server/src/source/mod.rs +++ b/crates/turbopack-dev-server/src/source/mod.rs @@ -15,6 +15,7 @@ use std::{ use anyhow::Result; use serde::{Deserialize, Serialize, Serializer}; use turbo_tasks::{trace::TraceRawVcs, Value}; +use turbo_tasks_fs::rope::Rope; use turbopack_core::version::VersionedContentVc; use self::query::Query; @@ -27,7 +28,7 @@ pub struct ProxyResult { /// Headers arranged as contiguous (name, value) pairs. pub headers: Vec, /// The body to return. 
- pub body: Vec, + pub body: Rope, } #[turbo_tasks::value(shared)] diff --git a/crates/turbopack-ecmascript/src/chunk/loader.rs b/crates/turbopack-ecmascript/src/chunk/loader.rs index 1bda4ce6a820c..e9359116fa4e5 100644 --- a/crates/turbopack-ecmascript/src/chunk/loader.rs +++ b/crates/turbopack-ecmascript/src/chunk/loader.rs @@ -1,4 +1,4 @@ -use std::fmt::Write as FmtWrite; +use std::io::Write as _; use anyhow::{anyhow, bail, Result}; use indexmap::IndexSet; @@ -77,7 +77,7 @@ impl EcmascriptChunkItem for ManifestLoaderItem { #[turbo_tasks::function] async fn content(&self) -> Result { - let mut code = String::new(); + let mut code = Vec::new(); let manifest = self.manifest.await?; let asset = manifest.asset.as_asset(); @@ -126,7 +126,7 @@ __turbopack_export_value__((__turbopack_import__) => {{ )?; Ok(EcmascriptChunkItemContent { - inner_code: code, + inner_code: code.into(), ..Default::default() } .into()) @@ -262,12 +262,11 @@ impl EcmascriptChunkItem for ManifestChunkItem { chunk_server_paths.insert(chunk_server_path.to_string()); } - let mut code = String::new(); - code += "const chunks = [\n"; + let mut code = b"const chunks = [\n".to_vec(); for pathname in chunk_server_paths { writeln!(code, " {},", stringify_str(&pathname))?; } - code += "];\n"; + writeln!(code, "];")?; // TODO: a dedent macro would be awesome. 
write!( @@ -277,7 +276,7 @@ __turbopack_export_value__(Promise.all(chunks.map(__turbopack_load__)));" )?; Ok(EcmascriptChunkItemContent { - inner_code: code, + inner_code: code.into(), ..Default::default() } .into()) diff --git a/crates/turbopack-ecmascript/src/chunk/mod.rs b/crates/turbopack-ecmascript/src/chunk/mod.rs index 257623019d599..23ae348b77ca3 100644 --- a/crates/turbopack-ecmascript/src/chunk/mod.rs +++ b/crates/turbopack-ecmascript/src/chunk/mod.rs @@ -2,9 +2,9 @@ pub mod loader; pub(crate) mod optimize; pub mod source_map; -use std::{fmt::Write as _, slice::Iter}; +use std::{fmt::Write, io::Write as _, slice::Iter}; -use anyhow::{anyhow, bail, Context, Result}; +use anyhow::{anyhow, bail, Result}; use indexmap::{IndexMap, IndexSet}; use serde::{Deserialize, Serialize}; use turbo_tasks::{ @@ -12,7 +12,9 @@ use turbo_tasks::{ trace::TraceRawVcs, TryJoinIterExt, ValueToString, ValueToStringVc, }; -use turbo_tasks_fs::{embed_file, File, FileContent, FileSystemPathOptionVc, FileSystemPathVc}; +use turbo_tasks_fs::{ + embed_file, rope::Rope, File, FileContent, FileSystemPathOptionVc, FileSystemPathVc, +}; use turbo_tasks_hash::{encode_hex, hash_xxh3_hash64, Xxh3Hash64Hasher}; use turbopack_core::{ asset::{Asset, AssetContentVc, AssetVc}, @@ -23,7 +25,7 @@ use turbopack_core::{ ChunkReferenceVc, ChunkVc, ChunkableAsset, ChunkableAssetVc, ChunkingContextVc, FromChunkableAsset, ModuleId, ModuleIdReadRef, ModuleIdVc, ModuleIdsVc, }, - code_builder::{Code, CodeReadRef, CodeVc}, + code_builder::{Code, CodeBuilder, CodeReadRef, CodeVc}, introspect::{ asset::{children_from_asset_references, content_to_details, IntrospectableAssetVc}, Introspectable, IntrospectableChildrenVc, IntrospectableVc, @@ -473,7 +475,7 @@ impl EcmascriptChunkContentEntry { &self.code } - fn source_code(&self) -> &str { + fn source_code(&self) -> &Rope { self.code.source_code() } } @@ -486,7 +488,7 @@ impl EcmascriptChunkContentEntryVc { let factory = module_factory(content); let id = 
chunk_item.id().await?; let code = factory.await?; - let hash = hash_xxh3_hash64(code.source_code().as_bytes()); + let hash = hash_xxh3_hash64(code.source_code()); Ok(EcmascriptChunkContentEntry { chunk_item, id, @@ -519,13 +521,14 @@ async fn module_factory(content: EcmascriptChunkItemContentVc) -> Result if content.options.exports { args.push("e: exports"); } - let mut code = Code::new(); + let mut code = CodeBuilder::default(); let args = FormatIter(|| args.iter().copied().intersperse(", ")); if content.options.this { write!(code, "(function({{ {} }}) {{ !function() {{\n\n", args,)?; } else { write!(code, "(({{ {} }}) => (() => {{\n\n", args,)?; } + let source_map = content.source_map.map(|sm| sm.as_generate_source_map()); code.push_source(&content.inner_code, source_map); if content.options.this { @@ -533,7 +536,7 @@ async fn module_factory(content: EcmascriptChunkItemContentVc) -> Result } else { code += "\n})())"; } - Ok(code.cell()) + Ok(code.build().cell()) } #[derive(Serialize)] @@ -563,7 +566,6 @@ impl EcmascriptChunkContentVc { #[turbo_tasks::function] async fn code(self) -> Result { let this = self.await?; - let mut code = Code::new(); let chunk_path = &*this.chunk_path.await?; let chunk_server_path = if let Some(path) = this.output_root.await?.get_path_to(chunk_path) { @@ -575,17 +577,17 @@ impl EcmascriptChunkContentVc { this.output_root.to_string().await? 
); }; - writeln!( - code, - "(self.TURBOPACK = self.TURBOPACK || []).push([{}, {{", - stringify_str(chunk_server_path) - )?; + let mut code = CodeBuilder::default(); + code += "(self.TURBOPACK = self.TURBOPACK || []).push(["; + + writeln!(code, "{}, {{", stringify_str(chunk_server_path))?; for entry in &this.module_factories { write!(code, "\n{}: ", &stringify_module_id(entry.id()))?; code.push_code(entry.code()); code += ","; } code += "\n}"; + if let Some(evaluate) = &this.evaluate { let evaluate = evaluate.await?; let condition = evaluate @@ -623,13 +625,10 @@ impl EcmascriptChunkContentVc { code += "]);\n"; if this.evaluate.is_some() { let runtime_code = embed_file!("js/src/runtime.js").await?; - let runtime_code = match &*runtime_code { + match &*runtime_code { FileContent::NotFound => return Err(anyhow!("runtime code is not found")), - FileContent::Content(file) => String::from_utf8(file.content().to_vec()) - .context("runtime code is invalid UTF-8")?, + FileContent::Content(file) => code.push_source(file.content(), None), }; - // Add the turbopack runtime to the chunk. 
- code += runtime_code.as_str(); } if code.has_source_map() { @@ -637,13 +636,13 @@ impl EcmascriptChunkContentVc { write!(code, "\n\n//# sourceMappingURL={}.map", filename)?; } - Ok(code.cell()) + Ok(code.build().cell()) } #[turbo_tasks::function] async fn content(self) -> Result { - let code = self.code().source_code().await?; - Ok(File::from(code).into()) + let code = self.code().await?; + Ok(File::from(code.source_code().clone()).into()) } } @@ -743,7 +742,7 @@ impl GenerateSourceMap for EcmascriptChunkContent { #[derive(serde::Serialize)] struct HmrUpdateEntry<'a> { - code: &'a str, + code: &'a Rope, map: Option, } @@ -1113,7 +1112,7 @@ impl EcmascriptChunkPlaceablesVc { #[turbo_tasks::value(shared)] #[derive(Default)] pub struct EcmascriptChunkItemContent { - pub inner_code: String, + pub inner_code: Rope, pub source_map: Option, pub options: EcmascriptChunkItemOptions, pub placeholder_for_future_extensions: (), diff --git a/crates/turbopack-ecmascript/src/chunk/source_map.rs b/crates/turbopack-ecmascript/src/chunk/source_map.rs index e1e35ec87f227..835a9e4facb1a 100644 --- a/crates/turbopack-ecmascript/src/chunk/source_map.rs +++ b/crates/turbopack-ecmascript/src/chunk/source_map.rs @@ -39,9 +39,9 @@ impl Asset for EcmascriptChunkSourceMapAsset { .chunk .chunk_content() .generate_source_map() - .to_bytes() + .to_rope() .await?; - Ok(File::from(sm.as_slice()).into()) + Ok(File::from(sm).into()) } #[turbo_tasks::function] @@ -87,9 +87,9 @@ impl Asset for EcmascriptChunkEntrySourceMapAsset { .await? 
.code_vc .generate_source_map() - .to_bytes() + .to_rope() .await?; - Ok(File::from(sm.as_slice()).into()) + Ok(File::from(sm).into()) } #[turbo_tasks::function] diff --git a/crates/turbopack-ecmascript/src/chunk_group_files_asset.rs b/crates/turbopack-ecmascript/src/chunk_group_files_asset.rs index 89dfb82e4ca85..75aea279f13fc 100644 --- a/crates/turbopack-ecmascript/src/chunk_group_files_asset.rs +++ b/crates/turbopack-ecmascript/src/chunk_group_files_asset.rs @@ -130,7 +130,7 @@ impl EcmascriptChunkItem for ChunkGroupFilesChunkItem { } } Ok(EcmascriptChunkItemContent { - inner_code: format!("__turbopack_export_value__({:#});\n", Value::Array(data)), + inner_code: format!("__turbopack_export_value__({:#});\n", Value::Array(data)).into(), ..Default::default() } .cell()) diff --git a/crates/turbopack-ecmascript/src/lib.rs b/crates/turbopack-ecmascript/src/lib.rs index fe9fc2286b22a..70a8976804d6d 100644 --- a/crates/turbopack-ecmascript/src/lib.rs +++ b/crates/turbopack-ecmascript/src/lib.rs @@ -290,7 +290,7 @@ impl EcmascriptChunkItem for ModuleChunkItem { let srcmap = ParseResultSourceMap::new(source_map.clone(), srcmap).cell(); Ok(EcmascriptChunkItemContent { - inner_code: String::from_utf8(bytes)?, + inner_code: bytes.into(), source_map: Some(srcmap), options: if eval_context.is_esm() { EcmascriptChunkItemOptions { @@ -314,7 +314,8 @@ impl EcmascriptChunkItem for ModuleChunkItem { "const e = new Error(\"Could not parse module '{path}'\");\ne.code = \ 'MODULE_UNPARSEABLE';\nthrow e;", path = self.module.path().to_string().await? - ), + ) + .into(), ..Default::default() } .into()) diff --git a/crates/turbopack-ecmascript/src/parse.rs b/crates/turbopack-ecmascript/src/parse.rs index 4c7c67eac7ff3..1458211f99e79 100644 --- a/crates/turbopack-ecmascript/src/parse.rs +++ b/crates/turbopack-ecmascript/src/parse.rs @@ -139,13 +139,21 @@ pub async fn parse( Ok(match &*content.await? { AssetContent::File(file) => match &*file.await? 
{ FileContent::NotFound => ParseResult::NotFound.cell(), - FileContent::Content(file) => match String::from_utf8(file.content().to_vec()) { + FileContent::Content(file) => match file.content().to_str() { Ok(string) => { let transforms = &*transforms.await?; - parse_content(string, fs_path, file_path_hash, source, ty, transforms).await? + parse_content( + string.into_owned(), + fs_path, + file_path_hash, + source, + ty, + transforms, + ) + .await? } // FIXME: report error - Err(_err) => ParseResult::Unparseable.cell(), + Err(_) => ParseResult::Unparseable.cell(), }, }, AssetContent::Redirect { .. } => ParseResult::Unparseable.cell(), diff --git a/crates/turbopack-ecmascript/src/resolve/node_native_binding.rs b/crates/turbopack-ecmascript/src/resolve/node_native_binding.rs index c0420f7cbdde0..91692d07f7b6a 100644 --- a/crates/turbopack-ecmascript/src/resolve/node_native_binding.rs +++ b/crates/turbopack-ecmascript/src/resolve/node_native_binding.rs @@ -100,7 +100,7 @@ pub async fn resolve_node_pre_gyp_files( let config_file_path = config_path.path(); let config_file_dir = config_file_path.parent(); let node_pre_gyp_config: NodePreGypConfigJson = - serde_json::from_slice(config_file.content())?; + serde_json::from_reader(config_file.read())?; let mut assets: IndexSet = IndexSet::new(); for version in node_pre_gyp_config.binary.napi_versions.iter() { let native_binding_path = NAPI_VERSION_TEMPLATE.replace( @@ -236,7 +236,7 @@ pub async fn resolve_node_gyp_build_files( if let AssetContent::File(file) = &*binding_gyp.content().await? { if let FileContent::Content(config_file) = &*file.await? { if let Some(captured) = - GYP_BUILD_TARGET_NAME.captures(std::str::from_utf8(config_file.content())?) + GYP_BUILD_TARGET_NAME.captures(&config_file.content().to_str()?) 
{ let mut resolved: IndexSet = IndexSet::with_capacity(captured.len()); for found in captured.iter().skip(1).flatten() { diff --git a/crates/turbopack-env/src/asset.rs b/crates/turbopack-env/src/asset.rs index bc72e48639b90..cecf6f926c1b4 100644 --- a/crates/turbopack-env/src/asset.rs +++ b/crates/turbopack-env/src/asset.rs @@ -131,7 +131,7 @@ impl EcmascriptChunkItem for ProcessEnvChunkItem { } Ok(EcmascriptChunkItemContent { - inner_code: code, + inner_code: code.into(), ..Default::default() } .cell()) diff --git a/crates/turbopack-json/src/lib.rs b/crates/turbopack-json/src/lib.rs index 6c58638541813..5fd0bebf4904a 100644 --- a/crates/turbopack-json/src/lib.rs +++ b/crates/turbopack-json/src/lib.rs @@ -134,7 +134,7 @@ impl EcmascriptChunkItem for JsonChunkItem { } }; Ok(EcmascriptChunkItemContent { - inner_code, + inner_code: inner_code.into(), ..Default::default() } .into()) diff --git a/crates/turbopack-static/src/lib.rs b/crates/turbopack-static/src/lib.rs index f3e53d8c59ff3..e4307ee38adb7 100644 --- a/crates/turbopack-static/src/lib.rs +++ b/crates/turbopack-static/src/lib.rs @@ -196,7 +196,8 @@ impl EcmascriptChunkItem for ModuleChunkItem { inner_code: format!( "__turbopack_export_value__({path});", path = stringify_str(&format!("/{}", &*self.static_asset.path().await?)) - ), + ) + .into(), ..Default::default() } .into()) diff --git a/crates/turbopack-tests/tests/snapshot.rs b/crates/turbopack-tests/tests/snapshot.rs index b9ab0cb9bd6b9..192381afd9ecd 100644 --- a/crates/turbopack-tests/tests/snapshot.rs +++ b/crates/turbopack-tests/tests/snapshot.rs @@ -256,7 +256,7 @@ async fn get_contents(file: AssetContentVc) -> Result> { Ok(match &*file.await? { AssetContent::File(file) => match &*file.await? 
{ FileContent::NotFound => None, - FileContent::Content(expected) => Some(trimmed_string(expected.content())), + FileContent::Content(expected) => Some(expected.content().to_str()?.trim().to_string()), }, AssetContent::Redirect { target, link_type } => Some(format!( "Redirect {{ target: {target}, link_type: {:?} }}", @@ -301,10 +301,6 @@ async fn diff(path: FileSystemPathVc, actual: AssetContentVc) -> Result<()> { Ok(()) } -fn trimmed_string(input: &[u8]) -> String { - String::from_utf8_lossy(input).trim().to_string() -} - async fn maybe_load_env( project_fs: FileSystemVc, path: &Path, diff --git a/shim/src/ffi.rs b/shim/src/ffi.rs index 8f6d57eb7dd2e..a7dbb982d3b8a 100644 --- a/shim/src/ffi.rs +++ b/shim/src/ffi.rs @@ -1,8 +1,8 @@ /* automatically generated by rust-bindgen 0.61.0 */ -extern "C" { - pub fn nativeRunWithArgs( - argc: ::std::os::raw::c_int, - argv: *mut *mut ::std::os::raw::c_char, - ) -> ::std::os::raw::c_uint; -} +extern "C" { + pub fn nativeRunWithArgs( + argc: ::std::os::raw::c_int, + argv: *mut *mut ::std::os::raw::c_char, + ) -> ::std::os::raw::c_uint; +}