From f598539510bda3a0b0636aa7aa78b657b4d1c5aa Mon Sep 17 00:00:00 2001 From: Benjamin Jacobs Date: Thu, 15 Feb 2024 17:10:30 -0700 Subject: [PATCH 01/10] Create initial specification --- spec/file-structure.md | 47 +++++ spec/index.md | 15 ++ spec/types.md | 429 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 491 insertions(+) create mode 100644 spec/file-structure.md create mode 100644 spec/index.md create mode 100644 spec/types.md diff --git a/spec/file-structure.md b/spec/file-structure.md new file mode 100644 index 0000000..f93700f --- /dev/null +++ b/spec/file-structure.md @@ -0,0 +1,47 @@ +# File Structure + +An mbon file is made up of two parts: A header, and the content. + +## Header + +The header begins with this 8-byte signature (hex)`EE 6D 62 6E 0D 0A 1A 00`. +After the signature is a single byte version number, currently (hex)`01`. + +The signature comes from the PNG signature and the explanation will be repeated +here. + +> This signature both identifies the file as a PNG file and provides for +> immediate detection of common file-transfer problems. The first two bytes +> distinguish PNG files on systems that expect the first two bytes to identify +> the file type uniquely. The first byte is chosen as a non-ASCII value to +> reduce the probability that a text file may be misrecognized as a PNG file; +> also, it catches bad file transfers that clear bit 7. Bytes two through four +> name the format. The CR-LF sequence catches bad file transfers that alter +> newline sequences. The control-Z character stops file display under MS-DOS. +> The final line feed checks for the inverse of the CR-LF translation problem. 
+ +The 8-byte signature of mbon in decimal, hex and ascii is + +``` +(dec) 238 109 98 110 13 10 26 0 +(hex) EE 6D 62 6E 0D 0A 1A 00 +(ascii) \356 m b n \r \n \032 \0 +``` + +Like the png signature, this signature aims to have a unique value that can be +used to know that the file is an mbon file without relying on an extension as +well as determine if the file has encountered problems in transfer. + +After the header immediately begins the main content of the file. + +## Content + +The content of an mbon file may be a sequence of items much like a list. After +the primary contents of the file may be a heap of pointer data. This is a single +heap value. + +In order to allow for the main content to grow, it is recommended that the +heap leaves a buffer of reserved data at the beginning of the heap. + +Descriptions of the different types available are available at +[types.md](types.md). diff --git a/spec/index.md b/spec/index.md new file mode 100644 index 0000000..8378965 --- /dev/null +++ b/spec/index.md @@ -0,0 +1,15 @@ +# MBON + +MBON stands for Marked Binary Object Notation. + +It is a file format that aims to be very efficient for reading/writing data to +disk. Portions of the file may be skipped without needing to parse everything in +between. Files can be written without truncating the file on each change. + +This document will discuss how the format is structured as well as how +implementations should behave with the data. + +* [File Structure](file-structure.md) +* [Types](types.md) + + diff --git a/spec/types.md b/spec/types.md new file mode 100644 index 0000000..d8fcfd3 --- /dev/null +++ b/spec/types.md @@ -0,0 +1,429 @@ +# Data types + +In mbon, data is made out of items. These items are made of two parts: A +mark, and a value. Unless otherwise specified, an item is always a mark followed +by data. + +## Size + +Some marks have a size indicator. This indicator is dynamically sized. 
The +indicator is formatted as follows: + +The indicator starts at one byte in length. There is a continuation bit in each +byte of the indicator. This is the most significant bit in each byte. If it is +1, then there is more to read, otherwise the size indicator is finished. + +```rust +SizeEnd = { '\x00'..'\x7f' } // 0b0000_0000 through 0b0111_1111 +SizeContinue = { '\x80'..'\xff' } // 0b1000_0000 through 0b1111_1111 +Size = { SizeContinue ~ Size | SizeEnd } +``` + +When reading a size indicator, the most significant bit of each byte is ignored. +The value is read as a little-endian unsigned integer. Overall, sizes may not be +larger than 64 bits or 10 characters. + +### Examples + +Given the data (hex)`5a b3 06`, We would first read `5a` which is (bin)`0 +1011010`. We add `0b1011010 << (0 * 7)` to the sum and get `0x5a`. The Most +significant bit is 0, so we are done with a final size of 90. + +Given the data (hex)`b3 06`, We read `b3` (bin)`1 0110011`. We add +`0b0110011 << (0 * 7)` to the sum and get `0x33`. The most significant bit is 1, +so we read the next byte (hex)`06` (bin)`0 0000011`. We add +`0b0000011 << (1 * 7)` to the sum and get `0x1b3`. The most significant bit is +0, so we are done with a final size of 435. + +## U8 + +A u8 data type is represented by the id `b`. There is nothing more to the +mark. + +The data represented is an unsigned 8-bit integer. + +The length of a u8 is always 1. + +## I8 + +An i8 data type is represented by the id `B`. There is nothing more to the +mark. + +The data represented is a signed 8-bit integer. + +The length of an i8 is always 1. + +## U16 + +A u16 data type is represented by the id `h`. There is nothing more to the +mark. + +The data represented is a little-endian unsigned 16-bit integer. + +The length of a u16 is always 2. + +## I16 + +An i16 data type is represented by the id `H`. There is nothing more to the +mark. + +The data represented is a little-endian signed 16-bit integer. 
+ +The length of an i16 is always 2. + +## U32 + +A u32 data type is represented by the id `i`. There is nothing more to the +mark. + +The data represented is a little-endian unsigned 32-bit integer. + +The length of a u32 is always 4. + +## I32 + +An i32 data type is represented by the id `I`. There is nothing more to the +mark. + +The data represented is a little-endian signed 32-bit integer. + +The length of an i32 is always 4. + +## U64 + +A u64 data type is represented by the id `l`. There is nothing more to the +mark. + +The data represented is a little-endian unsigned 64-bit integer. + +The length of a u64 is always 8. + +## I64 + +An i64 data type is represented by the id `L`. There is nothing more to the +mark. + +The data represented is a little-endian signed 64-bit integer. + +The length of an i64 is always 8. + +## F32 + +An f32 data type is represented by the id `f`. There is nothing more to the +mark. + +The data represented is a little-endian IEEE-754 float. + +The length of an f32 is always 4. + +## F64 + +An i64 data type is represented by the id `F`. There is nothing more to the +mark. + +The data represented is a little-endian IEEE-754 double. + +The length of an f64 is always 8. + +## Null + +A null data type is represented by the id `n`. There is nothing more to the +mark. + +There is no data associated with a null. + +The length of a null is always 0. + +## Chars + +Characters are all represented by UTF code points. A majority of english +characters fit within the 8-bit range of a byte. In many other languages, most +will fit within 16-bits. And all characters can fit within 32-bits. + +It is possible to have characters represented by UTF-8, but that would require a +size indicator in the mark to know how long the character is. A better option +would be to have 3 char types each with different sizes to accommodate for all +possible characters without wasting unused space. + +### Small Char + +A char data type is represented by the id `c`. 
There is nothing more to the +mark. The data represented is a unsigned 8-bit integer which represents a UTF +code point. If the code point doesn't fit within an 8-bit value, then Char or +Big Char should be used instead. + +The length of a small char is always 1. + +### Char + +A char data type is represented by the id `C`. There is nothing more to the +mark. The data represented is a little-endian unsigned 16-bit integer which +represents a UTF code point. If the code point doesn't fit within a 16-bit +value, then Big Char should be used instead. + +The length of a char is always 2. + +### Big Char + +A char data type is represented by the id `G`. There is nothing more to the +mark. The data represented is a little-endian unsigned 32-bit integer which +represents a UTF code point. + +The length of a big char is always 4. + +## String + +A string data type is represented by the id `s`. After the id, is a size +indicator we will call `L`. + +The data represented by a string is a UTF-8 encoded string of `L` bytes. + +The length of a string is `L`. + +## Array + +An array data type is represented by the id `a`. After the id is a recursive +mark we will call `I`. After `I` is a size indicator we will call `N`. + +The data represented by an array is a sequence of `N` data items of type `I`. No +marks are required for each sub-item since it has already been defined by `I`. + +The length of an array is `Len(I) * N`. + +## List + +A list data type is represented by the id `A`. After the id is a size indicator +we will call `L`. + +The data represented by a list is a sequence of items where the total size of +all the items add up to `L` e.g. The contents of the list must be exactly `L` +bytes long. + +The length of an list is `L`. + +## Dict + +A dict data type is represented by the id `d`. After the id is two marks we will +call `K` and `V` respectively. After `V` is a size indicator we will call `N`. 
+ +The data represented by a dict is a sequence of `N` pairs of `K`-`V` data +items. No marks are required for each sub-item since they have already been +defined by `K` and `V`. There are a total of `N * 2` items in a dict and each +pair of items are `K` then `V`. + +The length of a dict is `(Len(K) + Len(V)) * N`. + +## Map + +A map data type is represented by the id `D`. After the id is a size indicator +we will call `L`. + +The data represented by a map is a sequence of pairs of items in a key-value +structure. There must be an even number of items in a map, and the total length +of the data must be equal to `L`. + +The length of a map is `L`. + +## Small Enum + +A small enum data type is represented by the id `e`. After the id is a recursive +mark we will call `V`. + +The data represented by a small enum is an unsigned 8-bit integer that +represents the variant of the enum. After the variant is the data for `V`. No +mark is required since `V` has already been defined. + +The length of a small enum is `1 + Len(V)`. + +## Enum + +An enum data type is represented by the id `E`. After the id is a recursive +mark we will call `V`. + +The data represented by a small enum is a little-endian unsigned 16-bit integer +that represents the variant of the enum. After the variant is the data for `V`. +No mark is required since `V` has already been defined. + +The length of an enum is `2 + Len(V)`. + +## Big Enum + +A big enum data type is represented by the id `U`. After the id is a recursive +mark we will call `V`. + +The data represented by a small enum is a little-endian unsigned 32-bit integer +that represents the variant of the enum. After the variant is the data for `V`. +No mark is required since `V` has already been defined. + +The length of an enum is `4 + Len(V)`. + +## Implicit Types + +There are a few types that are not exposed to the user. These are designed to +help optimize the file for I/O. 
A more detailed discussion about I/O +optimizations will be discussed somewhere else _TODO_. + +### Small Pointer + +A small pointer data type is represented by the id `p`. There is nothing more to +the mark. + +The data of the small pointer is a little-endian unsigned 16-bit integer which +represents a location within the file where the value can be found. + +The length of a small pointer is always 2. + +### Pointer + +A pointer data type is represented by the id `P`. There is nothing more to the +mark. + +The data of the small pointer is a little-endian unsigned 32-bit integer which +represents a location within the file where the value can be found. + +The length of a pointer is always 4. + +### Big Pointer + +A pointer data type is represented by the id `T`. There is nothing more to the +mark. + +The data of the small pointer is a little-endian unsigned 64-bit integer which +represents a location within the file where the value can be found. + +The length of a big pointer is always 8. + +### Reserved + +A reserved data type is represented by the id `r`. After the id is a size +indicator we will call `L`. + +The data of the reserved item is unknown. This data should not be read from. The +only requirement is that there must be `L` bytes of data. + +The length of reserved space is `L`. + +### Empty + +An empty data type is represented only by the id `\x00`. There is nothing more +to the mark and there is no data associated with an empty. + +Empty is designed to be used in a similar way to reserved, but where reserved +cannot fit. + +The length of empty is always `0`. + +### Rc + +An rc is a pointer receiver that counts how many references there are to it. + +#### Small Rc + +A small rc data type is represented by the id `x`. After the id is a mark we +will call `V`. + +The data of a small rc is a 1-byte unsigned integer that represents the number +of references to the value. After this is the data value `V`. 
+ +The length of small rc is `1 + Len(V)` + +#### Rc + +An rc data type is represented by the id `X`. After the id is a mark we will +call `V`. + +The data of a small rc is a 2-byte little-endian unsigned integer that +represents the number of references to the value. After this is the data value +`V`. + +The length of rc is `2 + Len(V)` + +#### Big Rc + +An big rc data type is represented by the id `y`. After the id is a mark we will +call `V`. + +The data of a small rc is a 4-byte little-endian unsigned integer that +represents the number of references to the value. After this is the data value +`V`. + +The length of big rc is `4 + Len(V)` + +### Heap + +A heap data type is represented by the id `k`. After the id is a size indicator +we will call `L`. + +The data of the heap is a sequence of reserved, empty, small rc, rc, or big rc. +This is reserved for pointer values. The contents of the heap must be exactly +`L` bytes long. + +The length of the heap is `L`. + +# Mark Grammar + +Below is a comprehensive grammar for marks in the mbon format. + +```rust +SizeEnd = { '\x00'..'\x7f' } // 0b0...... +SizeContinue = { '\x80'..'\xff' } // 0b1...... 
+Size = { SizeContinue ~ Size | SizeEnd } + +MarkU8 = { "b" } +MarkI8 = { "B" } +MarkU16 = { "h" } +MarkI16 = { "H" } +MarkU32 = { "i" } +MarkI32 = { "I" } +MarkU64 = { "l" } +MarkI64 = { "L" } +MarkF32 = { "f" } +MarkF64 = { "F" } +MarkNull = { "n" } + +MarkSmallChar = { "c" } +MarkChar = { "C" } +MarkBigChar = { "G" } + +MarkStr = { "s" ~ Size } +MarkArr = { "a" ~ Mark ~ Size } +MarkList = { "A" ~ Size } +MarkDict = { "d" ~ Mark ~ Mark ~ Size } +MarkMap = { "D" ~ Size } + +MarkSmallEnum = { "e" ~ Mark } +MarkEnum = { "E" ~ Mark } +MarkBigEnum = { "U" ~ Mark } + +MarkSmallPtr = { "p" } +MarkPtr = { "P" } +MarkBigPtr = { "T" } + +MarkReserved = { "r" ~ Size } +MarkEmpty = { "\x00" } + +MarkSmallRc = { "x" ~ Mark } +MarkRc = { "X" ~ Mark } +MarkBigRc = { "y" ~ Mark } + +MarkHeap = { "k" ~ Size } + +Mark = { + MarkU8 | MarkI8 + | MarkU16 | MarkI16 + | MarkU32 | MarkI32 + | MarkU64 | MarkI64 + | MarkF32 | MarkF64 + | MarkNull + | MarkSmallChar | MarkChar | MarkBigChar + | MarkStr + | MarkArr | MarkList + | MarkDict | MarkMap + | MarkSmallEnum | MarkEnum | MarkBigEnum + | MarkSmallPtr | MarkPtr | MarkBigPtr + | MarkEmpty | MarkEmtpy + | MarkSmallRc | MarkRc | MarkBigRc + | MarkHeap +} + +``` From 0cc5d1b3a31d63e40ffec2a96004a0e8efa3c1e7 Mon Sep 17 00:00:00 2001 From: Benjamin Jacobs Date: Mon, 19 Feb 2024 01:02:17 -0700 Subject: [PATCH 02/10] Change from ascii ids to binary ids #1 --- spec/types.md | 651 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 388 insertions(+), 263 deletions(-) diff --git a/spec/types.md b/spec/types.md index d8fcfd3..cef6e55 100644 --- a/spec/types.md +++ b/spec/types.md @@ -4,6 +4,13 @@ In mbon, data is made out of items. These items are made of two parts: A mark, and a value. Unless otherwise specified, an item is always a mark followed by data. +> All grammars in this document are written with the [pest] grammar language. 
+ +[pest]: https://pest.rs/book/ + +> There are several blocks of code which are written in a pseudo-code of rust. +> It will use a familiar rust syntax, but will likely not compile. + ## Size Some marks have a size indicator. This indicator is dynamically sized. The @@ -13,16 +20,18 @@ The indicator starts at one byte in length. There is a continuation bit in each byte of the indicator. This is the most significant bit in each byte. If it is 1, then there is more to read, otherwise the size indicator is finished. +When reading a size indicator, the most significant bit of each byte is ignored. +The value is read as a little-endian unsigned integer. Overall, sizes may not be +larger than 64 bits or 10 characters. + +### Size Grammar + ```rust SizeEnd = { '\x00'..'\x7f' } // 0b0000_0000 through 0b0111_1111 SizeContinue = { '\x80'..'\xff' } // 0b1000_0000 through 0b1111_1111 Size = { SizeContinue ~ Size | SizeEnd } ``` -When reading a size indicator, the most significant bit of each byte is ignored. -The value is read as a little-endian unsigned integer. Overall, sizes may not be -larger than 64 bits or 10 characters. - ### Examples Given the data (hex)`5a b3 06`, We would first read `5a` which is (bin)`0 @@ -35,395 +44,511 @@ so we read the next byte (hex)`06` (bin)`0 0000011`. We add `0b0000011 << (1 * 7)` to the sum and get `0x1b3`. The most significant bit is 0, so we are done with a final size of 435. -## U8 - -A u8 data type is represented by the id `b`. There is nothing more to the -mark. +## IDs -The data represented is an unsigned 8-bit integer. +Every item has an id to identify its type. This is a single byte which is used +to know what the type is. There are five parts to an id: E, P, S, T and B. -The length of a u8 is always 1. +* E bit 7: whether there is a body associated with the type. +* P bit 6: whether the type is publicly available. +* S bit 5: whether the type has a fixed size. 
+* T bits 2-4: the type id (which is only unique to each E, P, S combination) +* B bits 0-1: The number of bytes in the fixed size value (which is `2^B`). -## I8 +Below is a diagram of how the bits are structured in the id byte as well as some +pseudo-code definitions that will be used in type descriptions. -An i8 data type is represented by the id `B`. There is nothing more to the -mark. +``` +7 6 5 432 10 +E P S TTT BB +``` -The data represented is a signed 8-bit integer. +```rust +let E = 1u8 << 7; +let P = 1u8 << 6; +let S = 1u8 << 5; +let T = |v: u8| (v << 2) & 0b0001_1100; +let B = |v: u8| v & 0b0000_0011; +let b_iter = |r: Range<u8>, id: u8| r.map(|v| id | B(v)).collect::<Vec<u8>>(); +let len_b = |id: u8| 2u8.pow(id & 0b0000_0011); +``` -The length of an i8 is always 1. +## Types + +[Null]: #null +[Unsigned]: #unsigned +[Signed]: #signed +[Float]: #float +[Char]: #char +[String]: #string +[Array]: #array +[List]: #list +[Struct]: #struct +[Map]: #map +[Enum]: #enum +[Space]: #space +[Padding]: #padding +[Pointer]: #pointer +[Rc]: #rc +[Heap]: #heap + +Below are definitions of all the mbon types. + +### Null + +A null data type is represented by the id (hex)`c0`. There is nothing more to +the mark. -## U16 +There is no data associated with the null type. -A u16 data type is represented by the id `h`. There is nothing more to the -mark. +```rust +let id = E | P | T(0); // 0xc0 +let len = 0; +``` -The data represented is a little-endian unsigned 16-bit integer. +#### Null Grammar -The length of a u16 is always 2. +```rust +MarkNull = { "\xc0" } +``` -## I16 +### Unsigned -An i16 data type is represented by the id `H`. There is nothing more to the -mark. +The unsigned data type is represented by the ids (hex)`64 65 66 67`. There is +nothing more to the mark. -The data represented is a little-endian signed 16-bit integer. +The data is a little-endian unsigned integer of `len_b(id)` bytes. -The length of an i16 is always 2.
+* `64`: 1-byte (u8) +* `65`: 2-byte (u16) +* `66`: 4-byte (u32) +* `67`: 8-byte (u64) -## U32 +```rust +let id = b_iter(0..4, P | S | T(1)); // [0x64, 0x65, 0x66, 0x67] +let len = len_b(id); +``` -A u32 data type is represented by the id `i`. There is nothing more to the -mark. +#### Unsigned Grammar -The data represented is a little-endian unsigned 32-bit integer. +```rust +MarkU8 = { "\x64" } +MarkU16 = { "\x65" } +MarkU32 = { "\x66" } +MarkU64 = { "\x67" } +MarkUnsigned = { MarkU8 | MarkU16 | MarkU32 | MarkU64 } +``` -The length of a u32 is always 4. +### Signed -## I32 +The signed data type is represented by the ids (hex)`68 69 6a 6b`. There is +nothing more to the mark. -An i32 data type is represented by the id `I`. There is nothing more to the -mark. +The data is a little-endian signed integer of `len_b(id)` bytes. -The data represented is a little-endian signed 32-bit integer. +* `68`: 1-byte (i8) +* `69`: 2-byte (i16) +* `6a`: 4-byte (i32) +* `6b`: 8-byte (i64) -The length of an i32 is always 4. +```rust +let id = b_iter(0..4, P | S | T(2)); // [0x68, 0x69, 0x6a, 0x6b] +let len = len_b(id); +``` -## U64 +#### Signed Grammar -A u64 data type is represented by the id `l`. There is nothing more to the -mark. +```rust +MarkI8 = { "\x68" } +MarkI16 = { "\x69" } +MarkI32 = { "\x6a" } +MarkI64 = { "\x6b" } +MarkSigned = { MarkI8 | MarkI16 | MarkI32 | MarkI64 } +``` -The data represented is a little-endian unsigned 64-bit integer. +### Float -The length of a u64 is always 8. +The float data type is represented by the ids (hex)`6e 6f`. There is +nothing more to the mark. -## I64 +The data is a little-endian IEEE-754 float of `len_b(id)` bytes. -An i64 data type is represented by the id `L`. There is nothing more to the -mark. +* `6e`: 4-byte (f32) +* `6f`: 8-byte (f64) -The data represented is a little-endian signed 64-bit integer. +```rust +let id = b_iter(2..4, P | S | T(3)); // [0x6e, 0x6f] +let len = len_b(id); +``` -The length of an i64 is always 8.
+#### Float Grammar -## F32 +```rust +MarkF32 = { "\x6e" } +MarkF64 = { "\x6f" } +MarkFloat = { MarkF32 | MarkF64 } +``` -An f32 data type is represented by the id `f`. There is nothing more to the -mark. +### Char -The data represented is a little-endian IEEE-754 float. +The char data type is represented by the ids (hex)`70 71 72`. There is nothing +more to the mark. -The length of an f32 is always 4. +The data is a little-endian unsigned integer of `len_b(id)` bytes which represents +a UTF code point. -## F64 +* `70`: 1-byte (u8 char) +* `71`: 2-byte (u16 char) +* `72`: 4-byte (u32 char) -An i64 data type is represented by the id `F`. There is nothing more to the -mark. +```rust +let id = b_iter(0..3, P | S | T(4)); // [0x70, 0x71, 0x72] +let len = len_b(id); +``` -The data represented is a little-endian IEEE-754 double. +#### Char Grammar -The length of an f64 is always 8. +```rust +MarkC8 = { "\x70" } +MarkC16 = { "\x71" } +MarkC32 = { "\x72" } +MarkChar = { MarkC8 | MarkC16 | MarkC32 } +``` -## Null +### String -A null data type is represented by the id `n`. There is nothing more to the -mark. +A string data type is represented by the id (hex)`54`. After the id, is a size +indicator we will call `L`. -There is no data associated with a null. +The data represented by a string is a UTF-8 encoded string of `L` bytes. -The length of a null is always 0. +```rust +let id = P | T(5); // 0x54 +let len = L; +``` -## Chars +#### String Grammar -Characters are all represented by UTF code points. A majority of english -characters fit within the 8-bit range of a byte. In many other languages, most -will fit within 16-bits. And all characters can fit within 32-bits. +```rust +MarkString = { "\x54" ~ Size } +``` -It is possible to have characters represented by UTF-8, but that would require a -size indicator in the mark to know how long the character is.
A better option -would be to have 3 char types each with different sizes to accommodate for all -possible characters without wasting unused space. +### Array -### Small Char +An array data type is represented by the id (hex)`40`. After the id is a +recursive mark we will call `V`. After `V` is a size indicator we will call `N`. -A char data type is represented by the id `c`. There is nothing more to the -mark. The data represented is a unsigned 8-bit integer which represents a UTF -code point. If the code point doesn't fit within an 8-bit value, then Char or -Big Char should be used instead. +The data represented by an array is a sequence of `N` data items of type `V`. No +marks are required for each sub-item since it has already been defined by `V`. -The length of a small char is always 1. +Note that all values in the array must be homogeneous. This severely limits what +can be used for an array. If an item cannot be stored in an array, then [List] +should be used instead. -### Char +```rust +let id = P | T(0); // 0x40 +let len = data_len(V) * N; +``` -A char data type is represented by the id `C`. There is nothing more to the -mark. The data represented is a little-endian unsigned 16-bit integer which -represents a UTF code point. If the code point doesn't fit within a 16-bit -value, then Big Char should be used instead. +#### Array Grammar -The length of a char is always 2. +```rust +MarkArray = { "\x40" ~ Mark ~ Size } +``` -### Big Char +### List -A char data type is represented by the id `G`. There is nothing more to the -mark. The data represented is a little-endian unsigned 32-bit integer which -represents a UTF code point. +A list data type is represented by the id (hex)`44`. After the id is a size +indicator we will call `L`. -The length of a big char is always 4. +The data represented by a list is a sequence of items where the total size of +all the items add up to `L` e.g. The contents of the list must be exactly `L` +bytes long. 
-## String +```rust +let id = P | T(1); // 0x44 +let len = L; +``` -A string data type is represented by the id `s`. After the id, is a size -indicator we will call `L`. +#### List Grammar -The data represented by a string is a UTF-8 encoded string of `L` bytes. +```rust +MarkList = { "\x44" ~ Size } +``` -The length of a string is `L`. +### Struct -## Array +A struct data type is represented by the id (hex)`48`. After the id are two marks +we will call `K` and `V` respectively. After `V` is a size indicator we will +call `N`. -An array data type is represented by the id `a`. After the id is a recursive -mark we will call `I`. After `I` is a size indicator we will call `N`. +The data represented by a struct is a sequence of `N` pairs of `K`-`V` data +items. No marks are required for each of these items since they have already +been defined by `K` and `V`. There are a total of `N * 2` items in a struct and +each pair of items are `K` then `V`. -The data represented by an array is a sequence of `N` data items of type `I`. No -marks are required for each sub-item since it has already been defined by `I`. +Note that all values in the struct must be homogeneous. This severely limits +what can be used for a struct. If an item cannot be stored in a struct, then +[Map] should be used instead. -The length of an array is `Len(I) * N`. +```rust +let id = P | T(2); // 0x48 +let len = (data_len(K) + data_len(V)) * N; +``` -## List +#### Struct Grammar -A list data type is represented by the id `A`. After the id is a size indicator -we will call `L`. +```rust +MarkStruct = { "\x48" ~ Mark ~ Mark ~ Size } +``` -The data represented by a list is a sequence of items where the total size of -all the items add up to `L` e.g. The contents of the list must be exactly `L` -bytes long. +### Map -The length of an list is `L`. +A map data type is represented by the id (hex)`4c`. After the id is a size +indicator we will call `L`.
-## Dict +The data represented by a map is a sequence of pairs of items in a key-value +structure. There must be an even number of items in a map, and the total length +of the data must be equal to `L`. -A dict data type is represented by the id `d`. After the id is two marks we will -call `K` and `V` respectively. After `V` is a size indicator we will call `N`. +```rust +let id = P | T(3); // 0x4c +let len = L; +``` -The data represented by a dict is a sequence of `N` pairs of `K`-`V` data -items. No marks are required for each sub-item since they have already been -defined by `K` and `V`. There are a total of `N * 2` items in a dict and each -pair of items are `K` then `V`. +#### Map Grammar -The length of a dict is `(Len(K) + Len(V)) * N`. +```rust +MarkMap = { "\x4c" ~ Size } +``` -## Map +### Enum -A map data type is represented by the id `D`. After the id is a size indicator -we will call `L`. +The enum data type is represented by the ids (hex)`74 75 76`. After the id is +a recursive mark we will call `V`. -The data represented by a map is a sequence of pairs of items in a key-value -structure. There must be an even number of items in a map, and the total length -of the data must be equal to `L`. +The data represented by the enum is a little-endian unsigned integer with +`len_b(id)` bytes which represents the variant of the enum. After the variant +value is the data of `V`. No mark is required since `V` has already been defined. -The length of a map is `L`. +* `74`: 1-byte (u8 variant) +* `75`: 2-byte (u16 variant) +* `76`: 4-byte (u32 variant) -## Small Enum +```rust +let id = b_iter(0..3, P | S | T(5)); // [0x74, 0x75, 0x76] +let len = len_b(id) + data_len(V); +``` -A small enum data type is represented by the id `e`. After the id is a recursive -mark we will call `V`. +#### Enum Grammar -The data represented by a small enum is an unsigned 8-bit integer that -represents the variant of the enum. After the variant is the data for `V`.
No -mark is required since `V` has already been defined. +```rust +MarkE8 = { "\x74"} +MarkE16 = { "\x75" } +MarkE32 = { "\x76" } +MarkEnum = { (MarkE8 | MarkE16 | MarkE32) ~ Mark } +``` -The length of a small enum is `1 + Len(V)`. +### Space -## Enum +The space type is represented by the id (hex)`80`. There is nothing more to the +mark. -An enum data type is represented by the id `E`. After the id is a recursive -mark we will call `V`. +There is no data associated with space. -The data represented by a small enum is a little-endian unsigned 16-bit integer -that represents the variant of the enum. After the variant is the data for `V`. -No mark is required since `V` has already been defined. +The space type is used as padding between items if needed. Whenever possible, +[Padding] should be preferred. -The length of an enum is `2 + Len(V)`. +```rust +let id = E | T(0); // 0x80 +let len = 0; +``` -## Big Enum +#### Space Grammar -A big enum data type is represented by the id `U`. After the id is a recursive -mark we will call `V`. +```rust +MarkSpace = { "\x80" } +``` -The data represented by a small enum is a little-endian unsigned 32-bit integer -that represents the variant of the enum. After the variant is the data for `V`. -No mark is required since `V` has already been defined. +### Padding -The length of an enum is `4 + Len(V)`. +The padding type is represented by the id (hex)`04`. After the id is a size +indicator we will call `L`. -## Implicit Types +The data of a reserved item is `L` bytes of unused space. The contents should +not be read from since it will be considered junk. -There are a few types that are not exposed to the user. These are designed to -help optimize the file for I/O. A more detailed discussion about I/O -optimizations will be discussed somewhere else _TODO_. +```rust +let id = T(1); // 0x04 +let len = L; +``` -### Small Pointer +#### Padding Grammar -A small pointer data type is represented by the id `p`. 
There is nothing more to -the mark. +```rust +MarkPadding = { "\x04" ~ Size } +``` -The data of the small pointer is a little-endian unsigned 16-bit integer which -represents a location within the file where the value can be found. +### Pointer -The length of a small pointer is always 2. +The pointer type is represented by the ids (hex)`28 29 2a 2b`. There is nothing +else to the mark. -### Pointer +The data of a pointer is a little-endian unsigned integer with `len_b(id)` bytes +which represent a location in the mbon file we will call `P`. The contents at +`P` must be the start of a valid mbon item. -A pointer data type is represented by the id `P`. There is nothing more to the -mark. +* `28`: 1-byte (u8 address) +* `29`: 2-byte (u16 address) +* `2a`: 4-byte (u32 address) +* `2b`: 8-byte (u64 address) -The data of the small pointer is a little-endian unsigned 32-bit integer which -represents a location within the file where the value can be found. +```rust +let id = b_iter(0..4, S | T(2)); // [0x28, 0x29, 0x2a, 0x2b] +let len = len_b(id); +``` -The length of a pointer is always 4. +#### Pointer Grammar -### Big Pointer +```rust +MarkP8 = { "\x28" } +MarkP16 = { "\x29" } +MarkP32 = { "\x2a" } +MarkP64 = { "\x2b" } +MarkPointer = { MarkP8 | MarkP16 | MarkP32 | MarkP64 } +``` -A pointer data type is represented by the id `T`. There is nothing more to the -mark. +### Rc -The data of the small pointer is a little-endian unsigned 64-bit integer which -represents a location within the file where the value can be found. +The rc type is represented by the ids (hex)`2c 2d 2e 2f`. After the id is a mark +we will call `V`. -The length of a big pointer is always 8. +The data of an rc is a little-endian unsigned integer with `len_b(id)` bytes +that represents the number of references to this item. After which is the data +for `V`. No mark is required since `V` has already been defined. -### Reserved +Rc's should always be used alongside pointers. 
They should be treated like an +invisible box most of the time; Only when doing pointer operations should rc's +be considered. -A reserved data type is represented by the id `r`. After the id is a size -indicator we will call `L`. +* `2c`: 1-byte (u8 variant) +* `2d`: 2-byte (u16 variant) +* `2e`: 4-byte (u32 variant) +* `2f`: 8-byte (u64 variant) -The data of the reserved item is unknown. This data should not be read from. The -only requirement is that there must be `L` bytes of data. +```rust +let id = b_iter(0..4, S | T(3)); // [0x2c, 0x2d, 0x2e, 0x2f] +let len = len_b(id) + data_len(V); +``` -The length of reserved space is `L`. +#### Rc Grammar -### Empty +```rust +MarkR8 = { "\x2c" } +MarkR16 = { "\x2d" } +MarkR32 = { "\x2e" } +MarkR64 = { "\x2f" } +MarkRc = { (MarkR8 | MarkR16 | MarkR32 | MarkR64) ~ Mark } +``` -An empty data type is represented only by the id `\x00`. There is nothing more -to the mark and there is no data associated with an empty. +### Heap -Empty is designed to be used in a similar way to reserved, but where reserved -cannot fit. +The heap type is represented by the id (hex)`10`. After the id is a size +indicator we will call `L`. -The length of empty is always `0`. +The data of the heap is a sequence of items where the total size of all the +items add up to `L`. -### Rc +The contents of the heap are hidden from the user, in other words it should be +treated like padding, but with valid data inside. The only way the user can +access items in the heap is through [Pointer]s. The heap should be a root level +item of the mbon file. -An rc is a pointer receiver that counts how many references there are to it. +```rust +let id = T(4); // 0x10 +let len = L; +``` -#### Small Rc +#### Heap Grammar -A small rc data type is represented by the id `x`. After the id is a mark we -will call `V`. +```rust +MarkHeap = { "\x10" ~ Size } +``` -The data of a small rc is a 1-byte unsigned integer that represents the number -of references to the value. 
After this is the data value `V`. +# Full Mark Grammar -The length of small rc is `1 + Len(V)` +Below is a comprehensive grammar for marks in the mbon format. -#### Rc +```rust +SizeEnd = { '\x00'..'\x7f' } // 0b0...... +SizeContinue = { '\x80'..'\xff' } // 0b1...... +Size = { SizeContinue ~ Size | SizeEnd } -An rc data type is represented by the id `X`. After the id is a mark we will -call `V`. +Mark = { + MarkNull + | MarkUnsigned | MarkSigned | MarkFloat + | MarkChar | MarkString + | MarkArray | MarkList + | MarkStruct | MarkMap + | MarkEnum + | MarkSpace | MarkPadding + | MarkPointer | MarkRc | MarkHeap +} -The data of a small rc is a 2-byte little-endian unsigned integer that -represents the number of references to the value. After this is the data value -`V`. +MarkNull = { "\xc0" } -The length of rc is `2 + Len(V)` +MarkU8 = { "\x64" } +MarkU16 = { "\x65" } +MarkU32 = { "\x66" } +MarkU64 = { "\x67" } +MarkUnsigned = { MarkU8 | MarkU16 | MarkU32 | MarkU64 } -#### Big Rc +MarkI8 = { "\x68" } +MarkI16 = { "\x69" } +MarkI32 = { "\x6a" } +MarkI64 = { "\x6b" } +MarkSigned = { MarkI8 | MarkI16 | MarkI32 | MarkI64 } -An big rc data type is represented by the id `y`. After the id is a mark we will -call `V`. +MarkF32 = { "\x6e" } +MarkF64 = { "\x6f" } +MarkFloat = { MarkF32 | MarkF64 } -The data of a small rc is a 4-byte little-endian unsigned integer that -represents the number of references to the value. After this is the data value -`V`. +MarkC8 = { "\x70" } +MarkC16 = { "\x71" } +MarkC32 = { "\x72" } +MarkChar = { MarkC8 | MarkC16 | MarkC32 } -The length of big rc is `4 + Len(V)` +MarkString = { "\x54" ~ Size } -### Heap +MarkArray = { "\x40" ~ Mark ~ Size } -A heap data type is represented by the id `k`. After the id is a size indicator -we will call `L`. +MarkList = { "\x44" ~ Size } -The data of the heap is a sequence of reserved, empty, small rc, rc, or big rc. -This is reserved for pointer values. The contents of the heap must be exactly -`L` bytes long.
+MarkStruct = { "\x48" ~ Mark ~ Mark ~ Size } -The length of the heap is `L`. +MarkMap = { "\x4c" ~ Size } -# Mark Grammar +MarkE8 = { "\x74"} +MarkE16 = { "\x75" } +MarkE32 = { "\x76" } +MarkEnum = { (MarkE8 | MarkE16 | MarkE32) ~ Mark } -Below is a comprehensive grammar for marks in the mbon format. +MarkSpace = { "\x80" } -```rust -SizeEnd = { '\x00'..'\x7f' } // 0b0...... -SizeContinue = { '\x80'..'\xff' } // 0b1...... -Size = { SizeContinue ~ Size | SizeEnd } +MarkPadding = { "\x04" ~ Size } -MarkU8 = { "b" } -MarkI8 = { "B" } -MarkU16 = { "h" } -MarkI16 = { "H" } -MarkU32 = { "i" } -MarkI32 = { "I" } -MarkU64 = { "l" } -MarkI64 = { "L" } -MarkF32 = { "f" } -MarkF64 = { "F" } -MarkNull = { "n" } - -MarkSmallChar = { "c" } -MarkChar = { "C" } -MarkBigChar = { "G" } - -MarkStr = { "s" ~ Size } -MarkArr = { "a" ~ Mark ~ Size } -MarkList = { "A" ~ Size } -MarkDict = { "d" ~ Mark ~ Mark ~ Size } -MarkMap = { "D" ~ Size } - -MarkSmallEnum = { "e" ~ Mark } -MarkEnum = { "E" ~ Mark } -MarkBigEnum = { "U" ~ Mark } - -MarkSmallPtr = { "p" } -MarkPtr = { "P" } -MarkBigPtr = { "T" } - -MarkReserved = { "r" ~ Size } -MarkEmpty = { "\x00" } - -MarkSmallRc = { "x" ~ Mark } -MarkRc = { "X" ~ Mark } -MarkBigRc = { "y" ~ Mark } - -MarkHeap = { "k" ~ Size } +MarkP8 = { "\x28" } +MarkP16 = { "\x29" } +MarkP32 = { "\x2a" } +MarkP64 = { "\x2b" } +MarkPointer = { MarkP8 | MarkP16 | MarkP32 | MarkP64 } -Mark = { - MarkU8 | MarkI8 - | MarkU16 | MarkI16 - | MarkU32 | MarkI32 - | MarkU64 | MarkI64 - | MarkF32 | MarkF64 - | MarkNull - | MarkSmallChar | MarkChar | MarkBigChar - | MarkStr - | MarkArr | MarkList - | MarkDict | MarkMap - | MarkSmallEnum | MarkEnum | MarkBigEnum - | MarkSmallPtr | MarkPtr | MarkBigPtr - | MarkEmpty | MarkEmtpy - | MarkSmallRc | MarkRc | MarkBigRc - | MarkHeap -} +MarkR8 = { "\x2c" } +MarkR16 = { "\x2d" } +MarkR32 = { "\x2e" } +MarkR64 = { "\x2f" } +MarkRc = { (MarkR8 | MarkR16 | MarkR32 | MarkR64) ~ Mark } +MarkHeap = { "\x10" ~ Size } ``` + From 
d99b1b8c09f77e49045a1c55fc6f6532831cc736 Mon Sep 17 00:00:00 2001 From: Benjamin Jacobs Date: Mon, 19 Feb 2024 18:08:26 -0700 Subject: [PATCH 03/10] Add behaviour and examples --- spec/behaviour.md | 72 ++++++++++++++++++++++++++++++++++++++++ spec/examples.md | 74 ++++++++++++++++++++++++++++++++++++++++++ spec/file-structure.md | 39 +++++++++++++++------- spec/index.md | 3 +- spec/types.md | 10 +++--- 5 files changed, 181 insertions(+), 17 deletions(-) create mode 100644 spec/behaviour.md create mode 100644 spec/examples.md diff --git a/spec/behaviour.md b/spec/behaviour.md new file mode 100644 index 0000000..222b485 --- /dev/null +++ b/spec/behaviour.md @@ -0,0 +1,72 @@ +# Behaviour + +Mbon is meant to be used as a dynamic file format. Where only parts of a file +are loaded into memory and changes can be made to the file while in operation. +I/O operations can take a long time and so they are used efficiently. + +Because of this, there are some guidelines on how mbon files should be +read/written to depending on their modes of access. + +## In-memory mode + +The simplest way to access an mbon file is to have it fully in memory. This is +closer to any other conventional file format where the whole file needs to be +parsed into memory. Serde implementations would be operating in this mode. + +This is most useful for data being transferred over the network, or small files. + +## Read-only mode + +This mode allows for multiple processes to access the file at the same time and +the file does not need to be completely loaded into memory. + +In order to open in read-only mode, there must not be a write lock on the file. +To prevent other processes from opening the file in write mode, a read lock +should be created. This is a file called `{filename}.read.lock`. The contents of +which is the number of processes that are reading the file. If the read lock +already exists, then the contents should be incremented by one. 
When finished, +the contents of the read lock should be decremented by 1. If the value is 0, +then the read lock can be safely deleted. + +This mode is most useful when the file being read is large and no changes will +be made to it, such as with static data files. + +### Reading + +When reading from a file, there should be an in-memory cache of the file. This +cache should contain the data of what has been read from the file. When an item +is requested, the cache should be checked first before seeking the file. + +The user will provide the location of the item they want to read. Depending on +the type of item, specific indexes or sections of the item may be requested. The +engine may skip through items until the requested item is found. + +Since disks will read one sector at a time, it is recommended that when +performing a read, the sector (usually 512 bytes) should be cached to prevent +future reads from disk. + + +## Read-write mode + +This mode allows for a single process to access the file and can be +simultaneously read/written from. + +In order to open in read-write mode, there must not be a read lock on the file +nor a write lock. If it can be opened, then a write lock file can be created. +This file is named `{filename}.write.lock`. It has no contents. When finished +with the file, then the write lock may be deleted. + +This mode is most useful when the file being accessed is large and is being +updated, such as with game save states. + +### Writing + +Writing to a file can be complicated. When possible, large sections of the file +should not be moved around. If a change to an item caused it to shrink and items +after it would shift to the left, padding can be inserted to maintain the items' +positions. If a change to an item caused it to grow and items would shift to the +right, the changed item can be moved to the heap. 
+ +Much more advanced logic can be designed to minimize the number of writes that +need to be made, but this simple algorithm should be sufficient for most +applications. diff --git a/spec/examples.md b/spec/examples.md new file mode 100644 index 0000000..6b041d4 --- /dev/null +++ b/spec/examples.md @@ -0,0 +1,74 @@ +# Examples + +In this document, I will provide a few examples of mbon files and how they work. + +> File blocks will be written with a combination of hex and ascii. A hex +> byte will always be 2 characters long: `ab`, while an ascii byte will +> always be 1 `h`. Each byte is separated by a space +> +> ``` +> h e l l o 20 w o r l d ! +> ``` + +> Here I will be describing mbon files in a high level similar to yaml. + + +## Simple Example + + +``` +list: +- unsigned<16>(1234) +- string("Hello World!") +- array<unsigned<8>>("My binary data") +``` + +``` +EE m b o n 0D 0A 00 +65 D2 04 +54 0C H e l l o 20 W o r l d ! +40 64 0E M y 20 b i n a r y 20 d a t a +``` + +## Map + +``` +map: + string("val"): unsigned<16>(1234) + string("str"): string("Hello World!") + string("bin"): array<unsigned<8>>("My binary data") +``` + +``` +EE m b o n 0D 0A 00 +4c 31 + 54 03 v a l 65 D2 04 + 54 03 s t r 54 0C H e l l o 20 W o r l d ! + 54 03 b i n 40 64 0E M y 20 b i n a r y 20 d a t a +``` + +## Pointers + +This example is the same as above, but all the keys of the map are pointers to +rc's of strings. + +``` +map: + string("val"): unsigned<16>(1234) + string("str"): string("Hello World!") + string("bin"): array<unsigned<8>>("My binary data") +``` + +``` +EE m b o n 0D 0A 00 +4c 28 + 28 4d 65 D2 04 + 28 46 54 0C H e l l o 20 W o r l d ! + 28 3f 40 64 0E M y 20 b i n a r y 20 d a t a +10 20 + 04 09 00 00 00 00 00 00 00 00 00 + 2C 54 03 b i n + 2C 54 03 s t r + 2C 54 03 v a l +``` + diff --git a/spec/file-structure.md b/spec/file-structure.md index f93700f..b9138b5 100644 --- a/spec/file-structure.md +++ b/spec/file-structure.md @@ -4,7 +4,7 @@ An mbon file is made up of two parts: A header, and the content.
## Header -The header begins with this 8-byte signature (hex)`EE 6D 62 6E 0D 0A 1A 00`. +The header begins with this 8-byte signature (hex)`EE 6D 62 6F 6E 0D 0A 00`. After the signature is a single byte version number, currently (hex)`01`. The signature comes from the PNG signature and the explanation will be repeated @@ -23,25 +23,42 @@ here. The 8-byte signature of mbon in decimal, hex and ascii is ``` -(dec) 238 109 98 110 13 10 26 0 -(hex) EE 6D 62 6E 0D 0A 1A 00 -(ascii) \356 m b n \r \n \032 \0 +(dec) 238 109 98 111 110 13 10 0 +(hex) EE 6D 62 6F 6E 0D 0A 00 +(ascii) \356 m b o n \r \n \0 ``` Like the png signature, this signature aims to have a unique value that can be used to know that the file is an mbon file without relying on an extension as -well as determine if the file has encountered problems in transfer. +well as determine if the file has encountered common problems in transfer. After the header immediately begins the main content of the file. ## Content -The content of an mbon file may be a sequence of items much like a list. After -the primary contents of the file may be a heap of pointer data. This is a single -heap value. +The content of an mbon file is a sequence of items much like a [List]. Along +with the items can be [Heap] items. These are hidden from the user, but are used +to store data outside of the main item tree. -In order to allow for the main content to grow, it is recommended that the -heap leaves a buffer of reserved data at the beginning of the heap. +If heaps are used, they should have padding that would allow for the main item +tree to grow. Descriptions of the different types available are available at -[types.md](types.md). +[types](types.md).
+ +[Null]: types.md#null +[Unsigned]: types.md#unsigned +[Signed]: types.md#signed +[Float]: types.md#float +[Char]: types.md#char +[String]: types.md#string +[Array]: types.md#array +[List]: types.md#list +[Struct]: types.md#struct +[Map]: types.md#map +[Enum]: types.md#enum +[Space]: types.md#space +[Padding]: types.md#padding +[Pointer]: types.md#pointer +[Rc]: types.md#rc +[Heap]: types.md#heap diff --git a/spec/index.md b/spec/index.md index 8378965..4d8b093 100644 --- a/spec/index.md +++ b/spec/index.md @@ -11,5 +11,6 @@ implementations should behave with the data. * [File Structure](file-structure.md) * [Types](types.md) - +* [Behaviour](behaviour.md) +* [Examples](examples.md) diff --git a/spec/types.md b/spec/types.md index cef6e55..332e802 100644 --- a/spec/types.md +++ b/spec/types.md @@ -427,14 +427,14 @@ The data of an rc is a little-endian unsigned integer with `len_b(id)` bytes that represents the number of references to this item. After which is the data for `V`. No mark is required since `V` has already been defined. -Rc's should always be used alongside pointers. They should be treated like an +Rc's should always be used alongside [Pointer]s. They should be treated like an invisible box most of the time; Only when doing pointer operations should rc's be considered. 
-* `2c`: 1-byte (u8 variant) -* `2d`: 2-byte (u16 variant) -* `2e`: 4-byte (u32 variant) -* `2f`: 8-byte (u64 variant) +* `2c`: 1-byte (u8 reference count) +* `2d`: 2-byte (u16 reference count) +* `2e`: 4-byte (u32 reference count) +* `2f`: 8-byte (u64 reference count) ```rust let id = b_iter(0..4, S | T(3)); // [0x2c, 0x2d, 0x2e, 0x2f] From 654957e7849a2640fa4c90dfb40458375c383529 Mon Sep 17 00:00:00 2001 From: Benjamin Jacobs Date: Sat, 2 Mar 2024 02:03:10 -0700 Subject: [PATCH 04/10] Start the rewrite --- Cargo.lock | 321 +++++++++++---- Cargo.toml | 32 +- readme.md | 336 +--------------- src/async_wrapper.rs | 554 -------------------------- src/buffer.rs | 719 ++++++++++++++++++++++++++++++++++ src/channel.rs | 60 +++ src/concurrent.rs | 231 +++++++++++ src/data.rs | 902 +++++++++++++++++------------------------- src/data/de.rs | 625 ----------------------------- src/data/ser.rs | 440 --------------------- src/dumper.rs | 908 ------------------------------------------- src/engine.rs | 309 +++++++++++++++ src/error.rs | 115 ------ src/errors.rs | 40 ++ src/items.rs | 109 ++++++ src/lib.rs | 178 +-------- src/marks.rs | 380 ++++++++++++++++++ src/object.rs | 153 -------- src/parser.rs | 615 ----------------------------- 19 files changed, 2475 insertions(+), 4552 deletions(-) delete mode 100644 src/async_wrapper.rs create mode 100644 src/buffer.rs create mode 100644 src/channel.rs create mode 100644 src/concurrent.rs delete mode 100644 src/data/de.rs delete mode 100644 src/data/ser.rs delete mode 100644 src/dumper.rs create mode 100644 src/engine.rs delete mode 100644 src/error.rs create mode 100644 src/errors.rs create mode 100644 src/items.rs create mode 100644 src/marks.rs delete mode 100644 src/object.rs delete mode 100644 src/parser.rs diff --git a/Cargo.lock b/Cargo.lock index ef78731..295e698 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3,10 +3,44 @@ version = 3 [[package]] -name = "async-recursion" -version = "1.0.0" +name = "addr2line" +version = "0.21.0" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "anyhow" +version = "1.0.80" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ad32ce52e4161730f7098c077cd2ed6229b5804ccf99e5366be1ab72a98b4e1" + +[[package]] +name = "async-channel" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f28243a43d821d11341ab73c80bed182dc015c514b951616cf79bd4af39af0c3" +dependencies = [ + "concurrent-queue", + "event-listener", + "event-listener-strategy", + "futures-core", + "pin-project-lite", +] + +[[package]] +name = "async-trait" +version = "0.1.77" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2cda8f4bcc10624c4e85bc66b3f452cca98cfa5ca002dc83a16aad2367641bea" +checksum = "c980ee35e870bd1a4d2c8294d4c04d0499e67bca1e4b5cefcc693c2fa00caea9" dependencies = [ "proc-macro2", "quote", @@ -14,10 +48,19 @@ dependencies = [ ] [[package]] -name = "autocfg" -version = "1.1.0" +name = "backtrace" +version = "0.3.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" +checksum = "2089b7e3f35b9dd2d0ed921ead4f6d318c27680d4a5bd167b3ee120edb105837" +dependencies = [ + "addr2line", + "cc", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", +] [[package]] name = "byteorder" @@ -26,28 +69,63 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" [[package]] -name = "futures" -version = "0.3.25" +name = "cc" +version = "1.0.88" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "02f341c093d19155a6e41631ce5971aac4e9a868262212153124c15fa22d1cdc" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "concurrent-queue" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38390104763dc37a5145a53c29c63c1290b5d316d6086ec32c293f6736051bb0" +checksum = "d16048cd947b08fa32c24458a22f5dc5e835264f689f4f5653210c69fd107363" dependencies = [ - "futures-channel", - "futures-core", - "futures-executor", - "futures-io", - "futures-sink", - "futures-task", - "futures-util", + "crossbeam-utils", ] [[package]] -name = "futures-channel" -version = "0.3.25" +name = "crossbeam-utils" +version = "0.8.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52ba265a92256105f45b719605a571ffe2d1f0fea3807304b522c1d778f79eed" +checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345" + +[[package]] +name = "enum-as-inner" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5ffccbb6966c05b32ef8fbac435df276c4ae4d3dc55a8cd0eb9745e6c12f546a" dependencies = [ - "futures-core", - "futures-sink", + "heck", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "event-listener" +version = "5.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7ad6fd685ce13acd6d9541a30f6db6567a7a24c9ffd4ba2955d29e3f22c8b27" +dependencies = [ + "concurrent-queue", + "parking", + "pin-project-lite", +] + +[[package]] +name = "event-listener-strategy" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "feedafcaa9b749175d5ac357452a9d41ea2911da598fde46ce1fe02c37751291" +dependencies = [ + "event-listener", + "pin-project-lite", ] [[package]] @@ -57,27 
+135,45 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04909a7a7e4633ae6c4a9ab280aeb86da1236243a77b694a49eacd659a4bd3ac" [[package]] -name = "futures-executor" -version = "0.3.25" +name = "getrandom" +version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7acc85df6714c176ab5edf386123fafe217be88c0840ec11f199441134a074e2" +checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5" dependencies = [ - "futures-core", - "futures-task", - "futures-util", + "cfg-if", + "libc", + "wasi", ] [[package]] -name = "futures-io" -version = "0.3.25" +name = "gimli" +version = "0.28.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00f5fb52a06bdcadeb54e8d3671f8888a39697dcb0b81b23b55174030427f4eb" +checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" [[package]] -name = "futures-macro" -version = "0.3.25" +name = "heck" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bdfb8ce053d86b91919aad980c220b1fb8401a9394410e1c289ed7e66b61835d" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + +[[package]] +name = "hermit-abi" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d3d0e0f38255e7fa3cf31335b3a56f05febd18025f4db5ef7a0cfb4f8da651f" + +[[package]] +name = "libc" +version = "0.2.153" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" + +[[package]] +name = "maybe-async" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cf92c10c7e361d6b99666ec1c6f9805b0bea2c3bd8c78dc6fe98ac5bd78db11" dependencies = [ "proc-macro2", "quote", @@ -85,123 +181,176 @@ dependencies = [ ] [[package]] -name = "futures-sink" -version = "0.3.25" +name = "mbon" +version = "0.3.0+nightly" +dependencies = 
[ + "anyhow", + "async-channel", + "async-trait", + "byteorder", + "enum-as-inner", + "maybe-async", + "rand", + "thiserror", + "tokio", +] + +[[package]] +name = "memchr" +version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39c15cf1a4aa79df40f1bb462fb39676d0ad9e366c2a33b590d7c66f4f81fcf9" +checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" [[package]] -name = "futures-task" -version = "0.3.25" +name = "miniz_oxide" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2ffb393ac5d9a6eaa9d3fdf37ae2776656b706e200c8e16b1bdb227f5198e6ea" +checksum = "9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7" +dependencies = [ + "adler", +] [[package]] -name = "futures-util" -version = "0.3.25" +name = "num_cpus" +version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "197676987abd2f9cadff84926f410af1c183608d36641465df73ae8211dc65d6" +checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" dependencies = [ - "futures-channel", - "futures-core", - "futures-io", - "futures-macro", - "futures-sink", - "futures-task", - "memchr", - "pin-project-lite", - "pin-utils", - "slab", + "hermit-abi", + "libc", ] [[package]] -name = "mbon" -version = "0.2.0" +name = "object" +version = "0.32.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" dependencies = [ - "async-recursion", - "byteorder", - "futures", - "serde", + "memchr", ] [[package]] -name = "memchr" -version = "2.5.0" +name = "parking" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" +checksum = "bb813b8af86854136c6922af0598d719255ecb2179515e6e7730d468f05c9cae" [[package]] name = "pin-project-lite" -version = "0.2.9" +version = "0.2.13" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116" +checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58" [[package]] -name = "pin-utils" -version = "0.1.0" +name = "ppv-lite86" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" [[package]] name = "proc-macro2" -version = "1.0.47" +version = "1.0.78" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ea3d908b0e36316caf9e9e2c4625cdde190a7e6f440d794667ed17a1855e725" +checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae" dependencies = [ "unicode-ident", ] [[package]] name = "quote" -version = "1.0.21" +version = "1.0.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179" +checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" dependencies = [ "proc-macro2", ] [[package]] -name = "serde" -version = "1.0.145" +name = "rand" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "728eb6351430bccb993660dfffc5a72f91ccc1295abaa8ce19b27ebe4f75568b" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ - "serde_derive", + "libc", + "rand_chacha", + "rand_core", ] [[package]] -name = "serde_derive" -version = "1.0.145" +name = "rand_chacha" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81fa1584d3d1bcacd84c277a0dfe21f5b0f6accf4a23d04d4c6d61f1af522b4c" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" dependencies = [ - "proc-macro2", - "quote", - "syn", + "ppv-lite86", + "rand_core", ] [[package]] 
-name = "slab" -version = "0.4.7" +name = "rand_core" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4614a76b2a8be0058caa9dbbaf66d988527d86d003c11a94fbd335d7661edcef" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "autocfg", + "getrandom", ] +[[package]] +name = "rustc-demangle" +version = "0.1.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" + [[package]] name = "syn" -version = "1.0.103" +version = "2.0.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a864042229133ada95abf3b54fdc62ef5ccabe9515b64717bcb9a1919e59445d" +checksum = "74f1bdc9872430ce9b75da68329d1c1746faf50ffac5f19e02b71e37ff881ffb" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] +[[package]] +name = "thiserror" +version = "1.0.57" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e45bcbe8ed29775f228095caf2cd67af7a4ccf756ebff23a306bf3e8b47b24b" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.57" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a953cb265bef375dae3de6663da4d3804eee9682ea80d8e2542529b73c531c81" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tokio" +version = "1.36.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61285f6515fa018fb2d1e46eb21223fff441ee8db5d0f1435e8ab4f5cdb80931" +dependencies = [ + "backtrace", + "num_cpus", + "pin-project-lite", +] + [[package]] name = "unicode-ident" version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ceab39d59e4c9499d4e5a8ee0e2735b891bb7308ac83dfb4e80cad195c9f6f3" + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" diff --git a/Cargo.toml b/Cargo.toml index 643d6f8..77a7916 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,8 +1,8 @@ [package] name = "mbon" -version = "0.2.0" +version = "0.3.0+nightly" edition = "2021" -authors = ["Benjamin Jacobs "] +authors = ["Benjamin Jacobs "] description = "A serializer/deserializer for marked binary object notation" license = "MIT" @@ -14,16 +14,32 @@ keywords = ["binary", "serde"] # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -serde = "1.0" -byteorder = "1.4" -futures = { version = "0.3", optional = true } -async-recursion = { version = "1.0", optional = true } +byteorder = "1" +enum-as-inner = "0.6" +maybe-async = { version = "0.2" } +anyhow = "1" +thiserror = "1" + +# Async Dependencies +async-channel = { version = "2", optional = true} +tokio = { version = "1", features = ["rt-multi-thread"], optional = true } +async-trait = { version = "0.1", optional = true } [dev-dependencies] -serde = {version = "1.0", features = ["derive"] } +rand = "0.8" +# serde = "1.0" + +# [dev-dependencies] +# serde = {version = "1.0", features = ["derive"] } [features] -async = ["dep:futures", "dep:async-recursion"] +default = ["sync"] +# default = ["async-tokio"] + +sync = ["maybe-async/is_sync"] + +async = ["dep:async-channel", "dep:async-trait"] +async-tokio = ["async", "dep:tokio"] [package.metadata.docs.rs] diff --git a/readme.md b/readme.md index 166a841..9709760 100644 --- a/readme.md +++ b/readme.md @@ -1,335 +1,21 @@ [![](https://img.shields.io/crates/v/mbon.svg)][crates-io] -[![api-docs](https://docs.rs/mbon/badge.svg)][Documentation] +[![api-docs](https://docs.rs/mbon/badge.svg)][Docsrs] +[![](https://img.shields.io/badge/0.3.0%2Bnightly-hello?label=Nightly%20Docs&color=orange)][Documentation] [crates-io]: https://crates.io/crates/mbon 
-[Documentation]: https://docs.rs/mbon +[Documentation]: https://mbon.benjaminja.info +[Docsrs]: https://docs.rs/mbon -> This project is currently undergoing a rewrite. The code can be found in -> the [rewrite] branch. A discussion of why this is happening can be found -> in [#1]. +# Mbon 0.3.0+Nightly -[rewrite]: https://github.com/ttocsneb/mbon/tree/rewrite -[#1]: https://github.com/ttocsneb/mbon/issues/1 +Mbon is currently undergoing a major rewrite. -# Marked Binary Object Notation +You may view the specification for the file format under [spec]. -mbon is a binary notation that is inspired by the NBT format. +[spec]: spec/index.md -It is formed of a sequence of strongly typed values. Each made up of two -parts: a mark which defines the type and size of the data, followed by the -data. Marks can be different in size and so a single byte prefix is used to -differenciate between types. +During this rewrite, I will not be publishing to crates.io until it is in a +usable state. In the meantime, you can view nightly docs at [mbon.benjaminja.info] -This format is self-describing which means that it is able to know if the -data is not formatted correctly or a different type was stored than what -was expected. Another feature of the self-describing nature of the format -is that you can skip values in the data without the need to parse the complete -item, e.g. A 1GB value can be easily skipped by only reading the mark. +[mbon.benjaminja.info]: https://mbon.benjaminja.info/mbon/ -## Usage - -### Dumping - -You can dump binary data using the [dumper::Dumper][Dumper] struct. You can -write values directly or use serde's serialize to write more complex data. 
- -[Dumper]: https://docs.rs/mbon/latest/mbon/dumper/struct.Dumper.html - -```rust -use mbon::dumper::Dumper; - -let a = 32; -let b = "Hello World"; -let c = b'a'; - -let mut dumper = Dumper::new(); -dumper.write_int(a).unwrap(); -dumper.write(&b).unwrap(); -dumper.write(&c).unwrap(); - -let output = dumper.writer(); -assert_eq!(output, b"i\x00\x00\x00\x20s\x00\x00\x00\x0bHello Worldca"); -``` - -### Parsing - -You can parse binary data using the [parser::Parser][Parser] struct. You can -parse Value's directly, but it is recommended to use serde to parse data. - -[Parser]: https://docs.rs/mbon/latest/mbon/parser/struct.Parser.html - -```rust -use mbon::parser::Parser; -use mbon::data::Value; - -let data = b"i\x00\x00\x00\x20s\x00\x00\x00\x0bHello Worldca"; - -let mut parser = Parser::from(data); - -let a = parser.next_value().unwrap(); -let b: String = parser.next().unwrap(); -let c: u8 = parser.next().unwrap(); - -if let Value::Int(a) = a { - assert_eq!(a, 32); -} else { - panic!("a should have been an int"); -} - -assert_eq!(b, "Hello World"); -assert_eq!(c, b'a'); -``` - -### Embedded Objects - -If you are wanting to embed a predefined object inside the format, you can -impl [object::ObjectDump][ObjectDump]/[object::ObjectParse][ObjectParse]. Keep -in mind that you will need to call `write_obj()`/`parse_obj()` to take -advantage of it. 
- -[ObjectDump]: https://docs.rs/mbon/latest/mbon/object/trait.ObjectDump.html -[ObjectParse]: https://docs.rs/mbon/latest/mbon/object/trait.ObjectParse.html - -```rust -use mbon::parser::Parser; -use mbon::dumper::Dumper; -use mbon::error::Error; -use mbon::object::{ObjectDump, ObjectParse}; - -#[derive(Debug, PartialEq, Eq)] -struct Foo { - a: i32, - b: String, - c: char, -} - -impl ObjectDump for Foo { - type Error = Error; - - fn dump_object(&self) -> Result, Self::Error> { - let mut dumper = Dumper::new(); - - dumper.write(&self.a)?; - dumper.write(&self.b)?; - dumper.write(&self.c)?; - - Ok(dumper.writer()) - } -} - -impl ObjectParse for Foo { - type Error = Error; - - fn parse_object(object: &[u8]) -> Result { - let mut parser = Parser::new(object); - - let a = parser.next()?; - let b = parser.next()?; - let c = parser.next()?; - - Ok(Self { a, b, c }) - } -} - -let foo = Foo { a: 32, b: "Hello World".to_owned(), c: '🫠' }; -let mut dumper = Dumper::new(); - -dumper.write_obj(&foo).unwrap(); - -let buf = dumper.writer(); -let mut parser = Parser::from(&buf); - -let new_foo: Foo = parser.next_obj().unwrap(); - -assert_eq!(foo, new_foo); -``` - -### Async Implementations - -If you want to parse data asynchronously, you may want to use the provided -wrappers: [async_wrapper::AsyncDumper][AsyncDumper], -[async_wrapper::AsyncParser][AsyncParser]. 
- -[AsyncDumper]: https://docs.rs/mbon/latest/mbon/async_wrapper/struct.AsyncDumper.html -[AsyncParser]: https://docs.rs/mbon/latest/mbon/async_wrapper/struct.AsyncParser.html - -```rust -use futures::io::{AsyncWriteExt, Cursor}; - -use mbon::async_wrapper::{AsyncDumper, AsyncParser}; - -let writer = Cursor::new(vec![0u8; 5]); -let mut dumper = AsyncDumper::from(writer); - -dumper.write(&15u32)?; -dumper.flush().await?; - -let mut reader = dumper.writer(); -reader.set_position(0); - -let mut parser = AsyncParser::from(reader); - -let val: u32 = parser.next().await?; - -assert_eq!(val, 15); -``` - -## Grammar - -Below is a grammar for the binary format. Note that all numbers are stored -in big-endian form. - -```EBNF -Value ::= long | int | short | char | float | double | null | bytes - | str | object | enum | array | list | dict | map; -Mark ::= Mlong | Mint | Mshort | Mchar | Mfloat | Mdouble | Mnull - | Mbytes | Mstr | Mobject | Menum | Marray | Mlist | Mdict | Mmap; -Data ::= Dlong | Dint | Dshort | Dchar | Dfloat | Ddouble | Dnull - | Dbytes | Dstr | Dobject | Denum | Darray | Dlist | Ddict | Dmap; - -u32 ::= ; -i64 ::= ; -i32 ::= ; -i16 ::= ; -i8 ::= ; -f64 ::= ; -f32 ::= ; - - -Mlong ::= 'l'; -Mint ::= 'i'; -Mshort ::= 'h'; -Mchar ::= 'c'; -Mfloat ::= 'f'; -Mdouble ::= 'd'; -Mnull ::= 'n'; -Mbytes ::= 'b' u32; -Mstr ::= 's' u32; -Mobject ::= 'o' u32; -Menum ::= 'e' Mark#enum; -Marray ::= 'a' Mark#item u32; -Mlist ::= 'A' u32; -Mdict ::= 'm' Mark#key Mark#value u32; -Mmap ::= 'M' u32; - -Dlong ::= i64; -Dint ::= i32; -Dshort ::= i16; -Dchar ::= i8; -Dfloat ::= f32; -Ddouble ::= f64; -Dnull ::= ; -Dbytes ::= u8 Dbytes |; -Dstr ::= u8 Dbytes |; -Dobject ::= u8 Dbytes |; -Denum ::= u32 Data#enum; -Darray ::= Data#item Darray |; -Dlist ::= Value Dlist |; -Ddict ::= Data#key Data#value Ddict |; -Dmap ::= Value Value Dmap |; - -long ::= Mlong Dlong; -int ::= Mint Dint; -short ::= Mshort Dshort; -char ::= Mchar Dchar; -float ::= Mfloat Dflaot; -double ::= Mdouble 
Ddouble; -null ::= Mnull; -bytes ::= Mbytes Dbytes; -str ::= Mstr Dstr; -object ::= Mobject Dbytes; -enum ::= Menum Denum; -array ::= Marray Darray; -list ::= Mlist Dlist; -dict ::= Mdict Ddict; -map ::= Mmap Dmap; -``` - -> `X#name` means that it is related to any other `Y#name`, e.g. `Mark#item` -> in `Marray` relates to `Data#item`. - -## Specification - -| Name | Description | -|--------|-----------------------------------| -| Long | 64 bit integer | -| Int | 32 bit integer | -| Short | 16 bit integer | -| Char | 8 bit integer | -| Float | 32 bit IEEE-754 float | -| Double | 64 bit IEEE-754 float | -| Null | Only the mark | -| Bytes | Unencoded string of bytes | -| Str | UTF-8 encoded string | -| Object | Embeded preformatted data | -| Enum | u32 Variant, embed data | -| Array | `len` list of `item` data | -| List | list of values | -| Dict | `len` list of `key`-`value` pairs | -| Map | list of key-value pairs | - -### Numbers - -Every number is defined only by their mark. There is no additional data -stored in a number's mark. - -All numbers are stored in a big endian binary form. Integers are internally -considered signed, however, there is no requirement that they need to be -signed, so it is possible to read an unsigned integer as a signed integer. - -### Strings - -Strings will store their type marker followed by a u32 for their length e.g. -`b"s\x00\x00\x00\x05"` would indicate a string that is 5 bytes long. Strings -must be UTF-8 encoded; If you do not want this behaviour, you can use Bytes -which behave in the same way as Str, but without the UTF-8 requirement. - -### Object - -If you wanted to embed binary data, you can use an object value. This is -similar to the bytes value, but it uses an unsigned int for the length. It -is meant for storing binary data with a predetermined format. - -### Enum - -An enum is meant to be compatible with Rust's enum serialization. It is -defined by a variant id followed by an embedded Value. 
To make the enum -self-describing, the mark for the embedded value is placed within the enum's -mark. - -### Null - -Null is only uses its mark, there is no data associated with it. - -### Array - -Sequences can be stored in two forms; The Array being more strict than a -list. If a sequence cannot be stored as an array, it will be stored as a -list. - -An array is a sequence of items that all share the same mark. This means -that all elements must be the same size: A vector of u32's will always be -stored as an array, while a vector of strings can only be stored as an array -if each string is the same length. - -The Array Mark contains the mark of the contained item followed by then the -number of items in a u16. - -### List - -The list is the more lenient way to store sequences. It simply holds a -sequence of all the items. The mark of a list simply holds the number of -bytes in the list as a u32. - -### Dict - -A dict is similar to an array, but it stores key-value pairs instead. All -keys must share the same mark and all values must share the same mark. - -The Dict mark contains the key mark, followed by the value mark, and finally -the number of items in the dict. - -### Map - -The map, similar to the list stores any value types, but in a key-value -format. This is the fallback format if a value cannot be stored as a dict. diff --git a/src/async_wrapper.rs b/src/async_wrapper.rs deleted file mode 100644 index 80a4e2a..0000000 --- a/src/async_wrapper.rs +++ /dev/null @@ -1,554 +0,0 @@ -//! # Async Wrappers for Dumper and Parser -//! -//! > You need to enable the feature `async` to use these implementations. -//! -//! Async wrappers are provided for [Dumper]: [AsyncDumper] and [Parser]: -//! [AsyncParser]. -//! -//! [Dumper]: crate::dumper::Dumper -//! 
[Parser]: crate::parser::Parser - -use std::io::SeekFrom; -use std::mem; - -use crate::data::{Mark, Type, Value}; -use crate::dumper::Dumper; -use crate::error::Result; -use crate::object::{ObjectDump, ObjectParse}; -use crate::parser::Parser; - -use async_recursion::async_recursion; -use byteorder::{BigEndian, ReadBytesExt}; -use futures::{AsyncReadExt, AsyncSeekExt, AsyncWriteExt}; -use serde::de::DeserializeOwned; -use serde::Serialize; - -/// A wrapper for [Dumper](crate::dumper::Dumper). -/// -/// AsyncDumper writes to a buffer that can be sent to the writer with a call -/// to [send()](AsyncDumper::send). -/// -/// ## Example -/// -/// ``` -/// # futures::executor::block_on(async { -/// use futures::io::{AsyncWriteExt, Cursor}; -/// -/// use mbon::async_wrapper::AsyncDumper; -/// -/// let writer = Cursor::new(vec![0u8; 5]); -/// let mut dumper = AsyncDumper::from(writer); -/// -/// dumper.write(&15u32)?; -/// dumper.flush().await?; -/// -/// assert_eq!(dumper.writer().into_inner(), b"i\x00\x00\x00\x0f"); -/// # Ok::<(), Box>(()) }).unwrap(); -/// ``` -#[derive(Debug)] -pub struct AsyncDumper { - writer: R, - dumper: Dumper>, -} - -impl From for AsyncDumper -where - R: AsyncWriteExt + Unpin, -{ - fn from(writer: R) -> Self { - Self { - writer, - dumper: Dumper::new(), - } - } -} - -impl AsRef for AsyncDumper { - fn as_ref(&self) -> &R { - &self.writer - } -} - -impl AsMut for AsyncDumper { - fn as_mut(&mut self) -> &mut R { - &mut self.writer - } -} - -impl AsyncDumper -where - R: AsyncWriteExt + Unpin, -{ - /// Turn the dumper into the underlying writer - pub fn writer(self) -> R { - self.writer - } - - /// Get the underlying writer as a reference - pub fn get_writer(&self) -> &R { - &self.writer - } - - /// Get the underlying writer as a mutable reference - pub fn get_writer_mut(&mut self) -> &mut R { - &mut self.writer - } - - /// Try to send pending data to the async writer - /// - /// If all of the pending data has been sent, true will be returned. 
- /// - /// If you want to send all data, use [flush()](AsyncDumper::flush) instead. - pub async fn send(&mut self) -> Result { - let written = self.writer.write(self.dumper.get_writer()).await?; - - let buf = &self.dumper.get_writer()[written..]; - let all_done = buf.is_empty(); - - // update the buffer to contain what hasn't been sent yet - if all_done { - self.dumper.get_writer_mut().clear(); - } else { - let buf = buf.to_vec(); - drop(mem::replace(self.dumper.get_writer_mut(), buf)); - } - - Ok(all_done) - } - - /// Send all pending data to the async writer - /// - /// The returned future will not complete until all data has been written to - /// the writer. - /// - /// see [futures::AsyncWriteExt::write_all] - pub async fn flush(&mut self) -> Result<()> { - self.writer.write_all(self.dumper.get_writer()).await?; - self.dumper.get_writer_mut().clear(); - Ok(()) - } - - /// Write a serializable object to the buffer. - /// - /// This will not send any data to the writer, use - /// [flush()](AsyncDumper::flush) to write to the writer. - /// - /// see [Dumper::write()](crate::dumper::Dumper::write) - #[inline] - pub fn write(&mut self, value: &T) -> Result<()> - where - T: Serialize, - { - self.dumper.write(value) - } - - /// Write a binary object to the buffer. - /// - /// This will not send any data to the writer, use - /// [flush()](AsyncDumper::flush) to write to the writer. - /// - /// see [Dumper::write_obj()](crate::dumper::Dumper::write_obj) - #[inline] - pub fn write_obj(&mut self, value: &T) -> Result<()> - where - T: ObjectDump, - ::Error: std::error::Error + 'static, - { - self.dumper.write_obj(value) - } - - /// Write a 64 bit integer to the buffer. - /// - /// This will not send any data to the writer, use - /// [flush()](AsyncDumper::flush) to write to the writer. 
- /// - /// see [Dumper::write_long()](crate::dumper::Dumper::write_long) - #[inline] - pub fn write_long(&mut self, val: i64) -> Result<()> { - self.dumper.write_long(val) - } - - /// Write a 32 bit integer to the buffer. - /// - /// This will not send any data to the writer, use - /// [flush()](AsyncDumper::flush) to write to the writer. - /// - /// see [Dumper::write_int()](crate::dumper::Dumper::write_int) - #[inline] - pub fn write_int(&mut self, val: i32) -> Result<()> { - self.dumper.write_int(val) - } - - /// Write a 16 bit integer to the buffer. - /// - /// This will not send any data to the writer, use - /// [flush()](AsyncDumper::flush) to write to the writer. - /// - /// see [Dumper::write_short()](crate::dumper::Dumper::write_short) - #[inline] - pub fn write_short(&mut self, val: i16) -> Result<()> { - self.dumper.write_short(val) - } - - /// Write a 8 bit integer to the buffer. - /// - /// This will not send any data to the writer, use - /// [flush()](AsyncDumper::flush) to write to the writer. - /// - /// see [Dumper::write_char()](crate::dumper::Dumper::write_char) - #[inline] - pub fn write_char(&mut self, val: i8) -> Result<()> { - self.dumper.write_char(val) - } - - /// Write a 32 bit IEEE754 float to the buffer. - /// - /// This will not send any data to the writer, use - /// [flush()](AsyncDumper::flush) to write to the writer. - /// - /// see [Dumper::write_float()](crate::dumper::Dumper::write_float) - #[inline] - pub fn write_float(&mut self, val: f32) -> Result<()> { - self.dumper.write_float(val) - } - - /// Write a 64 bit IEEE754 float to the buffer. - /// - /// This will not send any data to the writer, use - /// [flush()](AsyncDumper::flush) to write to the writer. - /// - /// see [Dumper::write_double()](crate::dumper::Dumper::write_double) - #[inline] - pub fn write_double(&mut self, val: f64) -> Result<()> { - self.dumper.write_double(val) - } - - /// Write a string of bytes to the buffer. 
- /// - /// This will not send any data to the writer, use - /// [flush()](AsyncDumper::flush) to write to the writer. - /// - /// see [Dumper::write_bytes()](crate::dumper::Dumper::write_bytes) - #[inline] - pub fn write_bytes(&mut self, val: impl AsRef<[u8]>) -> Result<()> { - self.dumper.write_bytes(val) - } - - /// Write a string to the buffer. - /// - /// This will not send any data to the writer, use - /// [flush()](AsyncDumper::flush) to write to the writer. - /// - /// see [Dumper::write_str()](crate::dumper::Dumper::write_str) - #[inline] - pub fn write_str(&mut self, val: impl AsRef) -> Result<()> { - self.dumper.write_str(val) - } - - /// Write a binary object to the buffer. - /// - /// This will not send any data to the writer, use - /// [flush()](AsyncDumper::flush) to write to the writer. - /// - /// see [Dumper::write_object()](crate::dumper::Dumper::write_object) - #[inline] - pub fn write_object(&mut self, val: impl AsRef<[u8]>) -> Result<()> { - self.dumper.write_object(val) - } - - /// Write an indexed value to the buffer. - /// - /// This will not send any data to the writer, use - /// [flush()](AsyncDumper::flush) to write to the writer. - /// - /// see [Dumper::write_enum()](crate::dumper::Dumper::write_enum) - #[inline] - pub fn write_enum(&mut self, variant: u32, val: impl AsRef) -> Result<()> { - self.dumper.write_enum(variant, val) - } - - /// Write an null value to the buffer. - /// - /// This will not send any data to the writer, use - /// [flush()](AsyncDumper::flush) to write to the writer. - /// - /// see [Dumper::write_null()](crate::dumper::Dumper::write_null) - #[inline] - pub fn write_null(&mut self) -> Result<()> { - self.dumper.write_null() - } - - /// Write an list of values to the buffer. - /// - /// This will not send any data to the writer, use - /// [flush()](AsyncDumper::flush) to write to the writer. 
- /// - /// see [Dumper::write_list()](crate::dumper::Dumper::write_list) - #[inline] - pub fn write_list(&mut self, val: impl AsRef>) -> Result<()> { - self.dumper.write_list(val) - } - - /// Write an key, value map of values to the buffer. - /// - /// This will not send any data to the writer, use - /// [flush()](AsyncDumper::flush) to write to the writer. - /// - /// see [Dumper::write_map()](crate::dumper::Dumper::write_map) - #[inline] - pub fn write_map(&mut self, val: impl AsRef>) -> Result<()> { - self.dumper.write_map(val) - } - - /// Write any value to the buffer. - /// - /// This will not send any data to the writer, use - /// [flush()](AsyncDumper::flush) to write to the writer. - /// - /// see [Dumper::write_value()](crate::dumper::Dumper::write_value) - #[inline] - pub fn write_value(&mut self, val: impl AsRef) -> Result<()> { - self.dumper.write_value(val) - } -} - -/// A wrapper for [Parser](crate::parser::Parser). -/// -/// AsyncParser reads from the reader into a buffer where Parser can parse the -/// requested data. Every request for data will ask for exactly what's needed -/// to perform the task. 
-/// -/// ## Example -/// -/// ``` -/// # futures::executor::block_on(async { -/// use futures::io::Cursor; -/// -/// use mbon::async_wrapper::AsyncParser; -/// -/// let reader = Cursor::new(b"i\x00\x00\x00\x0f"); -/// let mut parser = AsyncParser::from(reader); -/// -/// let val: u32 = parser.next().await?; -/// -/// assert_eq!(val, 15); -/// # Ok::<(), Box>(()) }).unwrap(); -/// ``` -#[derive(Debug)] -pub struct AsyncParser(R); - -impl From for AsyncParser -where - R: AsyncReadExt + Unpin + Send, -{ - fn from(reader: R) -> Self { - Self(reader) - } -} - -impl AsRef for AsyncParser { - fn as_ref(&self) -> &R { - &self.0 - } -} - -impl AsMut for AsyncParser { - fn as_mut(&mut self) -> &mut R { - &mut self.0 - } -} - -impl AsyncParser -where - R: AsyncReadExt + Unpin + Send, -{ - /// Turn the parser into the underlying reader - #[inline] - pub fn reader(self) -> R { - self.0 - } - - /// Get the underlying reader as a reference - #[inline] - pub fn get_reader(&self) -> &R { - &self.0 - } - - /// Get the underlying reader as a mutable reference - #[inline] - pub fn get_reader_mut(&mut self) -> &mut R { - &mut self.0 - } - - /// Parse the next item in the parser. - #[inline] - pub async fn next(&mut self) -> Result - where - T: DeserializeOwned, - { - self.next_value().await?.parse() - } - - /// Parse the next custom object in the parser. - /// - /// This allows you to be able to parse custom binary data. A common usecase - /// is to store a struct in a more compact form. You could also use object - /// values to store a different format altogether. 
- /// - /// Note: the next value in the parser must be an Object - /// - /// see [Parser::next_obj()](crate::parser::Parser::next_obj) - /// - #[inline] - pub async fn next_obj(&mut self) -> Result - where - T: ObjectParse, - ::Error: std::error::Error + 'static, - { - self.next_value().await?.parse_obj() - } - - async fn next_size(&mut self) -> Result { - let mut buf = [0u8; 4]; - self.0.read_exact(&mut buf).await?; - Ok(buf.as_slice().read_u32::()? as usize) - } - - #[async_recursion] - async fn next_mark(&mut self) -> Result { - // I don't particularly like this implementation as it redefines - // next_mark, but I don't see another way to know the size of the data - // without first getting the mark, and we can't get the size of the - // mark from the prefix as some marks are recursive. - let mut buf = [0u8; 1]; - self.0.read_exact(&mut buf).await?; - let prefix = Type::from_prefix(buf[0])?; - Ok(match prefix { - Type::Long => Mark::Long, - Type::Int => Mark::Int, - Type::Short => Mark::Short, - Type::Char => Mark::Char, - Type::Float => Mark::Float, - Type::Double => Mark::Double, - Type::Bytes => Mark::Bytes(self.next_size().await?), - Type::Str => Mark::Str(self.next_size().await?), - Type::Object => Mark::Object(self.next_size().await?), - Type::Enum => Mark::Enum(Box::new(self.next_mark().await?)), - Type::Null => Mark::Null, - Type::Array => { - let mark = self.next_mark().await?; - let len = self.next_size().await?; - Mark::Array(len, Box::new(mark)) - } - Type::List => Mark::List(self.next_size().await?), - Type::Dict => { - let kmark = self.next_mark().await?; - let vmark = self.next_mark().await?; - let len = self.next_size().await?; - Mark::Dict(len, Box::new(kmark), Box::new(vmark)) - } - Type::Map => Mark::Map(self.next_size().await?), - }) - } - - /// Skip the next value in the parser. - /// - /// This will ignore the next value without parsing more than what's - /// necessary. 
- /// - /// If the reader supports seeking, then it is preffered to use - /// [`seek_next()`](AsyncParser::seek_next) instead. - /// - /// see [Parser::skip_next()](crate::parser::Parser::skip_next) - pub async fn skip_next(&mut self) -> Result<()> { - let mark = self.next_mark().await?; - let mut buf = vec![0u8; mark.data_size()]; - self.0.read_exact(&mut buf).await?; - Ok(()) - } - - /// Parse the next value in the parser. - /// - /// see [Parser::next_value()](crate::parser::Parser::next_value) - pub async fn next_value(&mut self) -> Result { - let mark = self.next_mark().await?; - let mut buf = vec![0u8; mark.data_size()]; - self.0.read_exact(&mut buf).await?; - - let mut parser = Parser::from(&buf); - parser.next_data_value(&mark) - } -} - -impl AsyncParser -where - R: AsyncReadExt + AsyncSeekExt + Unpin + Send, -{ - /// Seek to the next value in the parser. - /// - /// This will efficiently skip the next value without reading more than - /// what's necessary - /// - /// see [Parser::seek_next()](crate::parser::Parser::seek_next) - pub async fn seek_next(&mut self) -> Result<()> { - let mark = self.next_mark().await?; - self.0 - .seek(SeekFrom::Current(mark.data_size() as i64)) - .await?; - Ok(()) - } -} - -#[cfg(test)] -mod test { - use super::*; - use futures::io::Cursor; - - #[test] - fn test_parser() { - futures::executor::block_on(async { - let reader = Cursor::new(b"ac\x00\x00\x00\x04\x01\x02\x03\x04"); - let mut parser = AsyncParser::from(reader); - - let val: Vec = parser.next().await?; - - assert_eq!(val, vec![1, 2, 3, 4]); - Ok::<(), Box>(()) - }) - .unwrap(); - } - - #[test] - fn test_seek() { - futures::executor::block_on(async { - let reader = Cursor::new( - b"s\x00\x00\x00\x23This is a string I don't care abouti\x00\x00\x00\x20", - ); - let mut parser = AsyncParser::from(reader); - - parser.seek_next().await?; - let val: u32 = parser.next().await?; - - assert_eq!(val, 32); - Ok::<(), Box>(()) - }) - .unwrap(); - } - - #[test] - fn test_skip() { - 
futures::executor::block_on(async { - let reader = Cursor::new( - b"s\x00\x00\x00\x23This is a string I don't care abouti\x00\x00\x00\x20", - ); - let mut parser = AsyncParser::from(reader); - - parser.skip_next().await?; - let val: u32 = parser.next().await?; - - assert_eq!(val, 32); - Ok::<(), Box>(()) - }) - .unwrap(); - } -} diff --git a/src/buffer.rs b/src/buffer.rs new file mode 100644 index 0000000..73feae2 --- /dev/null +++ b/src/buffer.rs @@ -0,0 +1,719 @@ +//! Contains [BufferedReadWrite], which is a wrapper for files. +//! +//! It currently can only be implemented synchronously which requires that +//! operations are executed in a spawn_blocking context. This isn't the worst, +//! but it would be nice to be able to utilize async io as it is natively +//! supported by tokio. +//! +//! I can't just write an AsyncRead/AsyncWrite wrapper since it requires a state +//! that would make the current implementation way too complex. If I were to +//! implement AsyncReadExt/AsyncWriteExt which have a nicer interface, I would also need +//! to implement the base traits. It's possible that I could just implement the +//! base trait and panic if the base trait is called, but I don't know how I +//! feel about that. +//! +//! I could make a custom trait that all AsyncReadExt objects would implement, +//! but then users would need to import that custom trait whenever they are +//! using the engine which doesn't sound great either. +//! +//! Also, how would I deal with files that are provided that are sync only, such +//! as with a `vec`? When in async mode, I would have to have two +//! implementations available for whether F is async or not. + +use std::{ + collections::{BinaryHeap, HashMap, HashSet}, + mem, +}; + +use std::io::{self, Read, Seek, SeekFrom, Write}; + +struct Block { + data: Vec, + access: u64, +} + +/// A wrapper for files which holds a buffer for the file.
+/// +/// This buffer can hold the entire file in memory and has an option to limit +/// how much data is stored in memory (the default limit is 1GiB). +/// +/// Reads and writes are performed in blocks (the default block size is 512 +/// bytes). +/// +/// This struct is designed to work with simultaneous read/write operations. +/// +/// No writes occur to the underlying file until either flush is called, or the +/// cache limit has been met. +pub struct BufferedReadWrite { + blocks: HashMap, + modified: HashSet, + file: F, + block_size: u64, + max_blocks: usize, + ideal_blocks: usize, + cursor: u64, + access_count: u64, +} + +// This macro is needed because wrapping it in a function causes issues with the +// borrow checker. (only access_count is modified) +macro_rules! get_block { + ($self:ident, $block:expr) => {{ + if let Some(block) = $self.blocks.get_mut(&$block) { + block.access = $self.access_count; + $self.access_count += 1; + Some(&block.data) + } else { + None + } + }}; + (mut $self:ident, $block:expr) => {{ + if let Some(block) = $self.blocks.get_mut(&$block) { + block.access = $self.access_count; + $self.access_count += 1; + Some(&mut block.data) + } else { + None + } + }}; +} + +/// Builder for [BufferedReadWrite]. +pub struct BufferedReadWriteBuilder { + file: F, + block_size: Option, + max_blocks: Option, + ideal_blocks: Option, + max_cache: Option, + ideal_cache: Option, +} + +impl BufferedReadWriteBuilder { + /// Set the number of bytes in a block. + /// + /// The default is 512 bytes. + pub fn with_block_size(mut self, block_size: u64) -> Self { + self.block_size = Some(block_size); + self + } + + /// The maximum number of blocks to have in the cache. + /// + /// This sets the same value as [Self::with_max_cache], but in a different + /// unit + /// + /// The default value is 1GiB. + /// + /// Note that during a single read, the cache may become larger than the + /// maximum cache for the duration of the read. 
+ pub fn with_max_blocks(mut self, max_blocks: usize) -> Self { + self.max_blocks = Some(max_blocks); + self.max_cache = None; + self + } + + /// The maximum number of bytes to have in the cache. + /// + /// This sets the same value as [Self::with_max_blocks], but in a different unit + /// + /// The default value is 1GiB. + /// + /// Note that during a single read, the cache may become larger than the + /// maximum cache for the duration of the read. + pub fn with_max_cache(mut self, max_cache: u64) -> Self { + self.max_cache = Some(max_cache); + self.max_blocks = None; + self + } + + /// The number of blocks to reduce the cache to when the cache has filled up. + /// + /// This sets the same value as [Self::with_ideal_cache], but in a different + /// unit + /// + /// The default value is `max_cache - (1MiB, 1KiB, or max_cache / 5 + /// /* Which ever is the largest value smaller than max_cache*/)`. + pub fn with_ideal_blocks(mut self, max_blocks: usize) -> Self { + self.ideal_blocks = Some(max_blocks); + self.ideal_cache = None; + self + } + + /// The number of bytes to reduce the cache to when the cache has filled up. + /// + /// This sets the same value as [Self::with_ideal_blocks], but in a + /// different unit + /// + /// The default value is `max_cache - (1MiB, 1KiB, or max_cache / 5 + /// /* Which ever is the largest value smaller than max_cache*/)`. 
+ pub fn with_ideal_cache(mut self, max_cache: u64) -> Self { + self.ideal_cache = Some(max_cache); + self.ideal_blocks = None; + self + } + + /// Create the BufferedReadWrite object + pub fn build(self) -> BufferedReadWrite { + let block_size = self.block_size.unwrap_or(512); + let max_blocks = self + .max_blocks + .unwrap_or_else(|| (self.max_cache.unwrap_or(1_073_741_824) / block_size) as usize); + let ideal_blocks = self + .ideal_blocks + .or_else(|| self.ideal_cache.map(|cache| (cache / block_size) as usize)) + .unwrap_or_else(|| { + let mut blocks = (1_048_576 / block_size) as usize; + if blocks > max_blocks { + blocks = (1024 / block_size) as usize; + } + if blocks > max_blocks { + blocks = max_blocks / 5; + } + + max_blocks - blocks + }); + // .unwrap_or_else(|| (self.rec_cache.unwrap_or(1_000_000_000) / block_size) as usize); + BufferedReadWrite { + blocks: HashMap::new(), + modified: HashSet::new(), + file: self.file, + cursor: 0, + block_size, + max_blocks, + ideal_blocks, + access_count: 0, + } + } +} + +impl From for BufferedReadWrite +where + F: Seek, +{ + fn from(value: F) -> Self { + Self::new(value).build() + } +} + +impl BufferedReadWrite +where + F: Seek, +{ + /// Create a new BufferedReadWriteBuilder. + /// + /// ```no_run + /// use mbon::buffer::BufferedReadWrite; + /// use std::fs::File; + /// + /// let file = File::options() + /// .read(true) + /// .write(true) + /// .create(true) + /// .open("my_file.mbon").unwrap(); + /// let f = BufferedReadWrite::new(file).build(); + /// ``` + #[inline] + pub fn new(file: F) -> BufferedReadWriteBuilder { + BufferedReadWriteBuilder { + file, + block_size: None, + max_blocks: None, + max_cache: None, + ideal_cache: None, + ideal_blocks: None, + } + } + + /// Purge the n_blocks least recently used blocks from the cache. + /// + /// This will ignore any blocks that have been modified. 
fn purge_least_recently_used(&mut self, n_blocks: usize) {
        if n_blocks == 0 {
            return;
        }

        // Max-heap ordered by access stamp: once it holds more than
        // `n_blocks` candidates, popping discards the most recently used
        // one, so the heap ends up holding the oldest unmodified blocks.
        let mut to_delete = BinaryHeap::new();

        for (k, v) in &self.blocks {
            if self.modified.contains(k) {
                // Don't try to clean modified blocks
                continue;
            }
            to_delete.push((v.access, *k));
            if to_delete.len() > n_blocks {
                to_delete.pop();
            }
        }

        for (_, k) in to_delete {
            self.blocks.remove(&k);
        }
    }

    /// Clear the cache without flushing the file.
    ///
    /// This will preserve any cached blocks that have been modified.
    pub fn clear_cache_no_flush(&mut self) {
        let blocks = mem::take(&mut self.blocks);
        self.blocks = blocks
            .into_iter()
            .filter(|(k, _)| self.modified.contains(k))
            .collect();
    }
}

impl<F> BufferedReadWrite<F>
where
    F: Write + Seek,
{
    /// Write every modified block to the underlying file and flush it.
    ///
    /// A block stops being marked as modified only after it has been
    /// written successfully. If a seek or write fails, the blocks that were
    /// not written stay marked, so they are neither purged from the cache
    /// nor skipped by a later flush.
    ///
    /// After a successful flush the cache is purged down to the ideal size
    /// if it has grown past the maximum size.
    fn flush_blocks(&mut self) -> io::Result<()> {
        // I'm sorting here because I would assume that it is quicker for the file system to write
        // in order than it would be to write in a random order.
        let mut modified: Vec<_> = self.modified.iter().copied().collect();
        modified.sort_unstable();

        let mut position = match modified.first() {
            Some(sect) => self.file.seek(SeekFrom::Start(sect * self.block_size))?,
            None => self.file.seek(SeekFrom::Current(0))?,
        };

        for sect in modified {
            let buf = match get_block!(mut self, sect) {
                Some(b) => b,
                None => {
                    // Nothing is cached for this block, so there is
                    // nothing left to write for it.
                    self.modified.remove(&sect);
                    continue;
                }
            };
            let pos = sect * self.block_size;
            if position != pos {
                self.file.seek(SeekFrom::Start(pos))?;
                position = pos;
            }

            self.file.write_all(buf.as_slice())?;
            position += buf.len() as u64;
            self.modified.remove(&sect);
        }
        self.file.flush()?;

        if self.blocks.len() > self.max_blocks {
            // saturating_sub: never underflow if the ideal size is larger
            // than the current number of cached blocks.
            self.purge_least_recently_used(self.blocks.len().saturating_sub(self.ideal_blocks));
        }

        Ok(())
    }

    /// Clear the cache
    ///
    /// If there are any modified changes, they will be written to disk before
    /// clearing the cache.
    pub fn clear_cache(&mut self) -> io::Result<()> {
        self.flush_blocks()?;
        self.blocks.clear();
        Ok(())
    }
}

impl<F> BufferedReadWrite<F>
where
    F: Read + Seek,
{
    /// Make sure the blocks covering `position..position + len` are cached.
    ///
    /// Blocks that are already cached are left alone; the others are read
    /// from the file in ascending order. A block cut short by the end of the
    /// file is cached with only the bytes that were read (possibly none),
    /// and no block after it is loaded.
    fn load_blocks(&mut self, position: u64, len: u64) -> io::Result<()> {
        let end = position + len;
        let block = position / self.block_size;
        // Round up so a partially covered last block is included.
        let end_block = (end + self.block_size - 1) / self.block_size;

        let to_load: Vec<u64> = (block..end_block)
            .filter(|sect| !self.blocks.contains_key(sect))
            .collect();

        let mut position = self.file.seek(SeekFrom::Current(0))?;

        for sect in to_load {
            let pos = sect * self.block_size;
            if position != pos {
                self.file.seek(SeekFrom::Start(pos))?;
                position = pos;
            }
            let mut buf = vec![0u8; self.block_size as usize];
            let mut offset = 0;
            let mut eof = false;

            while offset < buf.len() {
                let read = match self.file.read(&mut buf[offset..]) {
                    Ok(n) => n,
                    // An interrupted read is simply retried.
                    Err(err) if err.kind() == io::ErrorKind::Interrupted => continue,
                    Err(err) => return Err(err),
                };
                if read == 0 {
                    eof = true;
                    break;
                }
                position += read as u64;
                offset += read;
            }
            // Keep only the bytes that actually came from the file.
            buf.truncate(offset);

            self.blocks.insert(
                sect,
                Block {
                    data: buf,
                    access: self.access_count,
                },
            );
            self.access_count += 1;
            if eof {
                break;
            }
        }

        Ok(())
    }
}

impl<F> Read for BufferedReadWrite<F>
where
    F: Read + Seek,
{
    /// Read from the cache at the cursor, loading missing blocks first.
    ///
    /// Returns the number of bytes copied into `buf`; 0 means the cursor is
    /// at or past the end of the cached data. The cursor advances by the
    /// number of bytes read.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        self.load_blocks(self.cursor, buf.len() as u64)?;
        let mut offset = 0;
        let mut sect = self.cursor / self.block_size;
        let mut sect_index = self.cursor % self.block_size;
        let mut block = get_block!(self, sect);

        while let Some(buffer) = block {
            // A block cut short by the end of the file may be shorter than
            // `sect_index`; treat that as "no bytes available" rather than
            // slicing out of bounds.
            let available = buffer.get(sect_index as usize..).unwrap_or(&[]);
            let b = &mut buf[offset..];
            let to_read = available.len().min(b.len());

            b[..to_read].copy_from_slice(&available[..to_read]);
            offset += to_read;
            self.cursor += to_read as u64;
            if offset == buf.len() {
                break;
            }

            sect += 1;
            sect_index = 0;
            block = get_block!(self, sect);
        }

        if self.blocks.len() > self.max_blocks {
            self.purge_least_recently_used(self.blocks.len().saturating_sub(self.ideal_blocks));
        }

        Ok(offset)
    }
}

impl<F> Write for BufferedReadWrite<F>
where
    F: Read + Write + Seek,
{
    /// Write `buf` into the cache at the cursor.
    ///
    /// The data is split along block boundaries. Existing blocks are
    /// overwritten in place and grown (up to one block size) when the write
    /// extends past their end; blocks that are not cached yet are created.
    /// Any gap between the end of a short block and the cursor is filled
    /// with zeros. Every touched block is marked as modified, and the blocks
    /// are flushed to the file once the cache exceeds its maximum size.
    ///
    /// Returns the number of bytes written and advances the cursor by it.
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        if buf.is_empty() {
            return Ok(0);
        }

        self.load_blocks(self.cursor, buf.len() as u64)?;
        let block_size = self.block_size as usize;
        let mut offset = 0;
        let mut sect = self.cursor / self.block_size;
        let mut sect_index = (self.cursor % self.block_size) as usize;

        while offset < buf.len() {
            if !self.blocks.contains_key(&sect) {
                // Not in the file and never written before: start from an
                // empty block.
                self.blocks.insert(
                    sect,
                    Block {
                        data: Vec::new(),
                        access: self.access_count,
                    },
                );
                self.access_count += 1;
            }
            let buffer = match get_block!(mut self, sect) {
                Some(b) => b,
                None => break,
            };

            let remaining = &buf[offset..];
            // A block never grows beyond the block size.
            let end = (sect_index + remaining.len()).min(block_size);
            if buffer.len() < end {
                // Grows a short block; bytes between its old end and
                // `sect_index` become zeros.
                buffer.resize(end, 0);
            }
            let to_write = end - sect_index;
            buffer[sect_index..end].copy_from_slice(&remaining[..to_write]);
            self.modified.insert(sect);

            self.cursor += to_write as u64;
            offset += to_write;

            sect += 1;
            sect_index = 0;
        }

        if self.blocks.len() > self.max_blocks {
            self.flush_blocks()?;
        }

        Ok(offset)
    }

    /// Write all modified blocks to the underlying file.
    fn flush(&mut self) -> io::Result<()> {
        self.flush_blocks()
    }
}

impl<F> Seek for BufferedReadWrite<F>
where
    F: Seek,
{
    /// Move the cursor.
    ///
    /// `Start` and `Current` only update the cached cursor. `End` is
    /// resolved by seeking the underlying file.
    // NOTE(review): `End` does not appear to account for blocks appended in
    // the cache that have not been flushed yet -- confirm whether callers
    // are expected to flush first.
    fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
        match pos {
            SeekFrom::Start(p) => self.cursor = p,
            SeekFrom::End(_) => {
                self.cursor = self.file.seek(pos)?;
            }
            SeekFrom::Current(o) => {
                self.cursor = self.cursor.checked_add_signed(o).ok_or_else(|| {
                    io::Error::new(
                        io::ErrorKind::InvalidInput,
                        "Cannot seek to a negative position",
                    )
                })?
+ } + } + Ok(self.cursor) + } +} + +#[cfg(test)] +mod test { + use rand::{rngs::StdRng, Rng as _, SeedableRng}; + use std::{ + io::{Cursor, Read, Seek, Write}, + slice, + }; + + use super::*; + const FILE: &[u8] = b"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Nisl pretium fusce id velit ut tortor pretium viverra. Tincidunt nunc pulvinar sapien et ligula ullamcorper malesuada proin. Gravida neque convallis a cras semper auctor neque vitae tempus. Cursus eget nunc scelerisque viverra mauris in aliquam. Viverra maecenas accumsan lacus vel facilisis volutpat est velit. Pulvinar mattis nunc sed blandit libero volutpat sed cras ornare. Massa eget egestas purus viverra accumsan in nisl nisi scelerisque. Ornare massa eget egestas purus viverra accumsan in nisl. Sed risus ultricies tristique nulla aliquet enim tortor. Laoreet suspendisse interdum consectetur libero id faucibus nisl tincidunt. Nisl tincidunt eget nullam non. Pretium quam vulputate dignissim suspendisse in est. Non enim praesent elementum facilisis. Nibh mauris cursus mattis molestie a. Iaculis nunc sed augue lacus viverra vitae. In mollis nunc sed id semper risus. Augue neque gravida in fermentum et sollicitudin ac. Pellentesque pulvinar pellentesque habitant morbi tristique senectus. Libero nunc consequat interdum varius sit. + +Iaculis eu non diam phasellus vestibulum lorem sed risus ultricies. Vitae ultricies leo integer malesuada nunc. Enim lobortis scelerisque fermentum dui faucibus in ornare. Et netus et malesuada fames. Dignissim enim sit amet venenatis urna cursus. Volutpat maecenas volutpat blandit aliquam etiam erat velit scelerisque in. Viverra nibh cras pulvinar mattis nunc sed blandit libero. Condimentum id venenatis a condimentum. Blandit cursus risus at ultrices. Auctor eu augue ut lectus arcu. Felis imperdiet proin fermentum leo vel. Imperdiet dui accumsan sit amet nulla facilisi morbi tempus. 
Sed velit dignissim sodales ut eu sem integer vitae. Auctor urna nunc id cursus metus. Mattis pellentesque id nibh tortor id aliquet. Vitae auctor eu augue ut lectus arcu bibendum. Nisl condimentum id venenatis a condimentum vitae. Fusce id velit ut tortor pretium. Dignissim enim sit amet venenatis urna cursus eget. Sit amet mauris commodo quis. + +Aliquam nulla facilisi cras fermentum odio eu feugiat pretium nibh. Tellus id interdum velit laoreet id donec ultrices tincidunt. Facilisis leo vel fringilla est ullamcorper eget. Orci phasellus egestas tellus rutrum tellus pellentesque. Enim nunc faucibus a pellentesque sit amet porttitor eget dolor. Cursus risus at ultrices mi tempus. Vitae auctor eu augue ut lectus arcu bibendum. Adipiscing elit duis tristique sollicitudin nibh sit amet commodo. Cursus mattis molestie a iaculis at erat pellentesque adipiscing. Suspendisse in est ante in nibh mauris. Scelerisque in dictum non consectetur a erat nam at lectus. Amet tellus cras adipiscing enim eu. + +Sem nulla pharetra diam sit amet nisl suscipit adipiscing bibendum. Quam pellentesque nec nam aliquam sem et tortor consequat id. In nibh mauris cursus mattis molestie. Fermentum et sollicitudin ac orci phasellus egestas tellus. Volutpat maecenas volutpat blandit aliquam etiam erat velit scelerisque. Sollicitudin aliquam ultrices sagittis orci a scelerisque purus. Molestie nunc non blandit massa enim nec dui nunc. Ac ut consequat semper viverra nam libero. Quam elementum pulvinar etiam non quam. In hac habitasse platea dictumst vestibulum rhoncus est. Volutpat est velit egestas dui id ornare. Sed sed risus pretium quam vulputate dignissim suspendisse. Lorem sed risus ultricies tristique. Nibh sit amet commodo nulla facilisi nullam vehicula. Vel pretium lectus quam id leo in vitae turpis massa. + +Nec ullamcorper sit amet risus nullam eget felis. Vestibulum mattis ullamcorper velit sed ullamcorper morbi. Interdum velit euismod in pellentesque massa placerat. 
Phasellus faucibus scelerisque eleifend donec pretium vulputate. Amet nisl suscipit adipiscing bibendum. Quam viverra orci sagittis eu volutpat odio facilisis mauris. Gravida dictum fusce ut placerat. Eget duis at tellus at urna condimentum mattis pellentesque. Est pellentesque elit ullamcorper dignissim cras. Iaculis nunc sed augue lacus viverra vitae congue eu consequat."; + + const SHORT: &[u8] = b"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Nisl pretium fusce id velit ut tortor pretium viverra. Tincidunt nunc pulvinar sapien et ligula ullamcorper malesuada proin. Gravida neque convallis a cras semper auctor neque vitae tempus. Cursus eget nunc scelerisque viverra mauris in aliquam. Viverra maecenas accumsan lacus vel facilisis volutpat est velit. Pulvinar mattis nunc sed blandit libero volutpat sed cras ornare. Massa eget egestas purus viverra accumsan in nisl nisi scelerisque. Ornare massa eget egestas purus viverra accumsan in nisl. Sed risus ultricies tristique nulla aliquet enim tortor. Laoreet suspendisse interdum consectetur libero id faucibus nisl tincidunt. Nisl tincidunt eget nullam non. Pretium quam vulputate dignissim suspendisse in est. Non enim praesent elementum facilisis. Nibh mauris cursus mattis molestie a. Iaculis nunc sed augue lacus viverra vitae. In mollis nunc sed id semper risus. Augue neque gravida in fermentum et sollicitudin ac. Pellentesque pulvinar pellentesque habitant morbi tristique senectus. 
Libero nunc consequat interdum varius sit."; + + #[test] + fn test_read() { + let mut cursor = Cursor::new(FILE); + let mut f = BufferedReadWrite::new(&mut cursor) + .with_block_size(13) + .build(); + + let mut buf = [0u8; 100]; + for i in 0..(FILE.len() / 100) { + let count = f.read(&mut buf).unwrap(); + assert_eq!(count, 100); + assert_eq!(buf.as_slice(), &FILE[i * 100..(i + 1) * 100]); + } + + let count = f.read(&mut buf).unwrap(); + assert_eq!(count, 12); + assert_eq!(&buf[..count], &FILE[4100..4112]); + } + + #[test] + fn test_write() { + let mut cursor = Cursor::new(Vec::::new()); + let mut f = BufferedReadWrite::new(&mut cursor) + .with_block_size(13) + .build(); + + let count = f.write(SHORT).unwrap(); + assert_eq!(count, SHORT.len()); + f.flush().unwrap(); + + cursor.rewind().unwrap(); + + let mut buf = vec![0u8; SHORT.len()]; + let read = cursor.read(&mut buf).unwrap(); + assert_eq!(read, SHORT.len()); + assert_eq!(buf.as_slice(), SHORT); + } + + #[test] + fn test_replace() { + let mut cursor = Cursor::new(Vec::from(FILE)); + let mut f = BufferedReadWrite::new(&mut cursor) + .with_block_size(13) + .build(); + + let written = f.write(b"Hello World").unwrap(); + assert_eq!(written, 11); + f.flush().unwrap(); + + cursor.rewind().unwrap(); + + let mut buf = vec![0u8; 20]; + let read = cursor.read(&mut buf).unwrap(); + assert_eq!(read, 20); + assert_eq!(buf.as_slice(), b"Hello World dolor si"); + } + + #[test] + fn test_append() { + let mut cursor = Cursor::new(Vec::from(SHORT)); + let mut f = BufferedReadWrite::new(&mut cursor) + .with_block_size(13) + .build(); + + let written = f.write(FILE).unwrap(); + assert_eq!(written, FILE.len()); + f.flush().unwrap(); + + cursor.rewind().unwrap(); + + let mut buf = vec![0u8; FILE.len()]; + let read = cursor.read(&mut buf).unwrap(); + assert_eq!(read, FILE.len()); + assert_eq!(buf.as_slice(), FILE); + } + + #[test] + fn test_replace_arbitrary() { + let mut cursor = Cursor::new(Vec::from(FILE)); + let mut f = 
BufferedReadWrite::new(&mut cursor) + .with_block_size(13) + .build(); + + f.seek(SeekFrom::Start(9)).unwrap(); + let written = f.write(b"Hello World").unwrap(); + assert_eq!(written, 11); + f.flush().unwrap(); + + cursor.rewind().unwrap(); + + let mut buf = vec![0u8; 30]; + let read = cursor.read(&mut buf).unwrap(); + assert_eq!(read, 30); + assert_eq!(buf.as_slice(), b"Lorem ipsHello Worldt amet, co"); + } + + #[test] + fn test_read_cache_limit() { + let mut cursor = Cursor::new(FILE); + let mut f = BufferedReadWrite::new(&mut cursor) + .with_block_size(13) + .with_max_blocks(13) + .build(); + + let mut buf = [0u8; 100]; + for i in 0..(FILE.len() / 100) { + let count = f.read(&mut buf).unwrap(); + assert_eq!(count, 100); + assert_eq!(buf.as_slice(), &FILE[i * 100..(i + 1) * 100]); + assert!(f.blocks.len() <= 13); + } + + let count = f.read(&mut buf).unwrap(); + assert_eq!(count, 12); + assert_eq!(&buf[..count], &FILE[4100..4112]); + } + + #[test] + fn test_read_after_end() { + let mut cursor = Cursor::new(FILE); + let mut f = BufferedReadWrite::new(&mut cursor) + .with_block_size(13) + .build(); + + let mut buf = [0u8; 100]; + f.seek(SeekFrom::End(100)).unwrap(); + let read = f.read(&mut buf).unwrap(); + assert_eq!(read, 0); + } + + #[test] + fn test_random_writes() { + let mut file = Vec::from(FILE); + let mut cursor = Cursor::new(&mut file); + let mut f = BufferedReadWrite::new(&mut cursor) + .with_block_size(13) + .with_max_blocks(13) + .build(); + + let mut rng = StdRng::from_seed(*b"Hiya World This is a random seed"); + // let mut rng = StdRng::from_entropy(); + + for _ in 0..1000 { + let i = rng.gen_range(0..FILE.len()); + let c = rng.gen_range(0u8..255); + + f.seek(SeekFrom::Start(i as u64)).unwrap(); + f.write(slice::from_ref(&c)).unwrap(); + } + f.flush().unwrap(); + + let mut buf = vec![0u8; FILE.len()]; + f.rewind().unwrap(); + f.read_exact(buf.as_mut_slice()).unwrap(); + + assert_eq!(file, buf); + } +} diff --git a/src/channel.rs b/src/channel.rs new 
file mode 100644 index 0000000..dcd42b6 --- /dev/null +++ b/src/channel.rs @@ -0,0 +1,60 @@ +use maybe_async::maybe_async; + +#[cfg(feature = "async")] +use async_channel::{unbounded as new_channel, Receiver, RecvError, SendError, Sender}; +#[cfg(feature = "sync")] +use std::sync::mpsc::{channel as new_channel, Receiver, RecvError, SendError, Sender}; + +pub struct ChanSend(pub Sender); +pub struct ChanRecv(pub Receiver); + +impl Clone for ChanSend { + fn clone(&self) -> Self { + Self(self.0.clone()) + } +} + +#[maybe_async] +pub fn channel() -> (ChanSend, ChanRecv) { + let (s, r) = new_channel(); + (ChanSend(s), ChanRecv(r)) +} + +#[maybe_async] +impl ChanSend { + #[inline] + pub async fn send(&self, data: T) -> Result<(), SendError> { + self.0.send(data).await + } + + #[cfg(feature = "async")] + #[inline] + pub fn send_blocking(&self, data: T) -> Result<(), SendError> { + self.0.send_blocking(data) + } + + #[cfg(feature = "sync")] + #[inline] + pub fn send_blocking(&self, data: T) -> Result<(), SendError> { + self.0.send(data) + } +} +#[maybe_async] +impl ChanRecv { + #[inline] + pub async fn recv(&self) -> Result { + self.0.recv().await + } + + #[cfg(feature = "async")] + #[inline] + pub fn recv_blocking(&self) -> Result { + self.0.recv_blocking() + } + + #[cfg(feature = "sync")] + #[inline] + pub fn recv_blocking(&self) -> Result { + self.0.recv() + } +} diff --git a/src/concurrent.rs b/src/concurrent.rs new file mode 100644 index 0000000..29cd67f --- /dev/null +++ b/src/concurrent.rs @@ -0,0 +1,231 @@ +use enum_as_inner::EnumAsInner; +use maybe_async::maybe_async; + +use std::io::{self, Read, Seek, SeekFrom}; + +use crate::{ + channel::{channel, ChanRecv, ChanSend}, + data::{Data, PartialItem}, + engine::{Engine, MbonParserRead}, + errors::{MbonError, MbonResult}, + marks::Mark, +}; + +#[cfg(feature = "sync")] +use std::thread::{spawn, JoinHandle}; +#[cfg(feature = "async-tokio")] +use tokio::task::{spawn, JoinHandle}; + +#[derive(EnumAsInner)] +enum RequestE 
{ + ParseMark { + location: SeekFrom, + }, + ParseItem { + location: SeekFrom, + }, + ParseData { + mark: Mark, + location: SeekFrom, + }, + ParseItemN { + location: SeekFrom, + count: Option, + bytes: u64, + parse_data: bool, + }, + ParseDataN { + mark: Mark, + location: SeekFrom, + n: usize, + }, + Close, +} +pub struct Request { + response: ChanSend, + request: RequestE, +} + +#[derive(EnumAsInner)] +pub enum Response { + ParseMark(MbonResult<(Mark, u64)>), + ParseItem(MbonResult), + ParseData(MbonResult<(Data, u64)>), + ParseDataN(MbonResult>), + ParseItemN(MbonResult>), + Stopped, +} + +pub struct ConcurrentEngineWrapper { + engine: Engine, + recv: ChanRecv, +} + +pub struct ConcurrentEngineClient { + req: ChanSend, +} + +impl Clone for ConcurrentEngineClient { + fn clone(&self) -> Self { + Self::new(self.req.clone()) + } +} + +#[maybe_async] +impl ConcurrentEngineClient { + pub fn new(req: ChanSend) -> Self { + Self { req } + } + + async fn send_request(&self, request: RequestE) -> io::Result { + let (send, resp) = channel(); + self.req + .send(Request { + response: send, + request, + }) + .await + .map_err(|_| io::Error::new(io::ErrorKind::ConnectionReset, "Receiver was closed"))?; + resp.recv() + .await + .map_err(|_| io::Error::new(io::ErrorKind::ConnectionReset, "Transmitter was closed")) + } + + fn expect(value: Result, Response>) -> MbonResult { + match value { + Ok(res) => res, + Err(_) => Err(MbonError::InternalError("Received Invalid Response".into())), + } + } + + pub async fn close_engine(&self) -> MbonResult<()> { + let resp = self.send_request(RequestE::Close).await?; + if !resp.is_stopped() { + return Err(MbonError::InternalError("Received Invalid Response".into())); + } + Ok(()) + } +} + +#[maybe_async] +impl MbonParserRead for ConcurrentEngineClient { + async fn parse_mark(&mut self, location: SeekFrom) -> MbonResult<(Mark, u64)> { + let response = self.send_request(RequestE::ParseMark { location }).await?; + 
Self::expect(response.into_parse_mark()) + } + + async fn parse_data(&mut self, mark: &Mark, location: SeekFrom) -> MbonResult<(Data, u64)> { + let response = self + .send_request(RequestE::ParseData { + mark: mark.to_owned(), + location, + }) + .await?; + Self::expect(response.into_parse_data()) + } + + async fn parse_item(&mut self, location: SeekFrom) -> MbonResult { + let response = self.send_request(RequestE::ParseItem { location }).await?; + Self::expect(response.into_parse_item()) + } + + async fn parse_item_n( + &mut self, + location: SeekFrom, + count: Option, + bytes: u64, + parse_data: bool, + ) -> MbonResult> { + let response = self + .send_request(RequestE::ParseItemN { + location, + count, + bytes, + parse_data, + }) + .await?; + Self::expect(response.into_parse_item_n()) + } + + async fn parse_data_n( + &mut self, + mark: &Mark, + location: SeekFrom, + n: usize, + ) -> MbonResult> { + let response = self + .send_request(RequestE::ParseDataN { + mark: mark.to_owned(), + location, + n, + }) + .await?; + Self::expect(response.into_parse_data_n()) + } +} + +/// Wraps an [crate::engine::Engine] allowing for multiple concurrent requests +/// to the engine +#[maybe_async] +impl ConcurrentEngineWrapper +where + F: Read + Seek + Send + 'static, +{ + pub fn new(engine: Engine) -> (Self, ConcurrentEngineClient) { + let (send, recv) = channel(); + (Self { engine, recv }, ConcurrentEngineClient::new(send)) + } + + pub fn spawn(self) -> JoinHandle> { + #[cfg(feature = "async")] + let future = self.program_loop(); + #[cfg(feature = "sync")] + let future = || self.program_loop(); + return spawn(future); + } + + async fn program_loop(mut self) -> io::Result<()> { + loop { + let action = self.recv.recv().await.map_err(|_| { + io::Error::new(io::ErrorKind::ConnectionReset, "Transmitter was closed") + })?; + if self.on_action(action).await { + return Ok(()); + } + } + } + + async fn on_action(&mut self, action: Request) -> bool { + let response = match action.request { 
+ RequestE::ParseMark { location } => { + Response::ParseMark(self.engine.parse_mark(location).await) + } + RequestE::ParseItem { location } => { + Response::ParseItem(self.engine.parse_item(location).await) + } + RequestE::ParseData { mark, location } => { + Response::ParseData(self.engine.parse_data(&mark, location).await) + } + RequestE::ParseDataN { mark, location, n } => { + Response::ParseDataN(self.engine.parse_data_n(&mark, location, n).await) + } + RequestE::ParseItemN { + location, + count, + bytes, + parse_data, + } => Response::ParseItemN( + self.engine + .parse_item_n(location, count, bytes, parse_data) + .await, + ), + RequestE::Close => { + action.response.send(Response::Stopped).await.ok(); + return true; + } + }; + + action.response.send(response).await.ok(); + false + } +} diff --git a/src/data.rs b/src/data.rs index 4c95355..99e9bc3 100644 --- a/src/data.rs +++ b/src/data.rs @@ -1,623 +1,413 @@ -//! Internal data structs -//! -//! Here, you'll find [Value], [Mark], and [Type]. - -use std::fmt::Display; +use enum_as_inner::EnumAsInner; +use maybe_async::maybe_async; +use std::{ + char::{self}, + io::{Read, Seek, SeekFrom}, + mem, + ops::Deref, + sync::Arc, +}; -use serde::{Deserialize, Serialize}; +use byteorder::{LittleEndian, ReadBytesExt}; use crate::{ - error::{Error, Result}, - object::{ObjectDump, ObjectParse}, + engine::MbonParserRead, + errors::{MbonError, MbonResult}, + marks::{Mark, Size}, }; -use self::{de::ValueDe, ser::ValueSer}; - -pub mod de; -pub mod ser; - -/// The basic unit for data in binary save data. -/// -/// This is used as an intermidiate object for dumping/loading binary data. You -/// will generally not need to use this struct. -#[derive(Debug, Clone, PartialEq)] -pub enum Value { - Long(i64), - Int(i32), - Short(i16), - Char(i8), - Float(f32), - Double(f64), - Bytes(Vec), - Str(String), - Object(Vec), - Enum(u32, Box), - Null, - List(Vec), - Map(Vec<(Value, Value)>), -} +macro_rules! 
number_type { + ($name:ident, $type:ty, $file:ident: $read:expr) => { + #[derive(Debug, Clone)] + pub struct $name($type); + impl Deref for $name { + type Target = $type; -impl Value { - /// Get the type of this value. - /// - /// This will return the actual type that would be stored when the value is - /// converted into binary. - /// - /// ``` - /// use mbon::data::{Value, Type}; - /// - /// assert_eq!(Value::Long(32).get_type(), Type::Long); - /// - /// assert_eq!( - /// Value::List(vec![Value::Int(64), Value::Int(12)]).get_type(), - /// Type::Array - /// ); - /// - /// assert_eq!( - /// Value::List(vec![Value::Int(64), Value::Short(12)]).get_type(), - /// Type::List - /// ); - /// ``` - pub fn get_type(&self) -> Type { - match self { - Value::Long(_) => Type::Long, - Value::Int(_) => Type::Int, - Value::Short(_) => Type::Short, - Value::Char(_) => Type::Char, - Value::Float(_) => Type::Float, - Value::Double(_) => Type::Double, - Value::Bytes(_) => Type::Bytes, - Value::Str(_) => Type::Str, - Value::Object(_) => Type::Object, - Value::Enum(_, _) => Type::Enum, - Value::Null => Type::Null, - Value::List(v) => { - if Self::can_be_array(v) { - Type::Array - } else { - Type::List - } + fn deref(&self) -> &Self::Target { + &self.0 } - Value::Map(v) => { - if Self::can_be_dict(v) { - Type::Dict - } else { - Type::Map - } + } + impl $name { + pub(crate) fn parse($file: &mut R) -> MbonResult { + let val = $read; + Ok(Self(val)) + } + + pub fn value(&self) -> $type { + self.0 } } - } + }; +} - /// Parse a struct from this value - /// - /// ``` - /// use mbon::data::Value; - /// - /// let foo: u32 = Value::Int(345).parse().unwrap(); - /// assert_eq!(foo, 345); - /// ``` - #[inline] - pub fn parse<'t, T>(&'t self) -> Result - where - T: Deserialize<'t>, - { - T::deserialize(ValueDe::new(&self)) - } +macro_rules! 
char_impl { + ($name:ident) => { + impl $name { + #[inline] + pub fn as_char(&self) -> Option { + char::from_u32(self.0 as u32) + } + } + }; +} - /// Dump a struct into a value - /// - /// ``` - /// use mbon::data::Value; - /// - /// let obj: u32 = 345; - /// let val = Value::dump(obj).unwrap(); - /// - /// if let Value::Int(v) = val { - /// assert_eq!(v, 345); - /// } else { - /// panic!("val is not an Int"); - /// } - /// ``` - #[inline] - pub fn dump(value: T) -> Result - where - T: Serialize, - { - value.serialize(&mut ValueSer) +number_type!(U8, u8, f: f.read_u8()?); +number_type!(U16, u16, f: f.read_u16::()?); +number_type!(U32, u32, f: f.read_u32::()?); +number_type!(U64, u64, f: f.read_u64::()?); +number_type!(I8, i8, f: f.read_i8()?); +number_type!(I16, i16, f: f.read_i16::()?); +number_type!(I32, i32, f: f.read_i32::()?); +number_type!(I64, i64, f: f.read_i64::()?); +number_type!(F32, f32, f: f.read_f32::()?); +number_type!(F64, f64, f: f.read_f64::()?); +number_type!(C8, u8, f: f.read_u8()?); +char_impl!(C8); +number_type!(C16, u16, f: f.read_u16::()?); +char_impl!(C16); +number_type!(C32, u32, f: f.read_u32::()?); +char_impl!(C32); + +#[derive(Debug, Clone)] +pub struct Str(String); +impl Deref for Str { + type Target = String; + + fn deref(&self) -> &Self::Target { + &self.0 } - - /// Parse an object from this value - /// - /// This will attempt to parse an Object only if the Value is an Object - /// type. 
- /// - /// ``` - /// use mbon::object::ObjectParse; - /// use mbon::parser::Parser; - /// use mbon::error::Error; - /// use mbon::data::Value; - /// - /// struct Foo { - /// a: i32, - /// b: String, - /// } - /// - /// impl ObjectParse for Foo { - /// type Error = Error; - /// - /// fn parse_object(object: &[u8]) -> Result { - /// let mut parser = Parser::from(object); - /// let a = parser.next()?; - /// let b = parser.next()?; - /// Ok(Self { a, b }) - /// } - /// } - /// - /// let val = Value::Object(b"i\x00\x00\x32\x40s\x00\x00\x00\x05Hello".to_vec()); - /// let foo: Foo = val.parse_obj().unwrap(); - /// - /// assert_eq!(foo.a, 0x3240); - /// assert_eq!(foo.b, "Hello"); - /// ``` - pub fn parse_obj(&self) -> Result - where - T: ObjectParse, - ::Error: std::error::Error + 'static, - { - if let Value::Object(data) = self { - Error::from_res(T::parse_object(data)) - } else { - Err(Error::Expected(Type::Object)) - } +} +impl From for String { + fn from(value: Str) -> Self { + value.0 } - - /// Dump an object into this value - /// - /// ``` - /// use mbon::object::ObjectDump; - /// use mbon::dumper::Dumper; - /// use mbon::error::Error; - /// use mbon::data::Value; - /// - /// struct Foo { - /// a: i32, - /// b: String, - /// } - /// - /// impl ObjectDump for Foo { - /// type Error = Error; - /// - /// fn dump_object(&self) -> Result, Self::Error> { - /// let mut dumper = Dumper::new(); - /// dumper.write(&self.a); - /// dumper.write(&self.b); - /// Ok(dumper.writer()) - /// } - /// } - /// - /// let foo = Foo { a: 0x3240, b: "Hello".to_owned() }; - /// let val = Value::dump_obj(foo).unwrap(); - /// - /// if let Value::Object(v) = val { - /// assert_eq!(v, b"i\x00\x00\x32\x40s\x00\x00\x00\x05Hello"); - /// } else { - /// panic!("val is not an Object"); - /// } - /// ``` - #[inline] - pub fn dump_obj(value: T) -> Result - where - T: ObjectDump, - ::Error: std::error::Error + 'static, - { - let data = Error::from_res(value.dump_object())?; - Ok(Value::Object(data)) +} 
+impl Str { + pub(crate) fn parse(f: &mut R, l: &Size) -> MbonResult { + let mut buf = vec![0u8; **l as usize]; + f.read_exact(buf.as_mut_slice())?; + let val = String::from_utf8(buf).map_err(|err| MbonError::InvalidData(err.into()))?; + Ok(Self(val)) } - /// Get the total size in bytes that this value uses in binary form - /// - /// ``` - /// use mbon::data::Value; - /// - /// let value = Value::Int(42); - /// - /// assert_eq!(value.size(), 5); - /// ``` - #[inline] - pub fn size(&self) -> usize { - Mark::from(self).size() + pub fn value(&self) -> &str { + &self.0 } +} - /// Get the size in bytes that the data will use in binary form - /// - /// ``` - /// use mbon::data::Value; - /// - /// let value = Value::Int(42); - /// - /// assert_eq!(value.data_size(), 4); - /// ``` - #[inline] - pub fn data_size(&self) -> usize { - Mark::from(self).data_size() - } +#[derive(Debug, Clone)] +pub struct List { + items: Vec, + start: u64, + end: u64, +} - /// Get the size in bytes that the mark will use in binary form - /// - /// ``` - /// use mbon::data::Value; - /// - /// let value = Value::Int(42); - /// - /// assert_eq!(value.mark_size(), 1); - /// ``` - #[inline] - pub fn mark_size(&self) -> usize { - Mark::from(self).mark_size() +#[maybe_async] +impl List { + pub(crate) fn new(start: u64, l: &Size) -> MbonResult { + let end = start + **l; + Ok(List { + items: Vec::new(), + start, + end, + }) } - /// Check if a list can be stored as an array - /// - /// If all elements in the list have the same mark, then the list can be an - /// array. 
- /// - /// ``` - /// use mbon::data::Value; - /// - /// let array = vec![Value::Int(32), Value::Int(42)]; - /// assert_eq!(Value::can_be_array(&array), true); - /// - /// let list = vec![Value::Int(32), Value::Char(42)]; - /// assert_eq!(Value::can_be_array(&list), false); - /// ``` - pub fn can_be_array<'t, I>(list: I) -> bool - where - I: IntoIterator, - { - let mut iter = list.into_iter(); - if let Some(first) = iter.next() { - let first_mark = Mark::from_value(first); - - for val in iter { - let mark = Mark::from_value(val); - if mark != first_mark { - return false; - } - } - true - } else { - false + pub async fn fetch<'t, E: MbonParserRead>( + &'t mut self, + client: &mut E, + index: usize, + ) -> MbonResult> { + if self.items.len() > index { + return Ok(Some(&mut self.items[index])); } - } - /// Check if a map can be stored as a dict - /// - /// If each key-value pair uses the same marks then the map can be a dict. - /// - /// ``` - /// use mbon::data::Value; - /// - /// let dict = vec![ - /// (Value::Str("a".to_owned()), Value::Int(32)), - /// (Value::Str("b".to_owned()), Value::Int(42)), - /// ]; - /// assert_eq!(Value::can_be_dict(&dict), true); - /// - /// let map = vec![ - /// (Value::Str("a".to_owned()), Value::Int(32)), - /// (Value::Str("hello".to_owned()), Value::Int(42)), - /// ]; - /// assert_eq!(Value::can_be_dict(&map), false); - /// ``` - pub fn can_be_dict<'t, I>(map: I) -> bool - where - I: IntoIterator, - { - let mut iter = map.into_iter(); - if let Some((first_k, first_v)) = iter.next() { - let key_mark: Mark = first_k.into(); - let val_mark: Mark = first_v.into(); - - for (k, v) in iter { - let km: Mark = k.into(); - let vm: Mark = v.into(); - if km != key_mark || vm != val_mark { - return false; - } + let mut location = match self.items.last() { + Some(item) => item.location + item.mark.total_len(), + None => self.start, + }; + if location > self.end { + return Err(MbonError::InvalidMark); + } + let mut len = self.items.len(); + + loop { 
+ let (mark, pos) = client.parse_mark(SeekFrom::Start(location)).await?; + let item = PartialItem::new(mark, pos); + location = item.location + item.mark.total_len(); + if location > self.end { + return Err(MbonError::InvalidMark); + } + self.items.push(item); + len += 1; + + if len == index + 1 { + return Ok(Some(&mut self.items[index])); + } + if location == self.end { + return Ok(None); } - true - } else { - false } } -} -impl AsRef for Value { - fn as_ref(&self) -> &Value { - &self + #[inline] + pub fn get(&self, index: usize) -> Option<&PartialItem> { + self.items.get(index) + } + + #[inline] + pub fn get_mut(&mut self, index: usize) -> Option<&mut PartialItem> { + self.items.get_mut(index) } } -/// An indicator of what's contained in the value. -/// -/// This is the first thing that is read/written in mbon, and it will tell the -/// reader how to read the value that the mark represents. -#[derive(Debug, PartialEq, Eq)] -pub enum Mark { - Long, - Int, - Short, - Char, - Float, - Double, - Bytes(usize), - Str(usize), - Object(usize), - Enum(Box), - Null, - Array(usize, Box), - List(usize), - Dict(usize, Box, Box), - Map(usize), +#[derive(Debug, Clone)] +pub struct Array { + items: Vec>, + pub mark: Arc, + start: u64, } -impl Mark { - /// Get the size in bytes that the mark will use in binary form - /// - /// ``` - /// use mbon::data::Mark; - /// - /// assert_eq!(Mark::Int.mark_size(), 1); - /// ``` - pub fn mark_size(&self) -> usize { - match self { - Mark::Long => 1, - Mark::Int => 1, - Mark::Short => 1, - Mark::Char => 1, - Mark::Float => 1, - Mark::Double => 1, - Mark::Bytes(_) => 5, - Mark::Str(_) => 5, - Mark::Object(_) => 5, - Mark::Enum(m) => 1 + m.mark_size(), - Mark::Null => 1, - Mark::Array(_, m) => 5 + m.mark_size(), - Mark::List(_) => 5, - Mark::Dict(_, k, v) => 5 + k.mark_size() + v.mark_size(), - Mark::Map(_) => 5, - } +#[maybe_async] +impl Array { + pub fn new(start: u64, mark: Arc, n: &Size) -> MbonResult { + let items = vec![None; **n as 
usize]; + Ok(Array { items, mark, start }) } - /// Get the size in bytes that the data will use in binary form - /// - /// ``` - /// use mbon::data::Mark; - /// - /// assert_eq!(Mark::Int.data_size(), 4); - /// ``` - pub fn data_size(&self) -> usize { - match self { - Mark::Long => 8, - Mark::Int => 4, - Mark::Short => 2, - Mark::Char => 1, - Mark::Float => 4, - Mark::Double => 8, - Mark::Bytes(n) => *n, - Mark::Str(n) => *n, - Mark::Object(n) => *n, - Mark::Enum(m) => m.data_size() + 4, - Mark::Null => 0, - Mark::Array(len, m) => len * m.data_size(), - Mark::List(n) => *n, - Mark::Dict(len, k, v) => len * (k.data_size() + v.data_size()), - Mark::Map(n) => *n, + pub async fn fetch<'t, E: MbonParserRead>( + &'t mut self, + client: &mut E, + index: usize, + ) -> MbonResult> { + if self.items.len() <= index { + return Ok(None); + } + + if self.items[index].is_some() { + return Ok(self.items[index].as_mut()); } + + let len = self.mark.data_len(); + let location = self.start + len * (index as u64); + let (data, _) = client + .parse_data(&self.mark, SeekFrom::Start(location)) + .await?; + + self.items[index] = Some(data); + Ok(self.items[index].as_mut()) } - /// Get the total size in bytes that this value uses in binary form - /// - /// ``` - /// use mbon::data::Mark; - /// - /// assert_eq!(Mark::Int.size(), 5); - /// ``` #[inline] - pub fn size(&self) -> usize { - self.mark_size() + self.data_size() + pub fn get(&self, index: usize) -> Option<&Data> { + self.items.get(index).map(|v| v.as_ref()).flatten() } - /// Get the type of this mark - pub fn get_type(&self) -> Type { - match self { - Mark::Long => Type::Long, - Mark::Int => Type::Int, - Mark::Short => Type::Short, - Mark::Char => Type::Char, - Mark::Float => Type::Float, - Mark::Double => Type::Double, - Mark::Bytes(_) => Type::Bytes, - Mark::Str(_) => Type::Str, - Mark::Object(_) => Type::Object, - Mark::Enum(_) => Type::Enum, - Mark::Null => Type::Null, - Mark::Array(_, _) => Type::Array, - Mark::List(_) => 
Type::List, - Mark::Dict(_, _, _) => Type::Dict, - Mark::Map(_) => Type::Map, - } + #[inline] + pub fn get_mut(&mut self, index: usize) -> Option<&mut Data> { + self.items.get_mut(index).map(|v| v.as_mut()).flatten() } +} - /// Get the mark from a value - /// - /// ``` - /// use mbon::data::{Mark, Value}; - /// - /// assert_eq!(Mark::from_value(Value::Int(32)), Mark::Int); - /// assert_eq!( - /// Mark::from_value(Value::Str("Hello".to_owned())), - /// Mark::Str(5) - /// ); - /// ``` - pub fn from_value(val: impl AsRef) -> Self { - match val.as_ref() { - Value::Long(_) => Self::Long, - Value::Int(_) => Self::Int, - Value::Short(_) => Self::Short, - Value::Char(_) => Self::Char, - Value::Float(_) => Self::Float, - Value::Double(_) => Self::Double, - Value::Bytes(v) => Self::Bytes(v.len()), - Value::Str(v) => Self::Str(v.len()), - Value::Object(v) => Self::Object(v.len()), - Value::Enum(_, v) => Self::Enum(Box::new(Self::from_value(v))), - Value::Null => Self::Null, - Value::List(v) => { - if Value::can_be_array(v) { - let first = v.first().unwrap(); - Self::Array(v.len(), Box::new(Self::from_value(first))) - } else { - Self::List(v.iter().map(|v| Self::from_value(v).size()).sum()) - } - } - Value::Map(v) => { - if Value::can_be_dict(v) { - let (first_k, first_v) = v.first().unwrap(); - Self::Dict( - v.len(), - Box::new(Self::from_value(first_k)), - Box::new(Self::from_value(first_v)), - ) - } else { - Self::Map( - v.iter() - .map(|(k, v)| Self::from_value(k).size() + Self::from_value(v).size()) - .sum(), - ) - } - } - } - } +#[derive(Debug, Clone)] +pub struct Struct { + items: Vec<(Option, Option)>, + pub key: Arc, + pub val: Arc, + start: u64, } -impl AsRef for Mark { - fn as_ref(&self) -> &Mark { - self +#[maybe_async] +impl Struct { + pub fn new(start: u64, key: Arc, val: Arc, n: &Size) -> MbonResult { + let items = vec![(None, None); **n as usize]; + Ok(Self { + items, + key, + val, + start, + }) } -} -impl From for Type { - fn from(m: Mark) -> Self { - 
m.get_type() + async fn fetch_nth<'t, E: MbonParserRead>( + &'t mut self, + client: &mut E, + index: usize, + ) -> MbonResult> { + let item_i = index / 2; + let korv = index & 0b1 != 0; + + let (k, v) = match self.items.get_mut(item_i) { + Some(v) => v, + None => return Ok(None), + }; + let (val, mark) = match korv { + true => (k, &self.key), + false => (v, &self.val), + }; + if let Some(val) = val { + return Ok(Some(val)); + } + let key_len = self.key.data_len(); + let val_len = self.val.data_len(); + let mut offset = (key_len + val_len) * item_i as u64; + if !korv { + offset += key_len; + } + + let (data, _) = client + .parse_data(mark, SeekFrom::Start(self.start + offset)) + .await?; + + let _ = mem::replace(val, Some(data)); + + Ok(val.as_mut()) } -} -impl<'t> From<&'t Mark> for Type { - fn from(m: &'t Mark) -> Self { - m.get_type() + #[inline] + pub async fn fetch_key<'t, E: MbonParserRead>( + &'t mut self, + client: &mut E, + index: usize, + ) -> MbonResult> { + self.fetch_nth(client, index * 2).await } -} -impl From for Mark { - fn from(v: Value) -> Self { - Self::from_value(v) + #[inline] + pub async fn fetch_val<'t, E: MbonParserRead>( + &'t mut self, + client: &mut E, + index: usize, + ) -> MbonResult> { + self.fetch_nth(client, index * 2 + 1).await } -} -impl<'t> From<&'t Value> for Mark { - fn from(v: &'t Value) -> Self { - Self::from_value(v) + pub async fn fetch_by_key<'t, E: MbonParserRead>( + &'t mut self, + client: &mut E, + key: &Data, + ) -> MbonResult> { + for i in 0..self.items.len() { + if let Some(k) = self.fetch_key(client, i).await? { + todo!() + } + } + + todo!() } } -/// An indicator for what type of value is stored -/// -/// This is the first byte that is read/written and indicates how to read the -/// mark that is contained. 
-#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum Type { - Long, - Int, - Short, - Char, - Float, - Double, - Bytes, - Str, - Object, - Enum, +#[derive(Debug, Clone, EnumAsInner)] +pub enum Data { Null, - Array, - List, - Dict, - Map, + U8(U8), + U16(U16), + U32(U32), + U64(U64), + I8(I8), + I16(I16), + I32(I32), + I64(I64), + F32(F32), + F64(F64), + C8(C8), + C16(C16), + C32(C32), + String(Str), + List(List), + Array(Array), + Struct(Struct), } -impl Display for Type { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { +impl Data { + pub(crate) fn parse(f: &mut R, mark: &Mark) -> MbonResult { + Ok(match mark { + Mark::Null => Self::Null, + Mark::Unsigned(b) => match b { + 1 => Self::U8(U8::parse(f)?), + 2 => Self::U16(U16::parse(f)?), + 4 => Self::U32(U32::parse(f)?), + 8 => Self::U64(U64::parse(f)?), + _ => return Err(MbonError::InvalidMark), + }, + Mark::Signed(b) => match b { + 1 => Self::I8(I8::parse(f)?), + 2 => Self::I16(I16::parse(f)?), + 4 => Self::I32(I32::parse(f)?), + 8 => Self::I64(I64::parse(f)?), + _ => return Err(MbonError::InvalidMark), + }, + Mark::Float(b) => match b { + 4 => Self::F32(F32::parse(f)?), + 8 => Self::F64(F64::parse(f)?), + _ => return Err(MbonError::InvalidMark), + }, + Mark::Char(b) => match b { + 1 => Self::C8(C8::parse(f)?), + 2 => Self::C16(C16::parse(f)?), + 4 => Self::C32(C32::parse(f)?), + _ => return Err(MbonError::InvalidMark), + }, + Mark::String(l) => Self::String(Str::parse(f, l)?), + Mark::Array(v, n) => Self::Array(Array::new(f.stream_position()?, v.clone(), &n)?), + Mark::List(l) => Self::List(List::new(f.stream_position()?, l)?), + Mark::Struct(k, v, n) => { + Self::Struct(Struct::new(f.stream_position()?, k.clone(), v.clone(), &n)?) 
+ } + Mark::Map(_) => todo!(), + Mark::Enum(_, _) => todo!(), + Mark::Space => todo!(), + Mark::Padding(_) => todo!(), + Mark::Pointer(_) => todo!(), + Mark::Rc(_, _) => todo!(), + Mark::Heap(_) => todo!(), + }) + } + + pub fn maybe_eq(&self, other: &Self) -> Option { match self { - Type::Long => f.write_str("Long"), - Type::Int => f.write_str("Int"), - Type::Short => f.write_str("Short"), - Type::Char => f.write_str("Char"), - Type::Float => f.write_str("Float"), - Type::Double => f.write_str("Double"), - Type::Bytes => f.write_str("Bytes"), - Type::Str => f.write_str("Str"), - Type::Object => f.write_str("Object"), - Type::Enum => f.write_str("Enum"), - Type::Null => f.write_str("Null"), - Type::Array => f.write_str("Array"), - Type::List => f.write_str("List"), - Type::Dict => f.write_str("Dict"), - Type::Map => f.write_str("Map"), + Data::Null => Some(other.is_null()), + Data::U8(l) => Some(other.as_u8().map(|r| **l == **r).unwrap_or(false)), + Data::U16(l) => Some(other.as_u16().map(|r| **l == **r).unwrap_or(false)), + Data::U32(l) => Some(other.as_u32().map(|r| **l == **r).unwrap_or(false)), + Data::U64(l) => Some(other.as_u64().map(|r| **l == **r).unwrap_or(false)), + Data::I8(l) => Some(other.as_i8().map(|r| **l == **r).unwrap_or(false)), + Data::I16(l) => Some(other.as_i16().map(|r| **l == **r).unwrap_or(false)), + Data::I32(l) => Some(other.as_i32().map(|r| **l == **r).unwrap_or(false)), + Data::I64(l) => Some(other.as_i64().map(|r| **l == **r).unwrap_or(false)), + Data::F32(l) => Some(other.as_f32().map(|r| **l == **r).unwrap_or(false)), + Data::F64(l) => Some(other.as_f64().map(|r| **l == **r).unwrap_or(false)), + Data::C8(l) => Some(other.as_c8().map(|r| **l == **r).unwrap_or(false)), + Data::C16(l) => Some(other.as_c16().map(|r| **l == **r).unwrap_or(false)), + Data::C32(l) => Some(other.as_c32().map(|r| **l == **r).unwrap_or(false)), + Data::String(l) => Some(other.as_string().map(|r| **l == **r).unwrap_or(false)), + Data::List(_) => todo!(), + 
Data::Array(_) => todo!(), + Data::Struct(_) => todo!(), } } } -impl Type { - /// Get the prefix that will indicate the value type - #[inline] - pub fn prefix(&self) -> u8 { - match self { - Type::Long => b'l', - Type::Int => b'i', - Type::Short => b'h', - Type::Char => b'c', - Type::Float => b'f', - Type::Double => b'd', - Type::Bytes => b'b', - Type::Str => b's', - Type::Object => b'o', - Type::Enum => b'e', - Type::Null => b'n', - Type::Array => b'a', - Type::List => b'A', - Type::Dict => b'm', - Type::Map => b'M', +#[derive(Debug, Clone)] +pub struct PartialItem { + pub mark: Mark, + pub data: Option, + location: u64, +} + +impl PartialItem { + pub fn new(mark: Mark, location: u64) -> Self { + Self { + mark, + location, + data: None, } } - /// Get the type from the given prefix - pub fn from_prefix(prefix: u8) -> Result { - match prefix { - b'l' => Ok(Type::Long), - b'i' => Ok(Type::Int), - b'h' => Ok(Type::Short), - b'c' => Ok(Type::Char), - b'f' => Ok(Type::Float), - b'd' => Ok(Type::Double), - b'b' => Ok(Type::Bytes), - b's' => Ok(Type::Str), - b'o' => Ok(Type::Object), - b'e' => Ok(Type::Enum), - b'n' => Ok(Type::Null), - b'a' => Ok(Type::Array), - b'A' => Ok(Type::List), - b'm' => Ok(Type::Dict), - b'M' => Ok(Type::Map), - _ => Err(Error::DataError(format!("Unknown prefix `{}`", prefix))), - } + pub(crate) fn parse_data(&mut self, f: &mut R) -> MbonResult<()> { + f.seek(SeekFrom::Start(self.location))?; + let data = Data::parse(f, &self.mark)?; + self.data = Some(data); + Ok(()) } } diff --git a/src/data/de.rs b/src/data/de.rs deleted file mode 100644 index e9a9389..0000000 --- a/src/data/de.rs +++ /dev/null @@ -1,625 +0,0 @@ -//! # Serde Deserializer implementation for [Value] -//! -//! 
[Value]: mbon::data::Value - -use crate::error::Error; - -use super::{Type, Value}; -use serde::{de, Deserializer}; - -pub struct ValueDe<'de> { - input: &'de Value, -} -pub struct ValueSeqAccess<'de> { - seq: &'de Vec, - index: usize, -} -pub struct ValueMapAccess<'de> { - seq: &'de Vec<(Value, Value)>, - index: usize, -} -pub struct ValueEnumAccess<'de> { - parent: &'de Value, - value: &'de Value, -} - -impl<'de> ValueDe<'de> { - #[inline] - pub fn new(input: &'de Value) -> Self { - Self { input } - } -} - -impl<'de> ValueSeqAccess<'de> { - #[inline] - fn new(seq: &'de Vec) -> Self { - Self { seq, index: 0 } - } -} - -impl<'de> ValueMapAccess<'de> { - #[inline] - fn new(seq: &'de Vec<(Value, Value)>) -> Self { - Self { seq, index: 0 } - } -} - -impl<'de> ValueEnumAccess<'de> { - #[inline] - fn new(parent: &'de Value, value: &'de Value) -> Self { - Self { parent, value } - } -} - -impl<'de> ValueDe<'de> { - fn next_i8(&self) -> Result { - match self.input { - Value::Long(v) => Ok(i8::try_from(*v)?), - Value::Int(v) => Ok(i8::try_from(*v)?), - Value::Short(v) => Ok(i8::try_from(*v)?), - Value::Char(v) => Ok(*v), - _ => Err(Error::Expected(Type::Char)), - } - } - - fn next_i16(&self) -> Result { - match self.input { - Value::Long(v) => Ok(i16::try_from(*v)?), - Value::Int(v) => Ok(i16::try_from(*v)?), - Value::Short(v) => Ok(*v), - Value::Char(v) => Ok(*v as i16), - _ => Err(Error::Expected(Type::Short)), - } - } - - fn next_i32(&self) -> Result { - match self.input { - Value::Long(v) => Ok(i32::try_from(*v)?), - Value::Int(v) => Ok(*v), - Value::Short(v) => Ok(*v as i32), - Value::Char(v) => Ok(*v as i32), - _ => Err(Error::Expected(Type::Int)), - } - } - - fn next_i64(&self) -> Result { - match self.input { - Value::Long(v) => Ok(*v), - Value::Int(v) => Ok(*v as i64), - Value::Short(v) => Ok(*v as i64), - Value::Char(v) => Ok(*v as i64), - _ => Err(Error::Expected(Type::Long)), - } - } - - fn next_u8(&self) -> Result { - match self.input { - Value::Long(v) => 
Ok(u8::try_from(*v as u64)?), - Value::Int(v) => Ok(u8::try_from(*v as u32)?), - Value::Short(v) => Ok(u8::try_from(*v as u16)?), - Value::Char(v) => Ok(*v as u8), - _ => Err(Error::Expected(Type::Char)), - } - } - - fn next_u16(&self) -> Result { - match self.input { - Value::Long(v) => Ok(u16::try_from(*v as u64)?), - Value::Int(v) => Ok(u16::try_from(*v as u32)?), - Value::Short(v) => Ok(*v as u16), - Value::Char(v) => Ok(*v as u16), - _ => Err(Error::Expected(Type::Short)), - } - } - - fn next_u32(&self) -> Result { - match self.input { - Value::Long(v) => Ok(u32::try_from(*v as u64)?), - Value::Int(v) => Ok(*v as u32), - Value::Short(v) => Ok((*v as u16) as u32), - Value::Char(v) => Ok((*v as u8) as u32), - _ => Err(Error::Expected(Type::Int)), - } - } - - fn next_u64(&self) -> Result { - match self.input { - Value::Long(v) => Ok(*v as u64), - Value::Int(v) => Ok((*v as u32) as u64), - Value::Short(v) => Ok((*v as u16) as u64), - Value::Char(v) => Ok((*v as u8) as u64), - _ => Err(Error::Expected(Type::Long)), - } - } - - fn next_f32(&self) -> Result { - match self.input { - Value::Double(v) => Ok(*v as f32), - Value::Float(v) => Ok(*v), - _ => Err(Error::Expected(Type::Float)), - } - } - - fn next_f64(&self) -> Result { - match self.input { - Value::Double(v) => Ok(*v), - Value::Float(v) => Ok(*v as f64), - _ => Err(Error::Expected(Type::Float)), - } - } - - fn next_bytes(&self) -> Result<&Vec, Error> { - match self.input { - Value::Bytes(v) => Ok(v), - Value::Object(v) => Ok(v), - _ => Err(Error::Expected(Type::Bytes)), - } - } - - fn next_str(&self) -> Result<&str, Error> { - match self.input { - Value::Str(v) => Ok(v), - _ => Err(Error::Expected(Type::Str)), - } - } -} - -impl<'de> de::Deserializer<'de> for ValueDe<'de> { - type Error = Error; - - fn deserialize_any(self, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - match self.input { - Value::Long(v) => visitor.visit_i64(*v), - Value::Int(v) => visitor.visit_i32(*v), - Value::Short(v) => 
visitor.visit_i16(*v), - Value::Char(v) => visitor.visit_i8(*v), - Value::Float(v) => visitor.visit_f32(*v), - Value::Double(v) => visitor.visit_f64(*v), - Value::Bytes(v) => visitor.visit_bytes(v), - Value::Str(v) => visitor.visit_str(v), - Value::Object(v) => visitor.visit_bytes(v), - Value::Enum(_, v) => visitor.visit_enum(ValueEnumAccess::new(self.input, v)), - Value::Null => visitor.visit_unit(), - Value::List(v) => visitor.visit_seq(ValueSeqAccess::new(v)), - Value::Map(v) => visitor.visit_map(ValueMapAccess::new(v)), - } - } - - fn deserialize_bool(self, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - visitor.visit_bool(self.next_i64()? != 0) - } - - fn deserialize_i8(self, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - visitor.visit_i8(self.next_i8()?) - } - - fn deserialize_i16(self, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - visitor.visit_i16(self.next_i16()?) - } - - fn deserialize_i32(self, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - visitor.visit_i32(self.next_i32()?) - } - - fn deserialize_i64(self, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - visitor.visit_i64(self.next_i64()?) - } - - fn deserialize_u8(self, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - visitor.visit_u8(self.next_u8()?) - } - - fn deserialize_u16(self, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - visitor.visit_u16(self.next_u16()?) - } - - fn deserialize_u32(self, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - visitor.visit_u32(self.next_u32()?) - } - - fn deserialize_u64(self, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - visitor.visit_u64(self.next_u64()?) - } - - fn deserialize_f32(self, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - visitor.visit_f32(self.next_f32()?) - } - - fn deserialize_f64(self, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - visitor.visit_f64(self.next_f64()?) 
- } - - fn deserialize_char(self, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - let c = match self.input { - Value::Char(v) => (*v as u8) as char, - _ => char::from_u32(self.next_u32()?) - .ok_or(Error::data_error("Invalid UTF-8 Character"))?, - }; - visitor.visit_char(c) - } - - fn deserialize_str(self, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - visitor.visit_str(self.next_str()?) - } - - fn deserialize_string(self, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - visitor.visit_string(self.next_str()?.to_owned()) - } - - fn deserialize_bytes(self, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - visitor.visit_bytes(self.next_bytes()?) - } - - fn deserialize_byte_buf(self, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - visitor.visit_byte_buf(self.next_bytes()?.to_owned()) - } - - fn deserialize_option(self, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - if let Value::Null = self.input { - visitor.visit_none() - } else { - visitor.visit_some(self) - } - } - - fn deserialize_unit(self, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - if let Value::Null = self.input { - visitor.visit_unit() - } else { - Err(Error::Expected(Type::Null)) - } - } - - fn deserialize_unit_struct( - self, - _name: &'static str, - visitor: V, - ) -> Result - where - V: de::Visitor<'de>, - { - self.deserialize_unit(visitor) - } - - fn deserialize_newtype_struct( - self, - _name: &'static str, - visitor: V, - ) -> Result - where - V: de::Visitor<'de>, - { - visitor.visit_newtype_struct(self) - } - - fn deserialize_seq(self, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - if let Value::List(v) = self.input { - visitor.visit_seq(ValueSeqAccess::new(v)) - } else { - Err(Error::Expected(Type::List)) - } - } - - fn deserialize_tuple(self, _len: usize, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - self.deserialize_seq(visitor) - } - - fn deserialize_tuple_struct( - self, - _name: &'static str, 
- _len: usize, - visitor: V, - ) -> Result - where - V: de::Visitor<'de>, - { - self.deserialize_seq(visitor) - } - - fn deserialize_map(self, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - if let Value::Map(v) = self.input { - visitor.visit_map(ValueMapAccess::new(v)) - } else { - Err(Error::Expected(Type::Map)) - } - } - - fn deserialize_struct( - self, - _name: &'static str, - _fields: &'static [&'static str], - visitor: V, - ) -> Result - where - V: de::Visitor<'de>, - { - self.deserialize_map(visitor) - } - - fn deserialize_enum( - self, - _name: &'static str, - _variants: &'static [&'static str], - visitor: V, - ) -> Result - where - V: de::Visitor<'de>, - { - if let Value::Enum(_, v) = self.input { - visitor.visit_enum(ValueEnumAccess::new(self.input, v)) - } else { - Err(Error::Expected(Type::Enum)) - } - } - - fn deserialize_identifier(self, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - match self.input { - Value::Str(v) => visitor.visit_str(v), - Value::Enum(variant, _) => visitor.visit_u32(*variant), - _ => Err(Error::Expected(Type::Str)), - } - } - - fn deserialize_ignored_any(self, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - visitor.visit_unit() - } -} - -impl<'de> de::SeqAccess<'de> for ValueSeqAccess<'de> { - type Error = Error; - - fn next_element_seed(&mut self, seed: T) -> Result, Self::Error> - where - T: de::DeserializeSeed<'de>, - { - Ok(if let Some(value) = self.seq.get(self.index) { - self.index += 1; - Some(seed.deserialize(ValueDe::new(value))?) - } else { - None - }) - } -} - -impl<'de> de::MapAccess<'de> for ValueMapAccess<'de> { - type Error = Error; - - fn next_key_seed(&mut self, seed: K) -> Result, Self::Error> - where - K: de::DeserializeSeed<'de>, - { - Ok(if let Some((key, _value)) = self.seq.get(self.index) { - Some(seed.deserialize(ValueDe::new(key))?) 
- } else { - None - }) - } - - fn next_value_seed(&mut self, seed: V) -> Result - where - V: de::DeserializeSeed<'de>, - { - if let Some((_key, value)) = self.seq.get(self.index) { - self.index += 1; - Ok(seed.deserialize(ValueDe::new(value))?) - } else { - Err(Error::Msg("Expected index to be in bounds".into())) - } - } -} - -impl<'de> de::EnumAccess<'de> for ValueEnumAccess<'de> { - type Error = Error; - type Variant = Self; - - fn variant_seed(self, seed: V) -> Result<(V::Value, Self::Variant), Self::Error> - where - V: de::DeserializeSeed<'de>, - { - let variant = seed.deserialize(ValueDe::new(self.parent))?; - Ok((variant, self)) - } -} - -impl<'de> de::VariantAccess<'de> for ValueEnumAccess<'de> { - type Error = Error; - - fn unit_variant(self) -> Result<(), Self::Error> { - Ok(()) - } - - fn newtype_variant_seed(self, seed: T) -> Result - where - T: de::DeserializeSeed<'de>, - { - seed.deserialize(ValueDe::new(self.value)) - } - - fn tuple_variant(self, _len: usize, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - let de = ValueDe::new(self.value); - de.deserialize_seq(visitor) - } - - fn struct_variant( - self, - _fields: &'static [&'static str], - visitor: V, - ) -> Result - where - V: de::Visitor<'de>, - { - let de = ValueDe::new(self.value); - de.deserialize_map(visitor) - } -} - -#[cfg(test)] -mod test { - use serde::{Deserialize, Serialize}; - - use crate::{error::Error, parser::Parser}; - - #[derive(Debug, Serialize, Deserialize, PartialEq, Eq)] - struct Foo { - a: i32, - b: String, - c: bool, - } - - #[derive(Debug, Serialize, Deserialize, PartialEq, Eq)] - enum Bar { - Foo, - Cheese(i8), - Hello { a: i32 }, - } - - #[test] - fn test_vec() { - let mut parser = Parser::from(b"ac\x00\x00\x00\x04\x00\x01\x02\x03"); - let arr: Vec = parser.next().unwrap(); - assert_eq!(arr, vec![0, 1, 2, 3]); - } - - #[test] - fn test_struct() { - let mut parser = 
Parser::from(b"M\x00\x00\x00\x29s\x00\x00\x00\x01ai\x00\x00\x00\x01s\x00\x00\x00\x01bs\x00\x00\x00\x0bHello Worlds\x00\x00\x00\x01cc\x01"); - let arr: Foo = parser.next().unwrap(); - assert_eq!( - arr, - Foo { - a: 1, - b: "Hello World".to_owned(), - c: true - } - ); - } - - #[test] - fn test_enum() { - let data = b"en\x00\x00\x00\x00ec\x00\x00\x00\x01\x10eM\x00\x00\x00\x0b\x00\x00\x00\x02s\x00\x00\x00\x01ai\x00\x00\x00\x10"; - - let mut parser = Parser::from(data); - - let foo: Bar = parser.next().unwrap(); - assert_eq!(foo, Bar::Foo); - - let cheese: Bar = parser.next().unwrap(); - assert_eq!(cheese, Bar::Cheese(16)); - - let hello: Bar = parser.next().unwrap(); - assert_eq!(hello, Bar::Hello { a: 16 }); - } - - #[test] - fn test_expected() { - let mut parser = Parser::from(b"s\x00\x00\x00\x02hi"); - - let err = parser.next::().expect_err("Error::Expected"); - if let Error::Expected(_) = err { - } else { - panic!("Expected Error::Expected"); - } - } - - #[test] - fn test_int_coersion() { - let mut parser = Parser::from(b"c\x32"); - - let val: i32 = parser.next().unwrap(); - assert_eq!(val, 0x32); - } - - #[test] - fn test_bad_int_coersion() { - let mut parser = Parser::from(b"i\x40\x00\x00\x00"); - - let err = parser.next::().expect_err("TryFromIntError"); - if let Error::DataError(_) = err { - } else { - panic!("Expected TryFromIntError"); - } - } - - #[test] - fn test_big_int_coersion() { - let mut parser = Parser::from(b"i\x00\x00\x00\x40"); - - let val: u8 = parser.next().unwrap(); - assert_eq!(val, 0x40); - } -} diff --git a/src/data/ser.rs b/src/data/ser.rs deleted file mode 100644 index 95cd84e..0000000 --- a/src/data/ser.rs +++ /dev/null @@ -1,440 +0,0 @@ -//! # Serde Serializer implementation for [Value] -//! -//! 
[Value]: mbon::data::Value - -use crate::error::Error; - -use super::Value; -use serde::ser::{self, Serializer}; - -pub struct ValueSer; -pub struct ValueListSer { - list: Vec, -} -pub struct ValueMapSer { - keys: Vec, - values: Vec, -} -pub struct ValueEnumSer { - embed: T, - variant: u32, -} - -impl<'a> Serializer for &'a mut ValueSer { - type Ok = Value; - type Error = Error; - - type SerializeSeq = ValueListSer; - type SerializeTuple = ValueListSer; - type SerializeTupleStruct = ValueListSer; - type SerializeTupleVariant = ValueEnumSer; - type SerializeMap = ValueMapSer; - type SerializeStruct = ValueMapSer; - type SerializeStructVariant = ValueEnumSer; - - fn serialize_bool(self, v: bool) -> Result { - Ok(Value::Char(v as i8)) - } - - fn serialize_i8(self, v: i8) -> Result { - Ok(Value::Char(v)) - } - - fn serialize_i16(self, v: i16) -> Result { - Ok(Value::Short(v)) - } - - fn serialize_i32(self, v: i32) -> Result { - Ok(Value::Int(v)) - } - - fn serialize_i64(self, v: i64) -> Result { - Ok(Value::Long(v)) - } - - fn serialize_u8(self, v: u8) -> Result { - Ok(Value::Char(v as i8)) - } - - fn serialize_u16(self, v: u16) -> Result { - Ok(Value::Short(v as i16)) - } - - fn serialize_u32(self, v: u32) -> Result { - Ok(Value::Int(v as i32)) - } - - fn serialize_u64(self, v: u64) -> Result { - Ok(Value::Long(v as i64)) - } - - fn serialize_f32(self, v: f32) -> Result { - Ok(Value::Float(v)) - } - - fn serialize_f64(self, v: f64) -> Result { - Ok(Value::Double(v)) - } - - fn serialize_char(self, v: char) -> Result { - if v.is_ascii() { - Ok(Value::Char(v as i8)) - } else { - Ok(Value::Int(v as i32)) - } - } - - fn serialize_str(self, v: &str) -> Result { - Ok(Value::Str(v.to_owned())) - } - - fn serialize_bytes(self, v: &[u8]) -> Result { - Ok(Value::Bytes(v.to_owned())) - } - - fn serialize_none(self) -> Result { - self.serialize_unit() - } - - fn serialize_some(self, value: &T) -> Result - where - T: serde::Serialize, - { - value.serialize(self) - } - - fn 
serialize_unit(self) -> Result { - Ok(Value::Null) - } - - fn serialize_unit_struct(self, _name: &'static str) -> Result { - self.serialize_unit() - } - - fn serialize_unit_variant( - self, - _name: &'static str, - variant_index: u32, - _variant: &'static str, - ) -> Result { - Ok(Value::Enum(variant_index, Box::new(Value::Null))) - } - - fn serialize_newtype_struct( - self, - _name: &'static str, - value: &T, - ) -> Result - where - T: serde::Serialize, - { - value.serialize(self) - } - - fn serialize_newtype_variant( - self, - _name: &'static str, - variant_index: u32, - _variant: &'static str, - value: &T, - ) -> Result - where - T: serde::Serialize, - { - Ok(Value::Enum( - variant_index, - Box::new(value.serialize(&mut ValueSer)?), - )) - } - - fn serialize_seq(self, _len: Option) -> Result { - Ok(ValueListSer::new()) - } - - fn serialize_tuple(self, _len: usize) -> Result { - Ok(ValueListSer::new()) - } - - fn serialize_tuple_struct( - self, - _name: &'static str, - _len: usize, - ) -> Result { - Ok(ValueListSer::new()) - } - - fn serialize_tuple_variant( - self, - _name: &'static str, - variant_index: u32, - _variant: &'static str, - _len: usize, - ) -> Result { - Ok(ValueEnumSer::new(variant_index, ValueListSer::new())) - } - - fn serialize_map(self, _len: Option) -> Result { - Ok(ValueMapSer::new()) - } - - fn serialize_struct( - self, - _name: &'static str, - _len: usize, - ) -> Result { - Ok(ValueMapSer::new()) - } - - fn serialize_struct_variant( - self, - _name: &'static str, - variant_index: u32, - _variant: &'static str, - _len: usize, - ) -> Result { - Ok(ValueEnumSer::new(variant_index, ValueMapSer::new())) - } -} - -impl ValueListSer { - fn new() -> Self { - Self { list: Vec::new() } - } - - fn add_element(&mut self, value: &T) -> Result<(), Error> - where - T: serde::Serialize, - { - self.list.push(value.serialize(&mut ValueSer)?); - Ok(()) - } - - fn finish(self) -> Result { - Ok(Value::List(self.list)) - } -} - -impl ValueMapSer { - fn new() -> 
Self { - Self { - keys: Vec::new(), - values: Vec::new(), - } - } - - fn add_key(&mut self, key: &T) -> Result<(), Error> - where - T: serde::Serialize, - { - self.keys.push(key.serialize(&mut ValueSer)?); - Ok(()) - } - - fn add_val(&mut self, value: &T) -> Result<(), Error> - where - T: serde::Serialize, - { - self.values.push(value.serialize(&mut ValueSer)?); - Ok(()) - } - - fn finish(self) -> Result { - let map = self.keys.into_iter().zip(self.values.into_iter()).collect(); - Ok(Value::Map(map)) - } -} - -impl ValueEnumSer { - fn new(variant: u32, embed: T) -> Self { - Self { embed, variant } - } -} - -impl ser::SerializeSeq for ValueListSer { - type Ok = Value; - type Error = Error; - - fn serialize_element(&mut self, value: &T) -> Result<(), Self::Error> - where - T: serde::Serialize, - { - self.add_element(value) - } - - fn end(self) -> Result { - self.finish() - } -} - -impl ser::SerializeTuple for ValueListSer { - type Ok = Value; - type Error = Error; - - fn serialize_element(&mut self, value: &T) -> Result<(), Self::Error> - where - T: serde::Serialize, - { - self.add_element(value) - } - - fn end(self) -> Result { - self.finish() - } -} - -impl ser::SerializeTupleStruct for ValueListSer { - type Ok = Value; - type Error = Error; - - fn serialize_field(&mut self, value: &T) -> Result<(), Self::Error> - where - T: serde::Serialize, - { - self.add_element(value) - } - - fn end(self) -> Result { - self.finish() - } -} - -impl ser::SerializeMap for ValueMapSer { - type Ok = Value; - type Error = Error; - - fn serialize_key(&mut self, key: &T) -> Result<(), Self::Error> - where - T: serde::Serialize, - { - self.add_key(key) - } - - fn serialize_value(&mut self, value: &T) -> Result<(), Self::Error> - where - T: serde::Serialize, - { - self.add_val(value) - } - - fn end(self) -> Result { - self.finish() - } -} - -impl ser::SerializeStruct for ValueMapSer { - type Ok = Value; - type Error = Error; - - fn serialize_field( - &mut self, - key: &'static str, - 
value: &T, - ) -> Result<(), Self::Error> - where - T: serde::Serialize, - { - self.add_key(key)?; - self.add_val(value) - } - - fn end(self) -> Result { - self.finish() - } -} - -impl ser::SerializeTupleVariant for ValueEnumSer { - type Ok = Value; - type Error = Error; - - fn serialize_field(&mut self, value: &T) -> Result<(), Self::Error> - where - T: serde::Serialize, - { - self.embed.add_element(value) - } - - fn end(self) -> Result { - let value = self.embed.finish()?; - Ok(Value::Enum(self.variant, Box::new(value))) - } -} - -impl ser::SerializeStructVariant for ValueEnumSer { - type Ok = Value; - type Error = Error; - - fn serialize_field( - &mut self, - key: &'static str, - value: &T, - ) -> Result<(), Self::Error> - where - T: serde::Serialize, - { - self.embed.add_key(key)?; - self.embed.add_val(value) - } - - fn end(self) -> Result { - let value = self.embed.finish()?; - Ok(Value::Enum(self.variant, Box::new(value))) - } -} - -#[cfg(test)] -mod test { - use serde::{Deserialize, Serialize}; - - use crate::dumper::Dumper; - - #[derive(Debug, Serialize, Deserialize, PartialEq, Eq)] - struct Foo { - a: i32, - b: String, - c: bool, - } - - #[derive(Debug, Serialize, Deserialize, PartialEq, Eq)] - enum Bar { - Foo, - Cheese(i8), - Hello { a: i32 }, - } - - #[test] - fn test_vec() { - let arr: Vec = vec![0, 1, 2, 3]; - - let mut dumper = Dumper::new(); - dumper.write(&arr).unwrap(); - assert_eq!(dumper.writer(), b"ac\x00\x00\x00\x04\x00\x01\x02\x03"); - } - - #[test] - fn test_struct() { - let data = Foo { - a: 1, - b: "Hello World".to_owned(), - c: true, - }; - - let mut dumper = Dumper::new(); - dumper.write(&data).unwrap(); - assert_eq!(dumper.writer(), b"M\x00\x00\x00\x29s\x00\x00\x00\x01ai\x00\x00\x00\x01s\x00\x00\x00\x01bs\x00\x00\x00\x0bHello Worlds\x00\x00\x00\x01cc\x01"); - } - - #[test] - fn test_enum() { - let foo = Bar::Foo; - let cheese = Bar::Cheese(16); - let hello = Bar::Hello { a: 16 }; - - let expected = 
b"en\x00\x00\x00\x00ec\x00\x00\x00\x01\x10ems\x00\x00\x00\x01i\x00\x00\x00\x01\x00\x00\x00\x02a\x00\x00\x00\x10"; - - let mut dumper = Dumper::new(); - dumper.write(&foo).unwrap(); - dumper.write(&cheese).unwrap(); - dumper.write(&hello).unwrap(); - assert_eq!(dumper.writer(), expected); - } -} diff --git a/src/dumper.rs b/src/dumper.rs deleted file mode 100644 index dd43233..0000000 --- a/src/dumper.rs +++ /dev/null @@ -1,908 +0,0 @@ -//! # Dump mbon data -//! -//! Use [Dumper] to serialize mbon data. - -use byteorder::{BigEndian, WriteBytesExt}; -use serde::Serialize; - -use std::io::Write; - -use crate::{ - data::{ser::ValueSer, Mark, Type, Value}, - error::{Error, Result}, - object::ObjectDump, -}; - -/// A struct that writes binary data to a bytes buffer. -/// -/// -/// You can either write data that can be serialized using -/// * [`write()`](Dumper::write) -/// * [`write_obj()`](Dumper::write_obj) -/// -/// Or you can write data directly using -/// * [`write_long()`](Dumper::write_long) -/// * [`write_int()`](Dumper::write_int) -/// * [`write_short()`](Dumper::write_short) -/// * [`write_char()`](Dumper::write_char) -/// * [`write_float()`](Dumper::write_float) -/// * [`write_double()`](Dumper::write_double) -/// * [`write_str()`](Dumper::write_str) -/// * [`write_bytes()`](Dumper::write_bytes) -/// * [`write_object()`](Dumper::write_object) -/// * [`write_enum()`](Dumper::write_enum) -/// * [`write_list()`](Dumper::write_list) -/// * [`write_map()`](Dumper::write_map) -#[derive(Debug)] -pub struct Dumper(W); - -impl From for Dumper -where - T: Write, -{ - fn from(t: T) -> Self { - Dumper(t) - } -} - -impl AsRef for Dumper { - fn as_ref(&self) -> &W { - &self.0 - } -} - -impl AsMut for Dumper { - fn as_mut(&mut self) -> &mut W { - &mut self.0 - } -} - -impl Dumper> { - #[inline] - pub fn new() -> Self { - Self(Vec::new()) - } -} - -impl Dumper -where - W: Write, -{ - /// Get the underlying writer - #[inline] - pub fn writer(self) -> W { - self.0 - } - - /// 
Get the underlying writer as a reference - #[inline] - pub fn get_writer(&self) -> &W { - &self.0 - } - - /// Get the underlying writer as a mutable reference - #[inline] - pub fn get_writer_mut(&mut self) -> &mut W { - &mut self.0 - } - - #[inline] - fn write_data_long(&mut self, val: i64) -> Result<()> { - self.0.write_i64::(val)?; - Ok(()) - } - - #[inline] - fn write_data_int(&mut self, val: i32) -> Result<()> { - self.0.write_i32::(val)?; - Ok(()) - } - - #[inline] - fn write_data_short(&mut self, val: i16) -> Result<()> { - self.0.write_i16::(val)?; - Ok(()) - } - - #[inline] - fn write_data_char(&mut self, val: i8) -> Result<()> { - self.0.write_i8(val)?; - Ok(()) - } - - #[inline] - fn write_data_float(&mut self, val: f32) -> Result<()> { - self.0.write_f32::(val)?; - Ok(()) - } - - #[inline] - fn write_data_double(&mut self, val: f64) -> Result<()> { - self.0.write_f64::(val)?; - Ok(()) - } - - #[inline] - fn write_data_bytes(&mut self, val: &[u8]) -> Result<()> { - Ok(self.0.write_all(val)?) - } - - #[inline] - fn write_data_str(&mut self, val: &str) -> Result<()> { - Ok(self.0.write_all(val.as_bytes())?) 
- } - - #[inline] - fn write_data_enum(&mut self, variant: u32, val: impl AsRef) -> Result<()> { - self.write_data_int(variant as i32)?; - self.write_data_value(val) - } - - fn write_data_array<'t, I>(&mut self, val: I) -> Result<()> - where - I: IntoIterator, - { - for v in val { - self.write_data_value(v)?; - } - - Ok(()) - } - - fn write_data_list<'t, I>(&mut self, val: I) -> Result<()> - where - I: IntoIterator, - { - for v in val { - self.write_value(v)?; - } - Ok(()) - } - - fn write_data_dict<'t, I>(&mut self, val: I) -> Result<()> - where - I: IntoIterator, - { - for (k, v) in val { - self.write_data_value(k)?; - self.write_data_value(v)?; - } - - Ok(()) - } - - fn write_data_map<'t, I>(&mut self, val: I) -> Result<()> - where - I: IntoIterator, - { - for (k, v) in val { - self.write_value(k)?; - self.write_value(v)?; - } - Ok(()) - } - - fn write_data_value(&mut self, val: impl AsRef) -> Result<()> { - let val = val.as_ref(); - match val { - Value::Long(v) => self.write_data_long(*v), - Value::Int(v) => self.write_data_int(*v), - Value::Short(v) => self.write_data_short(*v), - Value::Char(v) => self.write_data_char(*v), - Value::Float(v) => self.write_data_float(*v), - Value::Double(v) => self.write_data_double(*v), - Value::Bytes(v) => self.write_data_bytes(v), - Value::Str(v) => self.write_data_str(v), - Value::Object(v) => self.write_data_bytes(v), - Value::Enum(var, v) => self.write_data_enum(*var, v), - Value::Null => Ok(()), - Value::List(v) => { - if Value::can_be_array(v) { - self.write_data_array(v) - } else { - self.write_data_list(v) - } - } - Value::Map(v) => { - if Value::can_be_dict(v) { - self.write_data_dict(v) - } else { - self.write_data_map(v) - } - } - } - } - - #[inline] - fn write_mark_long(&mut self) -> Result<()> { - Ok(self.0.write_u8(Type::Long.prefix())?) - } - - #[inline] - fn write_mark_int(&mut self) -> Result<()> { - Ok(self.0.write_u8(Type::Int.prefix())?) 
- } - - #[inline] - fn write_mark_short(&mut self) -> Result<()> { - Ok(self.0.write_u8(Type::Short.prefix())?) - } - - #[inline] - fn write_mark_char(&mut self) -> Result<()> { - Ok(self.0.write_u8(Type::Char.prefix())?) - } - - #[inline] - fn write_mark_float(&mut self) -> Result<()> { - Ok(self.0.write_u8(Type::Float.prefix())?) - } - - #[inline] - fn write_mark_double(&mut self) -> Result<()> { - Ok(self.0.write_u8(Type::Double.prefix())?) - } - - #[inline] - fn write_mark_null(&mut self) -> Result<()> { - Ok(self.0.write_u8(Type::Null.prefix())?) - } - - fn write_mark_bytes(&mut self, len: usize) -> Result<()> { - self.0.write_u8(Type::Bytes.prefix())?; - let len: u32 = len.try_into()?; - self.write_data_int(len as i32) - } - - fn write_mark_str(&mut self, len: usize) -> Result<()> { - self.0.write_u8(Type::Str.prefix())?; - let len: u32 = len.try_into()?; - self.write_data_int(len as i32) - } - - fn write_mark_object(&mut self, len: usize) -> Result<()> { - self.0.write_u8(Type::Object.prefix())?; - let len: u32 = len.try_into()?; - self.write_data_int(len as i32) - } - - fn write_mark_enum(&mut self, mark: impl AsRef) -> Result<()> { - self.0.write_u8(Type::Enum.prefix())?; - self.write_mark(mark) - } - - fn write_mark_array(&mut self, len: usize, mark: impl AsRef) -> Result<()> { - self.0.write_u8(Type::Array.prefix())?; - self.write_mark(mark)?; - let len: u32 = len.try_into()?; - self.write_data_int(len as i32) - } - - fn write_mark_list(&mut self, size: usize) -> Result<()> { - self.0.write_u8(Type::List.prefix())?; - let size: u32 = size.try_into()?; - self.write_data_int(size as i32) - } - - fn write_mark_dict( - &mut self, - len: usize, - key_mark: impl AsRef, - val_mark: impl AsRef, - ) -> Result<()> { - self.0.write_u8(Type::Dict.prefix())?; - let len: u32 = len.try_into()?; - self.write_mark(key_mark)?; - self.write_mark(val_mark)?; - self.write_data_int(len as i32) - } - - fn write_mark_map(&mut self, size: usize) -> Result<()> { - 
self.0.write_u8(Type::Map.prefix())?; - let size: u32 = size.try_into()?; - self.write_data_int(size as i32) - } - - fn write_mark(&mut self, mark: impl AsRef) -> Result<()> { - match mark.as_ref() { - Mark::Long => self.write_mark_long(), - Mark::Int => self.write_mark_int(), - Mark::Short => self.write_mark_short(), - Mark::Char => self.write_mark_char(), - Mark::Float => self.write_mark_float(), - Mark::Double => self.write_mark_double(), - Mark::Bytes(n) => self.write_mark_bytes(*n), - Mark::Str(n) => self.write_mark_str(*n), - Mark::Object(n) => self.write_mark_object(*n), - Mark::Enum(m) => self.write_mark_enum(m), - Mark::Null => self.write_mark_null(), - Mark::Array(n, m) => self.write_mark_array(*n, m), - Mark::List(s) => self.write_mark_list(*s), - Mark::Dict(n, k, v) => self.write_mark_dict(*n, k, v), - Mark::Map(s) => self.write_mark_map(*s), - } - } - - /// Write a serializeable object to the buffer. - /// - /// To use this function, your object must implement Serialize. - /// - /// ``` - /// use mbon::dumper::Dumper; - /// use serde::Serialize; - /// - /// #[derive(Debug, Serialize)] - /// struct Foo { - /// a: i32, - /// b: String, - /// c: f32, - /// } - /// - /// let mut dumper = Dumper::new(); - /// let foo = Foo { - /// a: 42, - /// b: "Hello World".to_owned(), - /// c: 69.420 - /// }; - /// dumper.write(&foo).unwrap(); - /// - /// ``` - pub fn write(&mut self, value: &T) -> Result<()> - where - T: Serialize, - { - let value = value.serialize(&mut ValueSer)?; - self.write_value(&value) - } - - /// Write a binary object to the buffer. - /// - /// To use this function, your object must implement ObjectSerializer. 
- /// - /// ``` - /// use mbon::dumper::Dumper; - /// use mbon::object::ObjectDump; - /// use mbon::error::Error; - /// - /// struct Foo { - /// a: i32, - /// b: String, - /// c: f32, - /// } - /// - /// impl ObjectDump for Foo { - /// type Error = Error; - /// - /// fn dump_object(&self) -> Result, Self::Error> { - /// let mut dumper = Dumper::new(); - /// dumper.write(&self.a)?; - /// dumper.write(&self.b)?; - /// dumper.write(&self.c)?; - /// Ok(dumper.writer()) - /// } - /// } - /// - /// let mut dumper = Dumper::new(); - /// let foo = Foo { - /// a: 42, - /// b: "Hello World".to_string(), - /// c: 69.420, - /// }; - /// dumper.write_obj(&foo); - /// ``` - pub fn write_obj(&mut self, value: &T) -> Result<()> - where - T: ObjectDump, - ::Error: std::error::Error + 'static, - { - let data = Error::from_res(value.dump_object())?; - self.write_object(&data) - } - - /// Write a 64 bit integer to the dumper - /// - /// ``` - /// use mbon::dumper::Dumper; - /// - /// let mut dumper = Dumper::new(); - /// dumper.write_long(0x1020304050607080).unwrap(); - /// - /// assert_eq!(dumper.writer(), b"l\x10\x20\x30\x40\x50\x60\x70\x80"); - /// ``` - pub fn write_long(&mut self, val: i64) -> Result<()> { - self.write_mark_long()?; - self.write_data_long(val) - } - - /// Write a 32 bit integer to the dumper - /// - /// ``` - /// use mbon::dumper::Dumper; - /// - /// let mut dumper = Dumper::new(); - /// dumper.write_int(0x10203040).unwrap(); - /// - /// assert_eq!(dumper.writer(), b"i\x10\x20\x30\x40"); - /// ``` - pub fn write_int(&mut self, val: i32) -> Result<()> { - self.write_mark_int()?; - self.write_data_int(val) - } - - /// Write a 16 bit integer to the dumper - /// - /// ``` - /// use mbon::dumper::Dumper; - /// - /// let mut dumper = Dumper::new(); - /// dumper.write_short(0x1020).unwrap(); - /// - /// assert_eq!(dumper.writer(), b"h\x10\x20"); - /// ``` - pub fn write_short(&mut self, val: i16) -> Result<()> { - self.write_mark_short()?; - self.write_data_short(val) - 
} - - /// Write a 8 bit integer to the dumper - /// - /// ``` - /// use mbon::dumper::Dumper; - /// - /// let mut dumper = Dumper::new(); - /// dumper.write_char(0x10).unwrap(); - /// - /// assert_eq!(dumper.writer(), b"c\x10"); - /// ``` - pub fn write_char(&mut self, val: i8) -> Result<()> { - self.write_mark_char()?; - self.write_data_char(val) - } - - /// Write a 32 bit IEEE754 float to the dumper - /// - /// ``` - /// use mbon::dumper::Dumper; - /// - /// let mut dumper = Dumper::new(); - /// dumper.write_float(0.0).unwrap(); - /// - /// assert_eq!(dumper.writer(), b"f\x00\x00\x00\x00"); - /// ``` - pub fn write_float(&mut self, val: f32) -> Result<()> { - self.write_mark_float()?; - self.write_data_float(val) - } - - /// Write a 64 bit IEEE754 float to the dumper - /// - /// ``` - /// use mbon::dumper::Dumper; - /// - /// let mut dumper = Dumper::new(); - /// dumper.write_double(0.0).unwrap(); - /// - /// assert_eq!(dumper.writer(), b"d\x00\x00\x00\x00\x00\x00\x00\x00"); - /// ``` - pub fn write_double(&mut self, val: f64) -> Result<()> { - self.write_mark_double()?; - self.write_data_double(val) - } - - /// Write a string of bytes to the dumper. - /// - /// Note: there can be at most 4294967295 bytes (4.29GB) in the bytearray. - /// - /// ``` - /// use mbon::dumper::Dumper; - /// - /// let mut dumper = Dumper::new(); - /// dumper.write_bytes(b"hello").unwrap(); - /// - /// assert_eq!(dumper.writer(), b"b\x00\x00\x00\x05hello"); - /// ``` - pub fn write_bytes(&mut self, val: impl AsRef<[u8]>) -> Result<()> { - let val = val.as_ref(); - self.write_mark_bytes(val.len())?; - self.write_data_bytes(val) - } - - /// Write a string to the dumper. - /// - /// Note: there can be at most 4294967295 bytes (4.29GB) in the string. 
- /// - /// ``` - /// use mbon::dumper::Dumper; - /// - /// let mut dumper = Dumper::new(); - /// dumper.write_str("hello").unwrap(); - /// - /// assert_eq!(dumper.writer(), b"s\x00\x00\x00\x05hello"); - /// ``` - pub fn write_str(&mut self, val: impl AsRef) -> Result<()> { - let val = val.as_ref(); - self.write_mark_str(val.len())?; - self.write_data_str(val) - } - - /// Write a binary object to the dumper. - /// - /// This is meant for embedding binary data within the dumper. - /// - /// Note: there can be at most 4294967295 bytes (4.29GB) in the data. - /// - /// ``` - /// use mbon::dumper::Dumper; - /// - /// let mut dumper = Dumper::new(); - /// dumper.write_object("hello").unwrap(); - /// - /// assert_eq!(dumper.writer(), b"o\x00\x00\x00\x05hello"); - /// ``` - pub fn write_object(&mut self, val: impl AsRef<[u8]>) -> Result<()> { - let val = val.as_ref(); - self.write_mark_object(val.len())?; - self.write_data_bytes(val) - } - - /// Write an indexed value to the dumper. - /// - /// This is meant for compatibility with rust enum serialization. - /// - /// An enum is stored as a variant and a value. The variant should determine - /// the type of data that is stored. - /// - /// ``` - /// use mbon::dumper::Dumper; - /// use mbon::data::Value; - /// - /// let mut dumper = Dumper::new(); - /// dumper.write_enum(1, Value::Int(0x3000)).unwrap(); - /// - /// assert_eq!(dumper.writer(), b"ei\x00\x00\x00\x01\x00\x00\x30\x00"); - /// ``` - pub fn write_enum(&mut self, variant: u32, val: impl AsRef) -> Result<()> { - let val = val.as_ref(); - self.write_mark_enum(Mark::from(val))?; - self.write_data_enum(variant, val) - } - - /// Write a null value to the dumper. - /// - /// ``` - /// use mbon::dumper::Dumper; - /// - /// let mut dumper = Dumper::new(); - /// dumper.write_null(); - /// - /// assert_eq!(dumper.writer(), b"n"); - /// ``` - pub fn write_null(&mut self) -> Result<()> { - self.write_mark_null() - } - - /// Write a list of values to the dumper. 
- /// - /// This can be written in two forms: - /// * An array of fixed size items - /// * A list of any type of item - /// - /// Note: when the list is stored as an array, there can be at most 4294967296 - /// items and when the list is stored as a list, the total size of the data - /// can be no more than 4294967296 bytes (4.29 GB) - /// - /// ``` - /// use mbon::dumper::Dumper; - /// use mbon::data::Value; - /// - /// let mut dumper = Dumper::new(); - /// dumper.write_list(vec![ - /// Value::Char(0x10), - /// Value::Char(0x20), - /// Value::Char(0x30), - /// Value::Char(0x40), - /// Value::Char(0x50) - /// ]); - /// - /// assert_eq!(dumper.writer(), b"ac\x00\x00\x00\x05\x10\x20\x30\x40\x50"); - /// - /// let mut dumper = Dumper::new(); - /// dumper.write_list(vec![ - /// Value::Char(0x10), - /// Value::Char(0x20), - /// Value::Char(0x30), - /// Value::Char(0x40), - /// Value::Str("Hello".to_owned()) - /// ]); - /// - /// assert_eq!(dumper.writer(), - /// b"A\x00\x00\x00\x12c\x10c\x20c\x30c\x40s\x00\x00\x00\x05Hello"); - /// ``` - pub fn write_list(&mut self, val: impl AsRef>) -> Result<()> { - let val = val.as_ref(); - if Value::can_be_array(val) { - self.write_mark_array(val.len(), Mark::from(val.first().unwrap()))?; - self.write_data_array(val) - } else { - self.write_mark_list(val.iter().map(|v| Mark::from(v).size()).sum())?; - self.write_data_list(val) - } - } - - /// Write a key, value map of values to the dumper. 
- /// - /// This can be written in two forms: - /// * An dict of fixed size key value pairs - /// * A map of any type of key value pairs - /// - /// Note: when the map is stored as a dict, there can be at most 4294967296 - /// pairs and when the map is stored as a map, the total size of the data - /// can be no more than 4294967296 bytes (4.29 GB) - /// - /// ``` - /// use mbon::dumper::Dumper; - /// use mbon::data::Value; - /// - /// let mut dumper = Dumper::new(); - /// dumper.write_map(vec![ - /// (Value::Str("a".to_owned()), Value::Char(0x10)), - /// (Value::Str("b".to_owned()), Value::Char(0x20)), - /// (Value::Str("c".to_owned()), Value::Char(0x30)), - /// ]); - /// - /// assert_eq!(dumper.writer(), - /// b"ms\x00\x00\x00\x01c\x00\x00\x00\x03a\x10b\x20c\x30"); - /// - /// let mut dumper = Dumper::new(); - /// dumper.write_map(vec![ - /// (Value::Str("a".to_owned()), Value::Char(0x10)), - /// (Value::Str("b".to_owned()), Value::Char(0x20)), - /// (Value::Str("c".to_owned()), Value::Short(0x30)), - /// ]); - /// - /// assert_eq!(dumper.writer(), - /// b"M\x00\x00\x00\x19s\x00\x00\x00\x01ac\x10s\x00\x00\x00\x01bc\x20s\x00\x00\x00\x01ch\x00\x30"); - /// ``` - pub fn write_map(&mut self, val: impl AsRef>) -> Result<()> { - let val = val.as_ref(); - if Value::can_be_dict(val) { - let (k, v) = val.first().unwrap(); - self.write_mark_dict(val.len(), Mark::from(k), Mark::from(v))?; - self.write_data_dict(val) - } else { - self.write_mark_map(val.iter().map(|(k, v)| k.size() + v.size()).sum())?; - self.write_data_map(val) - } - } - - /// Write any value to the dumper. - /// - /// This will call the appropriate function for the given value type. 
- pub fn write_value(&mut self, val: impl AsRef) -> Result<()> { - let val = val.as_ref(); - match val { - Value::Long(v) => self.write_long(*v), - Value::Int(v) => self.write_int(*v), - Value::Short(v) => self.write_short(*v), - Value::Char(v) => self.write_char(*v), - Value::Float(v) => self.write_float(*v), - Value::Double(v) => self.write_double(*v), - Value::Bytes(v) => self.write_bytes(v), - Value::Str(v) => self.write_str(v), - Value::Object(v) => self.write_object(v), - Value::Enum(variant, v) => self.write_enum(*variant, v), - Value::Null => self.write_null(), - Value::List(v) => self.write_list(v), - Value::Map(v) => self.write_map(v), - } - } -} - -#[cfg(test)] -mod test { - use super::*; - - #[test] - fn test_long() { - let mut dumper = Dumper::new(); - dumper.write_long(0x3040).unwrap(); - assert_eq!(dumper.0, b"l\x00\x00\x00\x00\x00\x00\x30\x40"); - } - - #[test] - fn test_int() { - let mut dumper = Dumper::new(); - dumper.write_int(0x3040).unwrap(); - assert_eq!(dumper.0, b"i\x00\x00\x30\x40"); - } - - #[test] - fn test_short() { - let mut dumper = Dumper::new(); - dumper.write_short(0x3040).unwrap(); - assert_eq!(dumper.0, b"h\x30\x40"); - } - - #[test] - fn test_char() { - let mut dumper = Dumper::new(); - dumper.write_char(0x40).unwrap(); - assert_eq!(dumper.0, b"c\x40"); - } - - #[test] - fn test_float() { - let mut dumper = Dumper::new(); - dumper.write_float(0.0).unwrap(); - assert_eq!(dumper.0, b"f\x00\x00\x00\x00"); - } - - #[test] - fn test_double() { - let mut dumper = Dumper::new(); - dumper.write_double(0.0).unwrap(); - assert_eq!(dumper.0, b"d\x00\x00\x00\x00\x00\x00\x00\x00"); - } - - #[test] - fn test_bytes() { - let mut dumper = Dumper::new(); - dumper.write_bytes(b"Hello world!").unwrap(); - assert_eq!(dumper.0, b"b\x00\x00\x00\x0cHello world!"); - } - - #[test] - fn test_str() { - let mut dumper = Dumper::new(); - dumper.write_str("Hello world!").unwrap(); - assert_eq!(dumper.0, b"s\x00\x00\x00\x0cHello world!"); - } - - #[test] - 
fn test_object() { - let mut dumper = Dumper::new(); - dumper.write_object(b"Hello world!").unwrap(); - assert_eq!(dumper.0, b"o\x00\x00\x00\x0cHello world!"); - } - - #[test] - fn test_enum() { - let mut dumper = Dumper::new(); - dumper.write_enum(1, &Value::Int(4)).unwrap(); - assert_eq!(dumper.0, b"ei\x00\x00\x00\x01\x00\x00\x00\x04"); - } - - #[test] - fn test_null() { - let mut dumper = Dumper::new(); - dumper.write_null().unwrap(); - assert_eq!(dumper.0, b"n"); - } - - #[test] - fn test_array() { - let mut dumper = Dumper::new(); - let value = vec![ - Value::Char(1), - Value::Char(2), - Value::Char(3), - Value::Char(4), - Value::Char(5), - ]; - dumper.write_list(&value).unwrap(); - assert_eq!(dumper.0, b"ac\x00\x00\x00\x05\x01\x02\x03\x04\x05"); - } - - #[test] - fn test_obj_array() { - let mut dumper = Dumper::new(); - let value = vec![ - Value::Object(b"i\x00\x00\x00\x69c\x01".to_vec()), - Value::Object(b"i\x00\x00\x00\x10c\x02".to_vec()), - Value::Object(b"i\x00\x00\x00\x42c\x03".to_vec()), - ]; - dumper.write_list(&value).unwrap(); - assert_eq!(dumper.0, b"ao\x00\x00\x00\x07\x00\x00\x00\x03i\x00\x00\x00\x69c\x01i\x00\x00\x00\x10c\x02i\x00\x00\x00\x42c\x03") - } - - #[test] - fn test_obj_array_bad_size() { - let mut dumper = Dumper::new(); - let value = vec![ - Value::Object(b"i\x00\x00\x00\x69c\x01".to_vec()), - Value::Object(b"i\x00\x00\x00\x10h\x00\x02".to_vec()), - Value::Object(b"i\x00\x00\x00\x42c\x03".to_vec()), - ]; - dumper.write_list(&value).unwrap(); - assert_eq!(dumper.0, b"A\x00\x00\x00\x25o\x00\x00\x00\x07i\x00\x00\x00\x69c\x01o\x00\x00\x00\x08i\x00\x00\x00\x10h\x00\x02o\x00\x00\x00\x07i\x00\x00\x00\x42c\x03") - } - - #[test] - fn test_obj_array_bad_type() { - let mut dumper = Dumper::new(); - let value = vec![ - Value::Object(b"i\x00\x00\x00\x69c\x01".to_vec()), - Value::Bytes(b"i\x00\x00\x00\x10c\x02".to_vec()), - Value::Object(b"i\x00\x00\x00\x42c\x03".to_vec()), - ]; - dumper.write_list(&value).unwrap(); - assert_eq!(dumper.0, 
b"A\x00\x00\x00\x24o\x00\x00\x00\x07i\x00\x00\x00\x69c\x01b\x00\x00\x00\x07i\x00\x00\x00\x10c\x02o\x00\x00\x00\x07i\x00\x00\x00\x42c\x03") - } - - #[test] - fn test_2d_array() { - let mut dumper = Dumper::new(); - let value = vec![ - Value::List(vec![ - Value::Char(1), - Value::Char(2), - Value::Char(3), - Value::Char(4), - Value::Char(5), - ]), - Value::List(vec![ - Value::Char(6), - Value::Char(7), - Value::Char(8), - Value::Char(9), - Value::Char(10), - ]), - Value::List(vec![ - Value::Char(11), - Value::Char(12), - Value::Char(13), - Value::Char(14), - Value::Char(15), - ]), - ]; - - dumper.write_list(value).unwrap(); - - assert_eq!( - dumper.0, - b"aac\x00\x00\x00\x05\x00\x00\x00\x03\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" - ) - } - - #[test] - fn test_list() { - let mut dumper = Dumper::new(); - let value = vec![ - Value::Str("Hello".into()), - Value::Char(2), - Value::Char(3), - Value::Char(4), - Value::Char(5), - ]; - dumper.write_list(&value).unwrap(); - assert_eq!( - dumper.0, - b"A\x00\x00\x00\x12s\x00\x00\x00\x05Helloc\x02c\x03c\x04c\x05" - ); - } - - #[test] - fn test_map() { - let mut dumper = Dumper::new(); - let value = vec![ - (Value::Str("a".into()), Value::Char(2)), - (Value::Str("b".into()), Value::Short(5)), - ]; - dumper.write_map(&value).unwrap(); - assert_eq!( - dumper.0, - b"M\x00\x00\x00\x11s\x00\x00\x00\x01ac\x02s\x00\x00\x00\x01bh\x00\x05" - ); - } - - #[test] - fn test_dict() { - let mut dumper = Dumper::new(); - let value = vec![ - (Value::Str("a".into()), Value::Char(2)), - (Value::Str("b".into()), Value::Char(5)), - ]; - dumper.write_map(&value).unwrap(); - assert_eq!(dumper.0, b"ms\x00\x00\x00\x01c\x00\x00\x00\x02a\x02b\x05"); - } -} diff --git a/src/engine.rs b/src/engine.rs new file mode 100644 index 0000000..4a09801 --- /dev/null +++ b/src/engine.rs @@ -0,0 +1,309 @@ +use maybe_async::maybe_async; + +use std::{ + fs::File, + io::{self, Read, Seek, SeekFrom}, + path::Path, + thread::JoinHandle, +}; + 
+#[cfg(feature = "async")] +use std::sync::{Arc, Mutex, MutexGuard}; +#[cfg(feature = "async-tokio")] +use tokio::task::spawn_blocking; + +use crate::{ + buffer::BufferedReadWrite, + concurrent::{ConcurrentEngineClient, ConcurrentEngineWrapper}, + data::{Data, PartialItem}, + errors::{MbonError, MbonResult}, + marks::Mark, +}; + +/// Functions that are available in an Mbon engine reader +/// +/// These are primarily functions that are for [crate::data] items to use for +/// parsing. +/// +/// The specific functions in this trait need to be narrowed down a bit more. +/// +/// There should be functions that are specialized to the different types of +/// items that are available. +/// +/// I would also like the idea to be able to parse an item in its entirety if +/// requested. Currently, it is setup so that each item that is parsed is only +/// partially parsed. +#[maybe_async] +pub trait MbonParserRead { + async fn parse_mark(&mut self, location: SeekFrom) -> MbonResult<(Mark, u64)>; + async fn parse_data(&mut self, mark: &Mark, location: SeekFrom) -> MbonResult<(Data, u64)>; + async fn parse_item(&mut self, location: SeekFrom) -> MbonResult; + async fn parse_item_n( + &mut self, + location: SeekFrom, + count: Option, + bytes: u64, + parse_data: bool, + ) -> MbonResult>; + async fn parse_data_n( + &mut self, + mark: &Mark, + location: SeekFrom, + n: usize, + ) -> MbonResult>; +} + +#[cfg(feature = "sync")] +type Reader = BufferedReadWrite; +#[cfg(feature = "async")] +type Reader = Arc>>; + +/// Mbon Engine +/// +/// Manages I/O operations for an Mbon file. 
+pub struct Engine { + file: Reader, +} + +#[cfg(feature = "async")] +impl Clone for Engine { + fn clone(&self) -> Self { + Engine { + file: self.file.clone(), + } + } +} + +impl Engine { + /// Open an Mbon file in write mode + pub fn open_write(path: impl AsRef) -> io::Result { + let f = File::options() + .read(true) + .write(true) + .create(true) + .open(path)?; + Ok(Self::new(f)) + } + + /// Open an Mbon file in read mode + pub fn open_read(path: impl AsRef) -> io::Result { + let f = File::options().read(true).open(path)?; + Ok(Self::new(f)) + } +} + +#[cfg(feature = "async")] +impl Engine { + #[inline] + fn get_file(&mut self) -> MutexGuard> { + self.file.lock().unwrap() + } + + #[inline] + fn new_file(f: BufferedReadWrite) -> Arc>> { + Arc::new(Mutex::new(f)) + } +} + +#[cfg(feature = "sync")] +impl Engine { + #[inline] + fn get_file(&mut self) -> &mut BufferedReadWrite { + &mut self.file + } + #[inline] + fn new_file(f: BufferedReadWrite) -> BufferedReadWrite { + f + } +} + +impl Engine +where + F: Read + Seek + Send + 'static, +{ + /// Spawn a new thread to process engine requests + /// + /// This will return a [JoinHandle] for the new thread and an + /// [ConcurrentEngineClient] which will allow for multiple concurrent + /// requests to the engine. 
+ pub fn spawn_client_thread(self) -> (JoinHandle>, ConcurrentEngineClient) { + let (wrapper, client) = ConcurrentEngineWrapper::new(self); + let handle = wrapper.spawn(); + (handle, client) + } +} + +impl Engine +where + F: Read + Seek, +{ + /// Create a new engine from a file + pub fn new(file: F) -> Self { + Self { + file: Self::new_file(BufferedReadWrite::new(file).build()), + } + } + + /// Synchronously verify the 8-byte signature `EE 6D 62 6E 0D 0A 1A 00` defined in spec/file-structure.md; rewinds to the start of the file and returns `Ok(false)` on a mismatch + pub fn verify_signature_sync(&mut self) -> MbonResult { + #[allow(unused_mut)] + let mut file = self.get_file(); + file.rewind()?; + let mut buf = [0u8; 8]; + file.read_exact(&mut buf)?; + const EXPECTED: [u8; 8] = [0xEE, 0x6D, 0x62, 0x6E, 0x0D, 0x0A, 0x1A, 0x00]; + if buf != EXPECTED { + return Ok(false); + } + Ok(true) + } + + /// Synchronously parse a mark at the given location + pub fn parse_mark_sync(&mut self, location: SeekFrom) -> MbonResult<(Mark, u64)> { + #[allow(unused_mut)] + let mut file = self.get_file(); + let pos = file.seek(location)?; + let (m, _) = Mark::parse(&mut *file)?; + Ok((m, pos)) + } + + /// Synchronously parse an item at the given location + pub fn parse_item_sync(&mut self, location: SeekFrom) -> MbonResult { + #[allow(unused_mut)] + let mut file = self.get_file(); + let pos = file.seek(location)?; + let (m, _) = Mark::parse(&mut *file)?; + let mut item = PartialItem::new(m, pos); + item.parse_data(&mut *file)?; + Ok(item) + } + + /// Synchronously parse several items in a sequence + pub fn parse_item_n_sync( + &mut self, + location: SeekFrom, + count: Option, + bytes: u64, + parse_data: bool, + ) -> MbonResult> { + #[allow(unused_mut)] + let mut file = self.get_file(); + + let mut items = Vec::new(); + let mut read = 0; + let mut pos = file.seek(location)?; + + while count.map(|count| items.len() < count).unwrap_or(true) && read < bytes { + let (m, _) = Mark::parse(&mut *file)?; + let mut item = PartialItem::new(m, pos); + if parse_data { + item.parse_data(&mut *file)?; + } + + let len = 
item.mark.total_len(); + read += len; + + pos = file.seek(SeekFrom::Start(pos + len))?; + items.push(item); + } + + if read > bytes { + return Err(MbonError::InvalidMark); + } + + Ok(items) + } + + pub fn parse_data_sync(&mut self, mark: &Mark, location: SeekFrom) -> MbonResult<(Data, u64)> { + #[allow(unused_mut)] + let mut file = self.get_file(); + let pos = file.seek(location)?; + let data = Data::parse(&mut *file, mark)?; + Ok((data, pos)) + } + + pub fn parse_data_n_sync( + &mut self, + mark: &Mark, + location: SeekFrom, + n: usize, + ) -> MbonResult> { + #[allow(unused_mut)] + let mut file = self.get_file(); + + let mut items = Vec::new(); + let start = file.seek(location)?; + + let len = mark.data_len(); + + for i in 0..n { + file.seek(SeekFrom::Start(start + (len * i as u64)))?; + let data = Data::parse(&mut *file, mark)?; + items.push(data); + } + + Ok(items) + } +} + +#[cfg(feature = "async-tokio")] +macro_rules! mbon_parser_impl { + ($self:ident, $s:ident => $expr:expr) => {{ + let mut $s = $self.clone(); + spawn_blocking(move || $expr).await.unwrap() + }}; + ($self:ident, ($($to_clone:ident),*) $s:ident => $expr:expr) => {{ + let mut $s = $self.clone(); + $(let $to_clone = $to_clone.clone());*; + spawn_blocking(move || $expr).await.unwrap() + }}; +} + +#[cfg(feature = "sync")] +macro_rules! 
mbon_parser_impl { + ($self:ident, $s:ident => $expr:expr) => {{ + let $s = $self; + $expr + }}; + ($self:ident, ($($to_clone:ident),*) $s:ident => $expr:expr) => {{ + let $s = $self; + $expr + }}; +} + +#[maybe_async] +impl MbonParserRead for Engine +where + F: Read + Seek + Send + 'static, +{ + async fn parse_mark(&mut self, location: SeekFrom) -> MbonResult<(Mark, u64)> { + mbon_parser_impl!(self, s => s.parse_mark_sync(location)) + } + + async fn parse_data(&mut self, mark: &Mark, location: SeekFrom) -> MbonResult<(Data, u64)> { + mbon_parser_impl!(self, (mark) s => s.parse_data_sync(&mark, location)) + } + + async fn parse_item(&mut self, location: SeekFrom) -> MbonResult { + mbon_parser_impl!(self, s => s.parse_item_sync(location)) + } + + async fn parse_item_n( + &mut self, + location: SeekFrom, + count: Option, + bytes: u64, + parse_data: bool, + ) -> MbonResult> { + mbon_parser_impl!(self, s => s.parse_item_n_sync(location, count, bytes, parse_data)) + } + + async fn parse_data_n( + &mut self, + mark: &Mark, + location: SeekFrom, + n: usize, + ) -> MbonResult> { + mbon_parser_impl!(self, (mark) s => s.parse_data_n_sync(&mark, location, n)) + } +} diff --git a/src/error.rs b/src/error.rs deleted file mode 100644 index b6db1a4..0000000 --- a/src/error.rs +++ /dev/null @@ -1,115 +0,0 @@ -//! 
# Errors used by mbon - -use std::{fmt::Display, io, num::TryFromIntError, str::Utf8Error, string::FromUtf8Error}; - -use serde::{de, ser}; - -use crate::data::Type; - -pub type Result = std::result::Result; - -/// The base error type for mbon -#[derive(Debug)] -pub enum Error { - /// A type was expected, but a different one was found - Expected(Type), - /// There was a problem with the provided data - DataError(String), - /// There is no more data left, but more was expected - EndOfFile, - /// There was a problem reading the data - IO(io::Error), - /// There was a problem on the user's end - Msg(String), -} - -impl Display for Error { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Error::Expected(t) => f.write_fmt(format_args!("Expected {}", t)), - Error::DataError(t) => f.write_fmt(format_args!("Data Error: {}", t)), - Error::EndOfFile => f.write_str("More data was expected"), - Error::IO(err) => err.fmt(f), - Error::Msg(msg) => f.write_str(msg), - } - } -} - -impl Error { - #[inline] - pub fn data_error(s: impl Display) -> Self { - Self::DataError(s.to_string()) - } - - #[inline] - pub fn msg(s: impl Display) -> Self { - Self::Msg(s.to_string()) - } - - #[inline] - pub fn from_error(err: E) -> Self { - Self::msg(err) - } - - #[inline] - pub fn from_box(err: Box) -> Self { - Self::msg(err) - } - - pub fn from_res(res: std::result::Result) -> Result - where - E: std::error::Error + 'static, - { - match res { - Ok(t) => Ok(t), - Err(e) => Err(Self::from_error(e)), - } - } - - pub fn from_box_res(res: std::result::Result>) -> Result { - match res { - Ok(t) => Ok(t), - Err(e) => Err(Self::from_box(e)), - } - } -} - -impl std::error::Error for Error {} - -impl ser::Error for Error { - fn custom(msg: T) -> Self - where - T: Display, - { - Self::msg(msg) - } -} -impl de::Error for Error { - fn custom(msg: T) -> Self - where - T: Display, - { - Self::msg(msg) - } -} - -impl From for Error { - fn from(err: std::io::Error) -> Self { 
- Self::IO(err) - } -} -impl From for Error { - fn from(err: Utf8Error) -> Self { - Self::data_error(err) - } -} -impl From for Error { - fn from(err: FromUtf8Error) -> Self { - Self::data_error(err) - } -} -impl From for Error { - fn from(err: TryFromIntError) -> Self { - Self::data_error(err) - } -} diff --git a/src/errors.rs b/src/errors.rs new file mode 100644 index 0000000..23277d6 --- /dev/null +++ b/src/errors.rs @@ -0,0 +1,40 @@ +use std::{ + cell::BorrowMutError, + io::{self, ErrorKind}, +}; + +use enum_as_inner::EnumAsInner; +use thiserror::Error; + +pub type MbonResult = Result; + +#[derive(Debug, Error, EnumAsInner)] +pub enum MbonError { + #[error("Expected more data to parse")] + OutOfData, + #[error("Invalid mark")] + InvalidMark, + #[error("Invalid Signature")] + InvalidSignature, + #[error("Invalid Data: {0}")] + InvalidData(anyhow::Error), + #[error("Internal Error: {0}")] + InternalError(String), + #[error("{0}")] + IOError(io::Error), +} + +impl From for MbonError { + fn from(err: io::Error) -> Self { + match err.kind() { + ErrorKind::UnexpectedEof => Self::OutOfData, + _ => Self::IOError(err), + } + } +} + +impl From for MbonError { + fn from(value: BorrowMutError) -> Self { + Self::InternalError(value.to_string()) + } +} diff --git a/src/items.rs b/src/items.rs new file mode 100644 index 0000000..f2f392c --- /dev/null +++ b/src/items.rs @@ -0,0 +1,109 @@ +use enum_as_inner::EnumAsInner; + +use crate::data::{self}; + +#[derive(Debug, PartialEq, Clone, EnumAsInner)] +pub enum Item { + Null, + U8(u8), + U16(u16), + U32(u32), + U64(u64), + I8(i8), + I16(i16), + I32(i32), + I64(i64), + F32(f32), + F64(f64), + Char(char), + Bytes(Vec), + String(String), + List(Vec), + Map(Vec<(Item, Item)>), + Enum(u32, Box), +} + +macro_rules! 
item_from { + ($name:ident, $type:ty) => { + impl From<$type> for Item { + fn from(value: $type) -> Self { + Self::$name(value) + } + } + }; + ($name:ident, $type:ty, $value:ident: $expr:expr) => { + impl From<$type> for Item { + fn from($value: $type) -> Self { + Self::$name($expr) + } + } + }; +} + +item_from!(U8, u8); +item_from!(U8, data::U8, v: *v); +item_from!(U16, u16); +item_from!(U16, data::U16, v: *v); +item_from!(U32, u32); +item_from!(U32, data::U32, v:*v); +item_from!(U64, u64); +item_from!(U64, data::U64, v:*v); +item_from!(I8, i8); +item_from!(I8, data::I8, v:*v); +item_from!(I16, i16); +item_from!(I16, data::I16, v:*v); +item_from!(I32, i32); +item_from!(I32, data::I32, v:*v); +item_from!(I64, i64); +item_from!(I64, data::I64, v:*v); +item_from!(F32, f32); +item_from!(F32, data::F32, v:*v); +item_from!(F64, f64); +item_from!(F64, data::F64, v:*v); +item_from!(Char, char); +item_from!(Char, data::C8, v:*v as char); +item_from!(Bytes, Vec); +item_from!(List, Vec); +item_from!(Map, Vec<(Item, Item)>); +item_from!(String, data::Str, v:v.into()); + +impl From> for Item +where + T: Into, +{ + fn from(value: Option) -> Self { + match value { + Some(value) => value.into(), + None => Self::Null, + } + } +} + +impl FromIterator for Item +where + I: Into, +{ + fn from_iter(iter: T) -> Self + where + T: IntoIterator, + { + Self::List(iter.into_iter().map(|v| v.into()).collect()) + } +} + +impl FromIterator<(K, V)> for Item +where + K: Into, + V: Into, +{ + fn from_iter(iter: T) -> Self + where + T: IntoIterator, + { + Self::Map( + iter.into_iter() + .map(|(k, v)| (k.into(), v.into())) + .collect(), + ) + } +} diff --git a/src/lib.rs b/src/lib.rs index 9d8a6ae..ccbd32a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,169 +1,13 @@ -//! # Marked Binary Object Notation -//! -//! mbon is a binary notation that is inspired by the NBT format. -//! -//! It is formed of a sequence of strongly typed values. Each made up of two -//! 
parts: a mark which defines the type and size of the data, followed by the -//! data. Marks can be different in size and so a single byte prefix is used to -//! differenciate between types. -//! -//! This format is self-describing which means that it is able to know if the -//! data is not formatted correctly or a different type was stored than what -//! was expected. Another feature of the self-describing nature of the format -//! is that you can skip values in the data without the need to parse the complete -//! item, e.g. A 1GB value can be easily skipped by only reading the mark. -//! -//! ## Usage -//! -//! ### Dumping -//! -//! You can dump binary data using the [dumper::Dumper] struct. You can -//! write values directly or use serde's serialize to write more complex data. -//! -//! ``` -//! use mbon::dumper::Dumper; -//! -//! let a = 32; -//! let b = "Hello World"; -//! let c = b'a'; -//! -//! let mut dumper = Dumper::new(); -//! dumper.write_int(a).unwrap(); -//! dumper.write(&b).unwrap(); -//! dumper.write(&c).unwrap(); -//! -//! let output = dumper.writer(); -//! assert_eq!(output, b"i\x00\x00\x00\x20s\x00\x00\x00\x0bHello Worldca"); -//! ``` -//! -//! ### Parsing -//! -//! You can parse binary data using the [parser::Parser] struct. You can -//! parse Value's directly, but it is recommended to use serde to parse data. -//! -//! ``` -//! use mbon::parser::Parser; -//! use mbon::data::Value; -//! -//! let data = b"i\x00\x00\x00\x20s\x00\x00\x00\x0bHello Worldca"; -//! -//! let mut parser = Parser::from(data); -//! -//! let a = parser.next_value().unwrap(); -//! let b: String = parser.next().unwrap(); -//! let c: u8 = parser.next().unwrap(); -//! -//! if let Value::Int(a) = a { -//! assert_eq!(a, 32); -//! } else { -//! panic!("a should have been an int"); -//! } -//! -//! assert_eq!(b, "Hello World"); -//! assert_eq!(c, b'a'); -//! ``` -//! -//! ### Embedded Objects -//! -//! If you are wanting to embed a predefined object inside the format, you can -//! 
impl [object::ObjectDump]/[object::ObjectParse]. Keep in mind that you will -//! need to call [`write_obj()`][write_obj]/[`next_obj()`][next_obj] to take -//! advantage of it. -//! -//! [write_obj]: dumper::Dumper::write_obj -//! [next_obj]: parser::Parser::next_obj -//! -//! ``` -//! use mbon::parser::Parser; -//! use mbon::dumper::Dumper; -//! use mbon::error::Error; -//! use mbon::object::{ObjectDump, ObjectParse}; -//! -//! #[derive(Debug, PartialEq, Eq)] -//! struct Foo { -//! a: i32, -//! b: String, -//! c: char, -//! } -//! -//! impl ObjectDump for Foo { -//! type Error = Error; -//! -//! fn dump_object(&self) -> Result, Self::Error> { -//! let mut dumper = Dumper::new(); -//! -//! dumper.write(&self.a)?; -//! dumper.write(&self.b)?; -//! dumper.write(&self.c)?; -//! -//! Ok(dumper.writer()) -//! } -//! } -//! -//! impl ObjectParse for Foo { -//! type Error = Error; -//! -//! fn parse_object(object: &[u8]) -> Result { -//! let mut parser = Parser::from(object); -//! -//! let a = parser.next()?; -//! let b = parser.next()?; -//! let c = parser.next()?; -//! -//! Ok(Self { a, b, c }) -//! } -//! } -//! -//! let foo = Foo { a: 32, b: "Hello World".to_owned(), c: '🫠' }; -//! let mut dumper = Dumper::new(); -//! -//! dumper.write_obj(&foo).unwrap(); -//! -//! let buf = dumper.writer(); -//! let mut parser = Parser::from(&buf); -//! -//! let new_foo: Foo = parser.next_obj().unwrap(); -//! -//! assert_eq!(foo, new_foo); -//! ``` -//! -//! ### Async Implementations -//! -//! If you want to parse data asynchronously, you may want to use the provided -//! wrappers: [async_wrapper::AsyncDumper], [async_wrapper::AsyncParser]. -//! -//! > You need to enable the feature `async` to use these implementations. -//! -//! ``` -//! # #[cfg(feature = "async")] { -//! # futures::executor::block_on(async { -//! use futures::io::{AsyncWriteExt, Cursor}; -//! -//! use mbon::async_wrapper::{AsyncDumper, AsyncParser}; -//! -//! let writer = Cursor::new(vec![0u8; 5]); -//! 
let mut dumper = AsyncDumper::from(writer); -//! -//! dumper.write(&15u32)?; -//! dumper.flush().await?; -//! -//! let mut reader = dumper.writer(); -//! reader.set_position(0); -//! -//! let mut parser = AsyncParser::from(reader); -//! -//! let val: u32 = parser.next().await?; -//! -//! assert_eq!(val, 15); -//! # Ok::<(), Box>(()) }).unwrap(); -//! # } -//! ``` -//! +#[cfg(not(any(feature = "sync", feature = "async-tokio")))] +compile_error!("Feature \"sync\" or \"async-tokio\" is required"); +#[cfg(all(feature = "sync", feature = "async-tokio"))] +compile_error!("Only one of \"sync\" or \"async-tokio\" can be active at a time"); -#[cfg(feature = "async")] -pub mod async_wrapper; +pub mod buffer; +pub mod channel; +pub mod concurrent; pub mod data; -pub mod dumper; -pub mod error; -pub mod object; -pub mod parser; +pub mod engine; +pub mod errors; +pub mod items; +pub mod marks; diff --git a/src/marks.rs b/src/marks.rs new file mode 100644 index 0000000..3ae6767 --- /dev/null +++ b/src/marks.rs @@ -0,0 +1,380 @@ +//! [Mark] + +use std::{ + io, + io::{Read, Write}, + ops::Deref, + slice, + sync::Arc, +}; + +use byteorder::ReadBytesExt; +use enum_as_inner::EnumAsInner; + +use crate::errors::{MbonError, MbonResult}; + +/// Size indicator for marks +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Size(pub u64); + +impl Deref for Size { + type Target = u64; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl From for Size { + fn from(value: u64) -> Self { + Self(value) + } +} + +impl From for u64 { + fn from(value: Size) -> Self { + value.0 + } +} + +impl Size { + /// Parse a size from a reader + /// + /// This expects a dynamically sized Size indicator from _insert link to + /// spec_. 
+ pub fn parse(f: &mut R) -> MbonResult<(Self, usize)> { + let mut value = 0; + let mut read = 0; + + let mut i = 0; + loop { + let b = f.read_u8()?; + let v = (b & 0b0111_1111) as u64; + if i == 9 && b > 1 { + // 9 * 7 + 1 == 64 + // If the size is bigger than 64 bits, then return an error + return Err(MbonError::InvalidMark); + } + value |= v << (7 * i); + read += 1; + if (b & 0b1000_0000) == 0 { + break; + } + i += 1; + } + + Ok((Self(value), read)) + } + + /// Write the size to a writer + /// + /// This will write a dynamically sized Size indicator from _insert link to + /// spec_. + pub fn write(&self, f: &mut W) -> io::Result { + let mut value = self.0; + let mut written = 0; + while self.0 > 0 { + let mut v = (value & 0b0111_1111) as u8; + value = value >> 7; + if value > 0 { + v |= 0b1000_0000; + } + f.write_all(slice::from_ref(&v))?; + written += 1; + } + Ok(written) + } + + /// Get the value of the size + #[inline] + pub fn value(&self) -> u64 { + self.0 + } + + /// Get the number of bytes that the size would be represented by + pub fn len(&self) -> u64 { + let mut written = 0; + let mut value = self.0; + while self.0 > 0 { + value = value >> 7; + written += 1; + } + written + } +} + +/// Describes an Mbon item +#[derive(Debug, Clone, PartialEq, Eq, EnumAsInner)] +pub enum Mark { + Null, + Unsigned(u8), + Signed(u8), + Float(u8), + Char(u8), + String(Size), + Array(Arc, Size), + List(Size), + Struct(Arc, Arc, Size), + Map(Size), + Enum(u8, Arc), + Space, + Padding(Size), + Pointer(u8), + Rc(u8, Arc), + Heap(Size), +} + +fn len_b(id: u8) -> u8 { + 1 << (id & 0b11) +} + +fn get_b(v: u8) -> u8 { + match v { + 8 => 3, + 4 => 2, + 2 => 1, + 1 => 0, + _ => 0, + } +} + +const NULL_ID: u8 = 0xc0; +const UNSIGNED_ID: u8 = 0x64; +const SIGNED_ID: u8 = 0x68; +const FLOAT_ID: u8 = 0x6c; +const CHAR_ID: u8 = 0x70; +const STRING_ID: u8 = 0x54; +const ARRAY_ID: u8 = 0x40; +const LIST_ID: u8 = 0x44; +const STRUCT_ID: u8 = 0x48; +const MAP_ID: u8 = 0x4c; +const ENUM_ID: 
u8 = 0x74; +const SPACE_ID: u8 = 0x80; +const PADDING_ID: u8 = 0x04; +const POINTER_ID: u8 = 0x28; +const RC_ID: u8 = 0x2c; +const HEAP_ID: u8 = 0x10; + +impl Mark { + /// Get the binary id of the mark + pub fn id(&self) -> u8 { + match self { + Mark::Null => NULL_ID, + Mark::Unsigned(v) => get_b(*v) | UNSIGNED_ID, + Mark::Signed(v) => get_b(*v) | SIGNED_ID, + Mark::Float(v) => get_b(*v) | FLOAT_ID, + Mark::Char(v) => get_b(*v) | CHAR_ID, + Mark::String(_) => STRING_ID, + Mark::Array(_, _) => ARRAY_ID, + Mark::List(_) => LIST_ID, + Mark::Struct(_, _, _) => STRUCT_ID, + Mark::Map(_) => MAP_ID, + Mark::Enum(v, _) => get_b(*v) | ENUM_ID, + Mark::Space => SPACE_ID, + Mark::Padding(_) => PADDING_ID, + Mark::Pointer(v) => get_b(*v) | POINTER_ID, + Mark::Rc(v, _) => get_b(*v) | RC_ID, + Mark::Heap(_) => HEAP_ID, + } + } + + /// Parse a mark from a reader + pub fn parse(f: &mut R) -> MbonResult<(Self, usize)> { + let id = f.read_u8()?; + let mut len = 1; + let mark = match id & 0b1111_1100 { + NULL_ID => Self::Null, + UNSIGNED_ID => Self::Unsigned(len_b(id)), + SIGNED_ID => Self::Signed(len_b(id)), + FLOAT_ID => Self::Float(len_b(id)), + CHAR_ID => Self::Char(len_b(id)), + STRING_ID => { + let (size, r) = Size::parse(f)?; + len += r; + Self::String(size) + } + ARRAY_ID => { + let (val, r) = Self::parse(f)?; + len += r; + let (size, r) = Size::parse(f)?; + len += r; + Self::Array(Arc::new(val), size) + } + LIST_ID => { + let (size, r) = Size::parse(f)?; + len += r; + Self::List(size) + } + STRUCT_ID => { + let (key, r) = Self::parse(f)?; + len += r; + let (val, r) = Self::parse(f)?; + len += r; + let (size, r) = Size::parse(f)?; + len += r; + Self::Struct(Arc::new(key), Arc::new(val), size) + } + MAP_ID => { + let (size, r) = Size::parse(f)?; + len += r; + Self::Map(size) + } + ENUM_ID => { + let (mark, r) = Self::parse(f)?; + len += r; + Self::Enum(len_b(id), Arc::new(mark)) + } + SPACE_ID => Self::Space, + PADDING_ID => { + let (size, r) = Size::parse(f)?; + len += r; + 
Self::Padding(size) + } + POINTER_ID => Self::Pointer(len_b(id)), + RC_ID => { + let (mark, r) = Self::parse(f)?; + len += r; + Self::Rc(len_b(id), Arc::new(mark)) + } + HEAP_ID => { + let (size, r) = Size::parse(f)?; + len += r; + Self::Heap(size) + } + _ => return Err(MbonError::InvalidMark), + }; + Ok((mark, len)) + } + + /// Write the mark to a writer + pub fn write(&self, f: &mut W) -> io::Result { + f.write_all(slice::from_ref(&self.id()))?; + let mut written = 1; + match self { + Mark::String(l) => { + written += l.write(f)?; + } + Mark::Array(v, n) => { + written += v.write(f)?; + written += n.write(f)?; + } + Mark::List(l) => { + written += l.write(f)?; + } + Mark::Struct(k, v, n) => { + written += k.write(f)?; + written += v.write(f)?; + written += n.write(f)?; + } + Mark::Map(l) => { + written += l.write(f)?; + } + Mark::Enum(_, v) => { + written += v.write(f)?; + } + Mark::Padding(l) => { + written += l.write(f)?; + } + Mark::Rc(_, v) => { + written += v.write(f)?; + } + Mark::Heap(l) => { + written += l.write(f)?; + } + _ => {} + } + + Ok(written) + } + + /// Write the mark to a byte buffer + #[inline] + pub fn write_to_buf(&self) -> io::Result> { + let mut buf = Vec::new(); + self.write(&mut buf)?; + Ok(buf) + } + + /// Get the length of the data the mark represents + pub fn data_len(&self) -> u64 { + match self { + Mark::Null => 0, + Mark::Unsigned(b) => *b as u64, + Mark::Signed(b) => *b as u64, + Mark::Float(b) => *b as u64, + Mark::Char(b) => *b as u64, + Mark::String(l) => **l, + Mark::Array(v, n) => v.data_len() * **n, + Mark::List(l) => **l, + Mark::Struct(k, v, n) => (k.data_len() + v.data_len()) * **n, + Mark::Map(l) => **l, + Mark::Enum(b, v) => *b as u64 + v.data_len(), + Mark::Space => 0, + Mark::Padding(l) => **l, + Mark::Pointer(b) => *b as u64, + Mark::Rc(b, v) => *b as u64 + v.data_len(), + Mark::Heap(l) => **l, + } + } + + /// Get the length of the mark + pub fn mark_len(&self) -> u64 { + 1 + match self { + Mark::String(l) => l.len(), 
+ Mark::Array(v, n) => v.mark_len() + n.len(), + Mark::List(l) => l.len(), + Mark::Struct(k, v, n) => k.mark_len() + v.mark_len() + n.len(), + Mark::Map(l) => l.len(), + Mark::Enum(_, v) => v.mark_len(), + Mark::Padding(l) => l.len(), + Mark::Rc(_, v) => v.mark_len(), + Mark::Heap(l) => l.len(), + _ => 0, + } + } + + /// Get the length of the mark and data combined + #[inline] + pub fn total_len(&self) -> u64 { + self.data_len() + self.mark_len() + } +} + +#[cfg(test)] +mod test { + + use super::*; + + #[test] + fn test_simple_parse() { + let mut buf: &[u8] = &[0xc0, 0x64, 0x32]; + let (mark, read) = Mark::parse(&mut buf).unwrap(); + assert_eq!(read, 1); + assert_eq!(mark.is_null(), true); + + let (mark, read) = Mark::parse(&mut buf).unwrap(); + assert_eq!(read, 1); + assert_eq!(mark.is_unsigned(), true); + if let Mark::Unsigned(b) = mark { + assert_eq!(b, 1); + } else { + unreachable!(); + } + + let err = Mark::parse(&mut buf).expect_err("Expected InvalidMark error"); + assert_eq!(err.is_invalid_mark(), true); + } + + #[test] + fn test_size_parse() { + let mut buf: &[u8] = &[0x32, 0x80, 0x31]; + + let (size, read) = Size::parse(&mut buf).unwrap(); + assert_eq!(read, 1); + assert_eq!(*size, 0x32); + + let (size, read) = Size::parse(&mut buf).unwrap(); + assert_eq!(read, 2); + assert_eq!(*size, 0x1880); + } +} diff --git a/src/object.rs b/src/object.rs deleted file mode 100644 index 87d2b56..0000000 --- a/src/object.rs +++ /dev/null @@ -1,153 +0,0 @@ -//! # Custom Object parsing and dumping -//! -//! You can implement [ObjectParse] and [ObjectDump] to allow for custom object -//! dumping and parsing. - -/// A loader that can load a struct from a binary object. 
-/// -/// A possible use case is to store a struct more efficiently than a map -/// -/// ``` -/// use mbon::object::ObjectParse; -/// use mbon::parser::Parser; -/// use mbon::error::Error; -/// -/// struct Foo { -/// a: u32, -/// b: String, -/// } -/// -/// impl ObjectParse for Foo { -/// type Error = Error; -/// -/// fn parse_object(object: &[u8]) -> Result { -/// let mut parser = Parser::from(object); -/// -/// let a = parser.next()?; -/// let b = parser.next()?; -/// -/// Ok(Self { a, b }) -/// } -/// } -/// ``` -pub trait ObjectParse -where - Self: Sized, -{ - type Error; - - /// Load from a binary object - /// - /// This will parse the given object in a predefined format. - fn parse_object(object: &[u8]) -> Result; -} - -/// A dumper that can dump a binary object from a struct. -/// -/// A possible use case is to store a struct more efficiently than a map -/// -/// ``` -/// use mbon::object::ObjectDump; -/// use mbon::dumper::Dumper; -/// use mbon::error::Error; -/// -/// struct Foo { -/// a: u32, -/// b: String, -/// } -/// -/// impl ObjectDump for Foo { -/// type Error = Error; -/// -/// fn dump_object(&self) -> Result, Self::Error> { -/// let mut dumper = Dumper::new(); -/// -/// dumper.write(&self.a)?; -/// dumper.write(&self.b)?; -/// -/// Ok(dumper.writer()) -/// } -/// } -/// ``` -pub trait ObjectDump { - type Error; - - /// Dump into a binary object - /// - /// This will dump the struct into binary data in a predefined format - fn dump_object(&self) -> Result, Self::Error>; -} - -#[cfg(test)] -mod test { - use std::vec; - - use crate::{dumper::Dumper, error::Error, parser::Parser}; - - use super::*; - - #[derive(Debug, PartialEq, Eq)] - struct TestStruct { - a: String, - b: i32, - c: Vec, - } - - impl ObjectParse for TestStruct { - type Error = Error; - - fn parse_object(data: &[u8]) -> Result { - let mut parser = Parser::from(data); - let a: String = parser.next()?; - let b: i32 = parser.next()?; - let c: Vec = parser.next()?; - Ok(Self { a, b, c }) - 
} - } - - impl ObjectDump for TestStruct { - type Error = Error; - - fn dump_object(&self) -> Result, Error> { - let mut dumper = Dumper::new(); - dumper.write(&self.a)?; - dumper.write(&self.b)?; - dumper.write(&self.c)?; - Ok(dumper.writer()) - } - } - - #[test] - fn test_deserialize() { - let data = - b"o\x00\x00\x00\x2bs\x00\x00\x00\x0bHello Worldi\x00\x00\x40\x30A\x00\x00\x00\x11s\x00\x00\x00\x04Yeets\x00\x00\x00\x03Bar"; - - let mut parser = Parser::from(data); - let test: TestStruct = parser.next_obj().unwrap(); - assert_eq!( - test, - TestStruct { - a: "Hello World".into(), - b: 0x4030, - c: vec!["Yeet".into(), "Bar".into()] - } - ); - } - - #[test] - fn test_serialize() { - let data = - b"o\x00\x00\x00\x2bs\x00\x00\x00\x0bHello Worldi\x00\x00\x40\x30A\x00\x00\x00\x11s\x00\x00\x00\x04Yeets\x00\x00\x00\x03Bar"; - - let mut dumper = Dumper::new(); - dumper - .write_obj(&TestStruct { - a: "Hello World".into(), - b: 0x4030, - c: vec!["Yeet".into(), "Bar".into()], - }) - .unwrap(); - - assert_eq!(dumper.writer(), data); - } -} diff --git a/src/parser.rs b/src/parser.rs deleted file mode 100644 index f313b23..0000000 --- a/src/parser.rs +++ /dev/null @@ -1,615 +0,0 @@ -//! # Parse mbon data -//! -//! Use [Parser] to deserialize mbon data. 
- -use crate::{ - data::{Mark, Type, Value}, - error::{Error, Result}, - object::ObjectParse, -}; -use byteorder::{BigEndian, ReadBytesExt}; -use serde::de::DeserializeOwned; - -use std::io::{Read, Seek, SeekFrom}; - -/// A struct that parses binary data from a bytearray -/// -/// You can deserialize data using -/// * [`next()`](Parser::next) -/// * [`next_obj()`](Parser::next_obj) -/// -/// Or you can deserialize data directly using -/// * [`next_value()`](Parser::next_value) -pub struct Parser(R); - -impl<'a, T> From<&'a T> for Parser<&'a [u8]> -where - T: AsRef<[u8]>, -{ - fn from(slice: &'a T) -> Self { - Self(slice.as_ref()) - } -} - -impl From for Parser -where - R: Read, -{ - fn from(reader: R) -> Self { - Self(reader) - } -} - -impl AsRef for Parser { - fn as_ref(&self) -> &R { - &self.0 - } -} - -impl AsMut for Parser { - fn as_mut(&mut self) -> &mut R { - &mut self.0 - } -} - -impl Parser -where - R: Read, -{ - /// Turn the parser into the underlying reader - #[inline] - pub fn reader(self) -> R { - self.0 - } - - /// Get the the underlying reader as a reference - #[inline] - pub fn get_reader(&self) -> &R { - &self.0 - } - - /// Get the the underlying reader as a mutable reference - #[inline] - pub fn get_reader_mut(&mut self) -> &mut R { - &mut self.0 - } - - /// Parse the next item in the parser. - /// - /// ### Example - /// - /// ``` - /// use mbon::parser::Parser; - /// - /// let mut parser = Parser::from(b"i\x00\x00\x00\x42"); - /// let i: u32 = parser.next().unwrap(); - /// - /// assert_eq!(i, 0x42); - /// ``` - #[inline] - pub fn next(&mut self) -> Result - where - T: DeserializeOwned, - { - self.next_value()?.parse() - } - - /// Parse the next custom object in the parser. - /// - /// This allows you to be able to parse custom binary data. A common usecase - /// is to store a struct in a more compact form. You could also use object - /// values to store a different format altogether. 
- /// - /// Note: the next value in the parser must be an Object - /// - /// ### Example - /// - /// ``` - /// use mbon::error::Error; - /// use mbon::parser::Parser; - /// use mbon::object::ObjectParse; - /// - /// struct Foo { - /// a: i32, - /// b: String, - /// c: f32, - /// } - /// - /// impl ObjectParse for Foo { - /// type Error = Error; - /// - /// fn parse_object(data: &[u8]) -> Result { - /// let mut parser = Parser::from(data); - /// let a = parser.next()?; - /// let b = parser.next()?; - /// let c = parser.next()?; - /// Ok(Self { a, b, c }) - /// } - /// } - /// - /// let mut parser = - /// Parser::from( - /// b"o\x00\x00\x00\x14i\x00\x00\x00\x42s\x00\x00\x00\x05Hellof\x00\x00\x00\x00" - /// ); - /// - /// let foo: Foo = parser.next_obj().unwrap(); - /// assert_eq!(foo.a, 0x42); - /// assert_eq!(foo.b, "Hello"); - /// assert_eq!(foo.c, 0.0); - /// ``` - #[inline] - pub fn next_obj(&mut self) -> Result - where - T: ObjectParse, - ::Error: std::error::Error + 'static, - { - self.next_value()?.parse_obj() - } - - #[inline] - fn next_type(&mut self) -> Result { - Type::from_prefix(self.0.read_u8()?) - } - - fn next_data_n(&mut self, n: usize) -> Result> { - let mut buf = vec![0; n]; - self.0.read_exact(&mut buf)?; - Ok(buf) - } - - #[inline] - fn next_data_long(&mut self) -> Result { - Ok(self.0.read_i64::()?) - } - - #[inline] - fn next_data_int(&mut self) -> Result { - Ok(self.0.read_i32::()?) - } - - #[inline] - fn next_data_short(&mut self) -> Result { - Ok(self.0.read_i16::()?) - } - - #[inline] - fn next_data_char(&mut self) -> Result { - Ok(self.0.read_i8()?) - } - - #[inline] - fn next_data_float(&mut self) -> Result { - Ok(self.0.read_f32::()?) - } - - #[inline] - fn next_data_double(&mut self) -> Result { - Ok(self.0.read_f64::()?) 
- } - - #[inline] - fn next_data_bytes(&mut self, n: usize) -> Result> { - self.next_data_n(n) - } - - #[inline] - fn next_data_str(&mut self, n: usize) -> Result { - let buf = self.next_data_n(n)?; - Ok(String::from_utf8(buf)?) - } - - fn next_data_enum(&mut self, m: &Mark) -> Result<(u32, Value)> { - let variant = self.next_data_int()? as u32; - let value = self.next_data_value(m)?; - Ok((variant, value)) - } - - fn next_data_array(&mut self, len: usize, t: &Mark) -> Result> { - let mut arr = Vec::with_capacity(len); - - for _ in 0..len { - let v = self.next_data_value(t)?; - arr.push(v); - } - - Ok(arr) - } - - fn next_data_list(&mut self, size: usize) -> Result> { - let mut arr = Vec::new(); - - let mut read = 0; - - while read < size { - let m = self.next_mark()?; - let v = self.next_data_value(&m)?; - arr.push(v); - read += m.size(); - } - - if read > size { - return Err(Error::data_error("List was larger than expected")); - } - - Ok(arr) - } - - fn next_data_dict(&mut self, len: usize, k: &Mark, v: &Mark) -> Result> { - let mut arr = Vec::with_capacity(len); - - for _ in 0..len { - let key = self.next_data_value(k)?; - let val = self.next_data_value(v)?; - arr.push((key, val)); - } - - Ok(arr) - } - - fn next_data_map(&mut self, size: usize) -> Result> { - let mut arr = Vec::new(); - let mut read = 0; - - while read < size { - let k = self.next_mark()?; - let key = self.next_data_value(&k)?; - let v = self.next_mark()?; - let val = self.next_data_value(&v)?; - - arr.push((key, val)); - read += k.size() + v.size(); - } - - if read > size { - return Err(Error::data_error("Map was larger than expected")); - } - - Ok(arr) - } - - pub(crate) fn next_data_value(&mut self, mark: &Mark) -> Result { - Ok(match mark { - Mark::Long => Value::Long(self.next_data_long()?), - Mark::Int => Value::Int(self.next_data_int()?), - Mark::Short => Value::Short(self.next_data_short()?), - Mark::Char => Value::Char(self.next_data_char()?), - Mark::Float => 
Value::Float(self.next_data_float()?), - Mark::Double => Value::Double(self.next_data_double()?), - Mark::Bytes(n) => Value::Bytes(self.next_data_bytes(*n)?), - Mark::Str(n) => Value::Str(self.next_data_str(*n)?.to_owned()), - Mark::Object(n) => Value::Object(self.next_data_bytes(*n)?.to_vec()), - Mark::Enum(m) => { - let (var, val) = self.next_data_enum(&m)?; - Value::Enum(var, Box::new(val)) - } - Mark::Null => Value::Null, - Mark::Array(n, m) => Value::List(self.next_data_array(*n, &m)?), - Mark::List(n) => Value::List(self.next_data_list(*n)?), - Mark::Dict(n, k, v) => Value::Map(self.next_data_dict(*n, &k, &v)?), - Mark::Map(n) => Value::Map(self.next_data_map(*n)?), - }) - } - - fn next_mark(&mut self) -> Result { - let t = self.next_type()?; - Ok(match t { - Type::Long => Mark::Long, - Type::Int => Mark::Int, - Type::Short => Mark::Short, - Type::Char => Mark::Char, - Type::Float => Mark::Float, - Type::Double => Mark::Double, - Type::Bytes => Mark::Bytes(self.next_data_int()? as usize), - Type::Str => Mark::Str(self.next_data_int()? as usize), - Type::Object => Mark::Object(self.next_data_int()? as usize), - Type::Enum => Mark::Enum(Box::new(self.next_mark()?)), - Type::Null => Mark::Null, - Type::Array => { - let mark = self.next_mark()?; - let len = self.next_data_int()? as usize; - Mark::Array(len, Box::new(mark)) - } - Type::List => Mark::List(self.next_data_int()? as usize), - Type::Dict => { - let k = self.next_mark()?; - let v = self.next_mark()?; - let len = self.next_data_int()? as usize; - Mark::Dict(len, Box::new(k), Box::new(v)) - } - Type::Map => Mark::Map(self.next_data_int()? as usize), - }) - } - - /// Skip the next value in the parser. - /// - /// This will ignore the next value without parsing more than what's - /// necessary. - /// - /// If the reader supports seeking, then it is preffered to use - /// [`seek_next()`](Parser::seek_next) instead. 
- /// - /// ### Example - /// - /// ``` - /// use mbon::parser::Parser; - /// - /// let mut parser = Parser::from( - /// b"s\x00\x00\x00\x1eI don't care about this stringi\x00\x00\x00\x42" - /// ); - /// - /// parser.skip_next().unwrap(); - /// - /// let v: i32 = parser.next().unwrap(); - /// assert_eq!(v, 0x42); - /// ``` - pub fn skip_next(&mut self) -> Result<()> { - let mark = self.next_mark()?; - let size = mark.data_size(); - - self.next_data_n(size)?; - - Ok(()) - } - - /// Parse the next value in the parser. - /// - /// This will try to read whatever value is next and return it. - /// - /// ### Example - /// - /// ``` - /// use mbon::parser::Parser; - /// use mbon::data::Value; - /// - /// let mut parser = Parser::from(b"i\x00\x00\x00\x42"); - /// - /// assert_eq!(parser.next_value().unwrap(), Value::Int(0x42)); - /// ``` - #[inline] - pub fn next_value(&mut self) -> Result { - let mark = self.next_mark()?; - self.next_data_value(&mark) - } -} - -impl Parser -where - R: Read + Seek, -{ - /// Seek to the next value in the parser. - /// - /// This will efficiently skip the next value without reading more than - /// what's necessary. 
- /// - /// ### Example - /// - /// ``` - /// use mbon::parser::Parser; - /// use std::io::Cursor; - /// - /// let reader = Cursor::new( - /// b"s\x00\x00\x00\x23This is a string I don't care abouti\x00\x00\x00\x20" - /// ); - /// - /// let mut parser = Parser::from(reader); - /// - /// parser.seek_next().unwrap(); - /// let val: u32 = parser.next().unwrap(); - /// - /// assert_eq!(val, 32); - /// ``` - pub fn seek_next(&mut self) -> Result<()> { - let mark = self.next_mark()?; - let size = mark.data_size(); - - self.0.seek(SeekFrom::Current(size as i64))?; - - Ok(()) - } -} - -#[cfg(test)] -mod test { - use super::*; - - #[test] - fn test_long() { - let mut parser = Parser::from(b"l\x00\x30\x00\x00\x20\x10\x00\x05"); - let val = parser.next_value().unwrap(); - assert_eq!(val, Value::Long(0x0030000020100005)); - assert_eq!(parser.0.is_empty(), true); - } - - #[test] - fn test_int() { - let mut parser = Parser::from(b"i\x03\x00\x00\x00"); - let val = parser.next_value().unwrap(); - assert_eq!(val, Value::Int(0x03000000)); - assert_eq!(parser.0.is_empty(), true); - } - - #[test] - fn test_short() { - let mut parser = Parser::from(b"h\x03\x00"); - let val = parser.next_value().unwrap(); - assert_eq!(val, Value::Short(0x0300)); - assert_eq!(parser.0.is_empty(), true); - } - - #[test] - fn test_char() { - let mut parser = Parser::from(b"c\x03"); - let val = parser.next_value().unwrap(); - assert_eq!(val, Value::Char(0x03)); - assert_eq!(parser.0.is_empty(), true); - } - - #[test] - fn test_float() { - let mut parser = Parser::from(b"f\x00\x00\x00\x00"); - let val = parser.next_value().unwrap(); - assert_eq!(val, Value::Float(0.0)); - assert_eq!(parser.0.is_empty(), true); - } - - #[test] - fn test_double() { - let mut parser = Parser::from(b"d\x00\x00\x00\x00\x00\x00\x00\x00"); - let val = parser.next_value().unwrap(); - assert_eq!(val, Value::Double(0.0)); - assert_eq!(parser.0.is_empty(), true); - } - - #[test] - fn test_bytes() { - let mut parser = 
Parser::from(b"b\x00\x00\x00\x0bHello World"); - let val = parser.next_value().unwrap(); - assert_eq!(val, Value::Bytes(b"Hello World".to_vec())); - assert_eq!(parser.0.is_empty(), true); - } - - #[test] - fn test_str() { - let mut parser = Parser::from(b"s\x00\x00\x00\x0bHello World"); - let val = parser.next_value().unwrap(); - assert_eq!(val, Value::Str("Hello World".to_owned())); - assert_eq!(parser.0.is_empty(), true); - } - - #[test] - fn test_object() { - let mut parser = Parser::from(b"o\x00\x00\x00\x0bHello World"); - let val = parser.next_value().unwrap(); - assert_eq!(val, Value::Object(b"Hello World".to_vec())); - assert_eq!(parser.0.is_empty(), true); - } - - #[test] - fn test_enum() { - let mut parser = Parser::from(b"ei\x00\x00\x00\x01\x00\x00\x00\xfe"); - let val = parser.next_value().unwrap(); - assert_eq!(val, Value::Enum(1, Box::new(Value::Int(0xfe)))); - assert_eq!(parser.0.is_empty(), true); - } - - #[test] - fn test_null() { - let mut parser = Parser::from(b"n"); - let val = parser.next_value().unwrap(); - assert_eq!(val, Value::Null); - assert_eq!(parser.0.is_empty(), true); - } - - #[test] - fn test_array() { - let mut parser = Parser::from(b"ac\x00\x00\x00\x04\x01\x02\x03\x04"); - let val = parser.next_value().unwrap(); - if let Value::List(val) = val { - assert_eq!(val.len(), 4); - assert_eq!(val.get(0).unwrap().to_owned(), Value::Char(1)); - assert_eq!(val.get(1).unwrap().to_owned(), Value::Char(2)); - assert_eq!(val.get(2).unwrap().to_owned(), Value::Char(3)); - assert_eq!(val.get(3).unwrap().to_owned(), Value::Char(4)); - } else { - panic!("value is not a list"); - } - assert_eq!(parser.0.is_empty(), true); - } - - #[test] - fn test_2d_array() { - let mut parser = Parser::from( - b"aac\x00\x00\x00\x05\x00\x00\x00\x03\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", - ); - let val = parser.next_value().unwrap(); - if let Value::List(val) = val { - let mut i = 1; - for v in val { - if let Value::List(v) = v { - for item in v 
{ - assert_eq!(item, Value::Char(i)); - i += 1; - } - } else { - panic!("nested value is not a list"); - } - } - } else { - panic!("value is not a list") - } - } - - #[test] - fn test_list() { - let mut parser = Parser::from(b"A\x00\x00\x00\x08c\x01c\x02c\x03c\x04"); - let val = parser.next_value().unwrap(); - if let Value::List(val) = val { - assert_eq!(val.len(), 4); - assert_eq!(val.get(0).unwrap().to_owned(), Value::Char(1)); - assert_eq!(val.get(1).unwrap().to_owned(), Value::Char(2)); - assert_eq!(val.get(2).unwrap().to_owned(), Value::Char(3)); - assert_eq!(val.get(3).unwrap().to_owned(), Value::Char(4)); - } else { - panic!("value is not a list"); - } - assert_eq!(parser.0.is_empty(), true); - } - - #[test] - fn test_map() { - let mut parser = - Parser::from(b"M\x00\x00\x00\x10s\x00\x00\x00\x01ac\x01s\x00\x00\x00\x01bc\x02"); - let val = parser.next_value().unwrap(); - if let Value::Map(val) = val { - assert_eq!(val.len(), 2); - assert_eq!( - val.get(0).unwrap().to_owned(), - (Value::Str("a".to_owned()), Value::Char(1)) - ); - assert_eq!( - val.get(1).unwrap().to_owned(), - (Value::Str("b".to_owned()), Value::Char(2)) - ); - } else { - panic!("value is not a map"); - } - assert_eq!(parser.0.is_empty(), true); - } - - #[test] - fn test_eof() { - let mut parser = Parser::from(b"i\x00\x0a"); - - let err = parser.next_value().expect_err("UnexpectedEof Error"); - - if let Error::IO(e) = err { - if e.kind() != std::io::ErrorKind::UnexpectedEof { - panic!("Expected UnexpectedEof Error"); - } - } else { - panic!("Expected UnexpectedEof Error"); - } - } - - #[test] - fn test_list_too_big() { - let mut parser = Parser::from(b"A\x00\x00\x00\x04c\x01i\x00\x00\x00\x00"); - - let err = parser.next_value().expect_err("DataError"); - if let Error::DataError(_) = err { - } else { - panic!("Expected a DataError"); - } - } - - #[test] - fn test_map_too_big() { - let mut parser = Parser::from(b"M\x00\x00\x00\x04c\x01i\x00\x00\x00\x00"); - - let err = 
parser.next_value().expect_err("DataError"); - if let Error::DataError(_) = err { - } else { - panic!("Expected a DataError"); - } - } -} From d5e4fee267b17412d3646e2a191e29c3e6e042be Mon Sep 17 00:00:00 2001 From: Benjamin Jacobs Date: Sun, 3 Mar 2024 01:25:16 -0700 Subject: [PATCH 05/10] Add FileBufferAsync --- Cargo.lock | 40 ++ Cargo.toml | 12 +- src/buffer.rs | 1097 +++++++++++++++++++++++++++++++++++------------- src/channel.rs | 28 ++ src/engine.rs | 22 +- 5 files changed, 903 insertions(+), 296 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 295e698..17e12bd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -36,6 +36,17 @@ dependencies = [ "pin-project-lite", ] +[[package]] +name = "async-generic" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5c033acf5a4f968ece4c8b18c2444b4d4b545fecebfe4b90592a4265643421d" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "async-trait" version = "0.1.77" @@ -68,6 +79,12 @@ version = "1.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" +[[package]] +name = "bytes" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223" + [[package]] name = "cc" version = "1.0.88" @@ -186,10 +203,12 @@ version = "0.3.0+nightly" dependencies = [ "anyhow", "async-channel", + "async-generic", "async-trait", "byteorder", "enum-as-inner", "maybe-async", + "pin-project", "rand", "thiserror", "tokio", @@ -235,6 +254,26 @@ version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bb813b8af86854136c6922af0598d719255ecb2179515e6e7730d468f05c9cae" +[[package]] +name = "pin-project" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0302c4a0442c456bd56f841aee5c3bfd17967563f6fadc9ceb9f9c23cf3807e0" +dependencies = [ + "pin-project-internal", +] + +[[package]] +name = "pin-project-internal" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "266c042b60c9c76b8d53061e52b2e0d1116abc57cefc8c5cd671619a56ac3690" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "pin-project-lite" version = "0.2.13" @@ -339,6 +378,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "61285f6515fa018fb2d1e46eb21223fff441ee8db5d0f1435e8ab4f5cdb80931" dependencies = [ "backtrace", + "bytes", "num_cpus", "pin-project-lite", ] diff --git a/Cargo.toml b/Cargo.toml index 77a7916..edeff89 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,9 +21,11 @@ anyhow = "1" thiserror = "1" # Async Dependencies -async-channel = { version = "2", optional = true} -tokio = { version = "1", features = ["rt-multi-thread"], optional = true } +async-generic = { version = "1.0.0", optional = true } +async-channel = { version = "2", optional = true } +tokio = { version = "1", features = ["rt-multi-thread", "io-util"], optional = true } async-trait = { version = "0.1", optional = true } +pin-project = "1.1.4" [dev-dependencies] rand = "0.8" @@ -33,12 +35,12 @@ rand = "0.8" # serde = {version = "1.0", features = ["derive"] } [features] -default = ["sync"] -# default = ["async-tokio"] +# default = ["sync"] +default = ["async-tokio"] sync = ["maybe-async/is_sync"] -async = ["dep:async-channel", "dep:async-trait"] +async = ["dep:async-channel", "dep:async-generic", "dep:async-trait"] async-tokio = ["async", "dep:tokio"] diff --git a/src/buffer.rs b/src/buffer.rs index 73feae2..7bebed2 100644 --- a/src/buffer.rs +++ b/src/buffer.rs @@ -1,53 +1,36 @@ -//! Contains [BufferedReadWrite], which is a wrapper for files. +//! Contains [FileBuffer], which is a wrapper for files. //! -//! It currently can only be implemented synchronously which requires that -//! 
operations are executed in a spawn_blocking context. This isn't the worst, -//! but it would be nice to be able to utilize async io as it is natively -//! supported by tokio. +//! There is now an asynchronous implementation of [FileBuffer]: +//! [FileBufferAsync]. It has not been tested yet, but I like the way it is +//! implemented and should be implemented in a similar way for [FileBuffer] +//! (Rather than doing work upfront, FileBuffer should instead perform actions +//! as it goes). //! -//! I can't just write an AsyncRead/AsyncWrite wrapper since it requires a state -//! that would make the current implementation way too complex. If I were to -//! implement AsyncReadExt/AsyncWriteExt which have a nicer, I would also need -//! to implement the base traits. It's possible that I could just implement the -//! base trait and panic if the base trait is called, but I don't know how I -//! feel about that. -//! -//! I could make a custom trait that all asyncReadExt objects would implement, -//! but then users would need to import that custom trait whenever they are -//! using the engine which doesn't sound great either. -//! -//! Also, how would I deal with files that are provided that are sync only, such -//! as with a `vec`? When in async mode, I would have to have two -//! implementations available for whether F is async or not. +//! The Buffer helper struct also needs to be majorly cleaned up. I'm tired, +//! good night. 😴 use std::{ - collections::{BinaryHeap, HashMap, HashSet}, + collections::{BTreeSet, BinaryHeap, HashMap}, mem, + ops::Range, + pin::Pin, + task::{Context, Poll}, }; use std::io::{self, Read, Seek, SeekFrom, Write}; +use enum_as_inner::EnumAsInner; +use pin_project::pin_project; +use tokio::io::{AsyncRead, AsyncSeek, AsyncWrite, ReadBuf}; + struct Block { data: Vec, access: u64, } -/// A wrapper for files which holds a buffer for the file. 
-/// -/// This buffer can hold the entire file in memory and has an option to limit -/// how much data is stored in memory (the default limit is 1GiB). -/// -/// Reads and writes are performed in blocks (the default block size is 512 -/// bytes). -/// -/// This struct is designed to work with simultaneous read/write operations. -/// -/// No writes occur to the underlying file until either flush is called, or the -/// cache limit has been met. -pub struct BufferedReadWrite { +struct Buffer { blocks: HashMap, - modified: HashSet, - file: F, + modified: BTreeSet, block_size: u64, max_blocks: usize, ideal_blocks: usize, @@ -78,9 +61,252 @@ macro_rules! get_block { }}; } -/// Builder for [BufferedReadWrite]. -pub struct BufferedReadWriteBuilder { +impl Buffer { + fn purge_least_recently_used(&mut self) { + let n_blocks = self.blocks.len() - self.ideal_blocks; + let mut to_delete = BinaryHeap::new(); + + for (k, v) in &self.blocks { + if self.modified.contains(&k) { + // Don't try to clean modified blocks + continue; + } + if to_delete.len() < n_blocks { + to_delete.push((v.access, *k)); + } else if let Some((access, _)) = to_delete.peek() { + if v.access <= *access { + to_delete.push((v.access, *k)); + } + if to_delete.len() > n_blocks { + to_delete.pop(); + } + } + } + + for (_, k) in to_delete { + self.blocks.remove(&k); + } + } + + /// Read from the buffer. + /// + /// The buffer must be pre-loaded in order for this to work. 
+ fn read_buffered(&mut self, buf: &mut [u8]) -> io::Result { + let mut offset = 0; + let mut sect = self.cursor / self.block_size; + let mut sect_index = self.cursor % self.block_size; + let mut block = get_block!(self, sect); + + while let Some(buffer) = block { + let buffer = &buffer[sect_index as usize..]; + let b = &mut buf[offset..]; + let to_read = buffer.len().min(b.len()); + + let b = &mut b[..to_read]; + b.copy_from_slice(&buffer[..to_read]); + offset += to_read; + self.cursor += to_read as u64; + if offset == buf.len() { + break; + } + + sect += 1; + sect_index = 0; + block = get_block!(self, sect); + } + + if self.blocks.len() > self.max_blocks { + self.purge_least_recently_used(); + } + + Ok(offset) + } + + fn get_next_block(&mut self) -> Result<(usize, &mut Vec), (usize, u64)> { + let block = self.cursor / self.block_size; + let offset = (self.cursor % self.block_size) as usize; + match get_block!(mut self, block) { + Some(data) => Ok((offset, data)), + None => Err((offset, block)), + } + } + + fn get_next_block_modify(&mut self) -> Result<(usize, &mut Vec), (usize, u64)> { + let block = self.cursor / self.block_size; + let offset = (self.cursor % self.block_size) as usize; + match get_block!(mut self, block) { + Some(data) => { + self.modified.insert(block); + Ok((offset, data)) + } + None => Err((offset, block)), + } + } + + fn get_next_modified_block(&self) -> Option { + self.modified.first().copied() + } + + fn iter_blocks(&self, position: u64, len: u64) -> Range { + let end = position + len; + let block = position / self.block_size; + let end_block = (end + self.block_size - 1) / self.block_size; + let num_blocks = end_block - block; + + block..block + num_blocks + } + + fn to_load(&self, position: u64, len: u64) -> Vec { + let mut to_load = Vec::new(); + + for sect in self.iter_blocks(position, len) { + if !self.blocks.contains_key(§) { + to_load.push(sect); + } + } + + to_load + } + + /// Write to the internal buffer + /// + /// Any 
pre-existing blocks must already be loaded for this to work. + /// + /// No writes to the disk will occur + fn write(&mut self, buf: &[u8]) -> usize { + let mut offset = 0; + let mut sect = self.cursor / self.block_size; + let mut sect_index = self.cursor % self.block_size; + let block_size = self.block_size; + let mut block = get_block!(mut self, sect); + + while let Some(buffer) = block { + let write = &mut buffer[sect_index as usize..]; + let read = &buf[offset..]; + let to_write = write.len().min(read.len()); + + let write = &mut write[..to_write]; + write.copy_from_slice(&read[..to_write]); + self.modified.insert(sect); + + self.cursor += to_write as u64; + offset += to_write; + + if offset == buf.len() { + break; + } + + if (buffer.len() as u64) < block_size { + // If the block isn't a full block, write to the end of the block + + let mut write = vec![0u8; block_size as usize - buffer.len()]; + let read = &buf[offset..]; + let to_write = write.len().min(read.len()); + + let write = &mut write[..to_write]; + write.copy_from_slice(&read[..to_write]); + buffer.extend_from_slice(write); + self.cursor += to_write as u64; + + offset += to_write; + + if offset == buf.len() { + break; + } + } + + sect += 1; + sect_index = 0; + block = get_block!(mut self, sect); + } + + while offset < buf.len() { + // There are new blocks to write + + let read = &buf[offset..]; + let max_write = self.max_blocks - sect_index as usize; + let to_write = max_write.min(read.len()); + + let mut buffer = vec![0u8; sect_index as usize + to_write]; + + let write = &mut buffer[sect_index as usize..]; + write.copy_from_slice(read); + + self.cursor += to_write as u64; + offset += to_write; + + self.blocks.insert( + sect, + Block { + data: buffer, + access: self.access_count, + }, + ); + self.access_count += 1; + self.modified.insert(sect); + + sect += 1; + sect_index = 0; + } + + offset + } + + fn take_modified(&mut self) -> Vec { + let mut modified: Vec<_> = mem::take(&mut 
self.modified).into_iter().collect(); + modified.sort_unstable(); + modified + } + + fn get_block_mut(&mut self, block: u64) -> Option<&mut Vec> { + get_block!(mut self, block) + } + + fn is_full(&self) -> bool { + self.blocks.len() > self.max_blocks + } + + fn insert(&mut self, block: u64, data: Vec) { + self.blocks.insert( + block, + Block { + data, + access: self.access_count, + }, + ); + self.access_count += 1; + } + + #[inline] + fn mark_modified(&mut self, block: u64) { + self.modified.insert(block); + } + + #[inline] + fn mark_clean(&mut self, block: u64) { + self.modified.remove(&block); + } +} + +/// A wrapper for files which holds a buffer for the file. +/// +/// This buffer can hold the entire file in memory and has an option to limit +/// how much data is stored in memory (the default limit is 1GiB). +/// +/// Reads and writes are performed in blocks (the default block size is 512 +/// bytes). +/// +/// This struct is designed to work with simultaneous read/write operations. +/// +/// No writes occur to the underlying file until either flush is called, or the +/// cache limit has been met. +pub struct FileBuffer { + buffer: Buffer, file: F, +} + +/// Builder for [FileBuffer]. +pub struct FileBufferBuilder { block_size: Option, max_blocks: Option, ideal_blocks: Option, @@ -88,7 +314,17 @@ pub struct BufferedReadWriteBuilder { ideal_cache: Option, } -impl BufferedReadWriteBuilder { +impl FileBufferBuilder { + pub fn new() -> Self { + FileBufferBuilder { + block_size: None, + max_blocks: None, + max_cache: None, + ideal_cache: None, + ideal_blocks: None, + } + } + /// Set the number of bytes in a block. /// /// The default is 512 bytes. 
@@ -152,8 +388,7 @@ impl BufferedReadWriteBuilder { self } - /// Create the BufferedReadWrite object - pub fn build(self) -> BufferedReadWrite { + fn build(self) -> Buffer { let block_size = self.block_size.unwrap_or(512); let max_blocks = self .max_blocks @@ -172,11 +407,9 @@ impl BufferedReadWriteBuilder { max_blocks - blocks }); - // .unwrap_or_else(|| (self.rec_cache.unwrap_or(1_000_000_000) / block_size) as usize); - BufferedReadWrite { + Buffer { blocks: HashMap::new(), - modified: HashSet::new(), - file: self.file, + modified: BTreeSet::new(), cursor: 0, block_size, max_blocks, @@ -184,111 +417,47 @@ impl BufferedReadWriteBuilder { access_count: 0, } } -} -impl From for BufferedReadWrite -where - F: Seek, -{ - fn from(value: F) -> Self { - Self::new(value).build() - } -} + /// Create the FileBuffer object + pub fn build_sync(self, f: F) -> FileBuffer { + let buffer = self.build(); -impl BufferedReadWrite -where - F: Seek, -{ - /// Create a new BufferedReadWriteBuilder. - /// - /// ```no_run - /// use mbon::buffer::BufferedReadWrite; - /// use std::fs::File; - /// - /// let file = File::options() - /// .read(true) - /// .write(true) - /// .create(true) - /// .open("my_file.mbon").unwrap(); - /// let f = BufferedReadWrite::new(file).build(); - /// ``` - #[inline] - pub fn new(file: F) -> BufferedReadWriteBuilder { - BufferedReadWriteBuilder { - file, - block_size: None, - max_blocks: None, - max_cache: None, - ideal_cache: None, - ideal_blocks: None, - } + FileBuffer { file: f, buffer } } - /// Purge the n_blocks least recently used blocks from the cache. - /// - /// This will ignore any blocks that have been modified. 
- fn purge_least_recently_used(&mut self, n_blocks: usize) { - println!( - "Clearing {n_blocks} blocks to {}", - self.blocks.len() - n_blocks - ); - let mut to_delete = BinaryHeap::new(); - - for (k, v) in &self.blocks { - if self.modified.contains(&k) { - // Don't try to clean modified blocks - continue; - } - if to_delete.len() < n_blocks { - to_delete.push((v.access, *k)); - } else if let Some((access, _)) = to_delete.peek() { - if v.access <= *access { - to_delete.push((v.access, *k)); - } - if to_delete.len() > n_blocks { - to_delete.pop(); - } - } - } + pub fn build_async(self, f: F) -> FileBufferAsync { + let buffer = self.build(); - for (_, k) in to_delete { - self.blocks.remove(&k); + FileBufferAsync { + file: f, + buffer, + cursor: None, + state: AsyncFileBufState::default(), } } - - /// Clear the cache without flushing the file. - /// - /// This will preserve any cached blocks that have been modified. - pub fn clear_cache_no_flush(&mut self) { - let blocks = mem::take(&mut self.blocks); - self.blocks = blocks - .into_iter() - .filter(|(k, _)| self.modified.contains(k)) - .collect(); - } } -impl BufferedReadWrite +impl FileBuffer where F: Write + Seek, { fn flush_blocks(&mut self) -> io::Result<()> { - // I'm sorting here because I would assume that it is quicker for the file system to write - // in order than it would be to write in a random order. 
- let mut modified: Vec<_> = mem::take(&mut self.modified).into_iter().collect(); - modified.sort_unstable(); + let modified = self.buffer.take_modified(); let mut position = match modified.first() { - Some(sect) => self.file.seek(SeekFrom::Start(sect * self.block_size))?, + Some(sect) => self + .file + .seek(SeekFrom::Start(sect * self.buffer.block_size))?, None => self.file.seek(SeekFrom::Current(0))?, }; + let block_size = self.buffer.block_size; - for sect in modified { - let buf = match get_block!(mut self, sect) { + for block in modified { + let buf = match self.buffer.get_block_mut(block) { Some(b) => b, None => continue, }; - let pos = sect * self.block_size; + let pos = block * block_size; if position != pos { self.file.seek(SeekFrom::Start(pos))?; position = pos; @@ -299,8 +468,8 @@ where } self.file.flush()?; - if self.blocks.len() > self.max_blocks { - self.purge_least_recently_used(self.blocks.len() - self.ideal_blocks); + if self.buffer.is_full() { + self.buffer.purge_least_recently_used(); } Ok(()) @@ -312,38 +481,26 @@ where /// clearing the cache. 
pub fn clear_cache(&mut self) -> io::Result<()> { self.flush_blocks()?; - self.blocks.clear(); + self.buffer.blocks.clear(); Ok(()) } } -impl BufferedReadWrite +impl FileBuffer where F: Read + Seek, { fn load_blocks(&mut self, position: u64, len: u64) -> io::Result<()> { - let end = position + len; - let block = position / self.block_size; - let end_block = (end + self.block_size - 1) / self.block_size; - let num_blocks = end_block - block; - - let mut to_load = Vec::new(); - - for sect in block..block + num_blocks { - if !self.blocks.contains_key(§) { - to_load.push(sect); - } - } + let to_load = self.buffer.to_load(position, len); let mut position = self.file.seek(SeekFrom::Current(0))?; - for sect in to_load { - let pos = sect * self.block_size; + let pos = sect * self.buffer.block_size; if position != pos { self.file.seek(SeekFrom::Start(pos))?; position = pos; } - let mut buf = vec![0u8; self.block_size as usize]; + let mut buf = vec![0u8; self.buffer.block_size as usize]; let mut offset = 0; let mut eof = false; @@ -368,14 +525,8 @@ where buf.remove(i); } - self.blocks.insert( - sect, - Block { - data: buf, - access: self.access_count, - }, - ); - self.access_count += 1; + self.buffer.insert(sect, buf); + if eof { break; } @@ -385,156 +536,540 @@ where } } -impl Read for BufferedReadWrite +impl Read for FileBuffer where F: Read + Seek, { fn read(&mut self, buf: &mut [u8]) -> io::Result { - self.load_blocks(self.cursor, buf.len() as u64)?; - let mut offset = 0; - let mut sect = self.cursor / self.block_size; - let mut sect_index = self.cursor % self.block_size; - let mut block = get_block!(self, sect); - - while let Some(buffer) = block { - let buffer = &buffer[sect_index as usize..]; - let b = &mut buf[offset..]; - let to_read = buffer.len().min(b.len()); + self.load_blocks(self.buffer.cursor, buf.len() as u64)?; + self.buffer.read_buffered(buf) + } +} - let b = &mut b[..to_read]; - b.copy_from_slice(&buffer[..to_read]); - offset += to_read; - self.cursor += 
to_read as u64; - if offset == buf.len() { - break; - } +impl Write for FileBuffer +where + F: Read + Write + Seek, +{ + fn write(&mut self, buf: &[u8]) -> io::Result { + self.load_blocks(self.buffer.cursor, buf.len() as u64)?; - sect += 1; - sect_index = 0; - block = get_block!(self, sect); - } + let offset = self.buffer.write(buf); - if self.blocks.len() > self.max_blocks { - self.purge_least_recently_used(self.blocks.len() - self.ideal_blocks); + if self.buffer.is_full() { + self.flush_blocks()?; } Ok(offset) } + + fn flush(&mut self) -> io::Result<()> { + self.flush_blocks() + } } -impl Write for BufferedReadWrite +impl Seek for FileBuffer where - F: Read + Write + Seek, + F: Seek, { - fn write(&mut self, buf: &[u8]) -> io::Result { - self.load_blocks(self.cursor, buf.len() as u64)?; - let mut offset = 0; - let mut sect = self.cursor / self.block_size; - let mut sect_index = self.cursor % self.block_size; - let block_size = self.block_size; - let mut block = get_block!(mut self, sect); + fn seek(&mut self, pos: SeekFrom) -> io::Result { + match pos { + SeekFrom::Start(p) => self.buffer.cursor = p, + SeekFrom::End(_) => { + self.buffer.cursor = self.file.seek(pos)?; + } + SeekFrom::Current(o) => { + self.buffer.cursor = self.buffer.cursor.checked_add_signed(o).ok_or_else(|| { + io::Error::new( + io::ErrorKind::InvalidInput, + "Cannot seek to a negative position", + ) + })? 
+ } + } + Ok(self.buffer.cursor) + } +} - while let Some(buffer) = block { - let write = &mut buffer[sect_index as usize..]; - let read = &buf[offset..]; - let to_write = write.len().min(read.len()); +#[derive(EnumAsInner)] +enum AsyncFileBufState { + Normal, + StartSeek(SeekFrom), + Seeking, + Reading { + block: u64, + buf: Vec, + read: usize, + }, + Writing { + block: u64, + buf: Vec, + written: usize, + }, + Closing, +} - let write = &mut write[..to_write]; - write.copy_from_slice(&read[..to_write]); - self.modified.insert(sect); +impl Default for AsyncFileBufState { + fn default() -> Self { + Self::Normal + } +} - self.cursor += to_write as u64; - offset += to_write; +#[pin_project] +pub struct FileBufferAsync { + buffer: Buffer, + #[pin] + file: F, + cursor: Option, + state: AsyncFileBufState, +} +impl FileBufferAsync { + fn internal_poll_read_block( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + block: u64, + ) -> Poll> { + let state = mem::take(self.as_mut().project().state); + let block_size = self.buffer.block_size; + match state { + AsyncFileBufState::Normal => { + if self + .as_mut() + .internal_cursor_try_seek(cx, SeekFrom::Start(block * block_size))? 
+ .is_pending() + { + return Poll::Pending; + } - if offset == buf.len() { - break; + let me = self.as_mut().project(); + *me.state = AsyncFileBufState::Reading { + block, + buf: vec![0u8; block_size as usize], + read: 0, + }; + self.internal_poll_read_block(cx, block) } + AsyncFileBufState::Reading { + block, + mut buf, + read, + } => { + let me = self.as_mut().project(); + debug_assert_eq!(*me.cursor, Some(block * block_size + read as u64)); + + let mut b = ReadBuf::new(buf.as_mut_slice()); + b.set_filled(read); + if me.file.poll_read(cx, &mut b)?.is_pending() { + return Poll::Pending; + } + let read = b.filled().len(); + *me.cursor = Some(block * me.buffer.block_size + read as u64); + if b.remaining() == 0 { + me.buffer.insert(block, buf); + return Poll::Ready(Ok(())); + } + *me.state = AsyncFileBufState::Reading { block, buf, read }; + self.internal_poll_read_block(cx, block) + } + AsyncFileBufState::Writing { .. } => Poll::Ready(Err(io::Error::new( + io::ErrorKind::InvalidData, + "In a Writing State", + ))), + AsyncFileBufState::Closing => { + Poll::Ready(Err(io::Error::new(io::ErrorKind::InvalidData, "Closed"))) + } + state => { + let me = self.as_mut().project(); + *me.state = state; - if (buffer.len() as u64) < block_size { - // If the block isn't a full block, write to the end of the block - - let mut write = vec![0u8; block_size as usize - buffer.len()]; - let read = &buf[offset..]; - let to_write = write.len().min(read.len()); - - let write = &mut write[..to_write]; - write.copy_from_slice(&read[..to_write]); - buffer.extend_from_slice(write); - self.cursor += to_write as u64; - - offset += to_write; + let poll = self.as_mut().internal_cursor_poll_complete(cx)?; + if poll.is_ready() { + return self.internal_poll_read_block(cx, block); + } + Poll::Pending + } + } + } +} - if offset == buf.len() { - break; +impl AsyncRead for FileBufferAsync { + fn poll_read( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + buf: &mut ReadBuf<'_>, + ) -> Poll> { + 
while buf.remaining() > 0 { + let me = self.as_mut().project(); + match me.buffer.get_next_block() { + Ok((offset, block)) => { + let block = &block[offset..]; + let to_read = block.len().min(buf.remaining()); + buf.put_slice(&block[..to_read]); + me.buffer.cursor += to_read as u64; + continue; + } + Err((_offset, block)) => { + if self + .as_mut() + .internal_poll_read_block(cx, block)? + .is_pending() + { + return Poll::Pending; + } } } + } - sect += 1; - sect_index = 0; - block = get_block!(mut self, sect); + Poll::Ready(Ok(())) + } +} + +impl FileBufferAsync { + fn internal_cursor_try_seek( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + position: SeekFrom, + ) -> Poll> { + if self.state.is_normal() { + self.as_mut().internal_cursor_start_seek(position)?; } + self.internal_cursor_poll_complete(cx) + } - while offset < buf.len() { - // There are new blocks to write + fn internal_cursor_start_seek(self: Pin<&mut Self>, position: SeekFrom) -> io::Result<()> { + let me = self.project(); + *me.state = AsyncFileBufState::StartSeek(position); + Ok(()) + } - let read = &buf[offset..]; - let max_write = self.max_blocks - sect_index as usize; - let to_write = max_write.min(read.len()); + fn internal_cursor_poll_complete( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + ) -> Poll> { + let state = mem::take(self.as_mut().project().state); + match state { + AsyncFileBufState::Normal => Poll::Ready(Ok(0)), + AsyncFileBufState::StartSeek(seek) => match seek { + SeekFrom::Start(pos) => { + let me = self.as_mut().project(); + if let Some(actual) = me.cursor { + if *actual == pos { + return Poll::Ready(Ok(*actual)); + } + } + me.file.start_seek(SeekFrom::Start(pos))?; + let me = self.as_mut().project(); + let poll = me.file.poll_complete(cx)?; + if let Poll::Ready(pos) = poll { + *me.cursor = Some(pos); + return Poll::Ready(Ok(pos)); + } + *me.state = AsyncFileBufState::Seeking; + Poll::Pending + } + seek => { + let me = self.as_mut().project(); + 
me.file.start_seek(seek)?; + let me = self.as_mut().project(); + let poll = me.file.poll_complete(cx)?; + if let Poll::Ready(pos) = poll { + *me.cursor = Some(pos); + return Poll::Ready(Ok(pos)); + } + *me.state = AsyncFileBufState::Seeking; + Poll::Pending + } + }, + AsyncFileBufState::Seeking => { + let me = self.as_mut().project(); + let poll = me.file.poll_complete(cx)?; + if let Poll::Ready(pos) = poll { + *me.cursor = Some(pos); + return Poll::Ready(Ok(pos)); + } + *me.state = AsyncFileBufState::Seeking; + Poll::Pending + } + AsyncFileBufState::Reading { .. } => Poll::Ready(Err(io::Error::new( + io::ErrorKind::InvalidData, + "In a Reading State", + ))), + AsyncFileBufState::Writing { .. } => Poll::Ready(Err(io::Error::new( + io::ErrorKind::InvalidData, + "In a Writing State", + ))), + AsyncFileBufState::Closing => { + Poll::Ready(Err(io::Error::new(io::ErrorKind::InvalidData, "Closed"))) + } + } + } +} - let mut buffer = vec![0u8; sect_index as usize + to_write]; +impl AsyncSeek for FileBufferAsync { + fn start_seek(self: Pin<&mut Self>, position: SeekFrom) -> io::Result<()> { + let me = self.project(); + *me.state = AsyncFileBufState::StartSeek(position); + Ok(()) + } - let write = &mut buffer[sect_index as usize..]; - write.copy_from_slice(read); + fn poll_complete(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + let me = self.as_mut().project(); + let state = mem::take(me.state); + match state { + AsyncFileBufState::Normal => Poll::Ready(Ok(me.buffer.cursor)), + AsyncFileBufState::StartSeek(SeekFrom::Start(position)) => { + me.buffer.cursor = position; + Poll::Ready(Ok(me.buffer.cursor)) + } + AsyncFileBufState::StartSeek(SeekFrom::Current(offset)) => { + me.buffer.cursor = match me.buffer.cursor.checked_add_signed(offset) { + Some(v) => v, + None => { + return Poll::Ready(Err(io::Error::new( + io::ErrorKind::InvalidInput, + "Cannot seek to a negative position", + ))) + } + }; + Poll::Ready(Ok(me.buffer.cursor)) + } + 
AsyncFileBufState::StartSeek(seek) => { + me.file.start_seek(seek)?; + let me = self.as_mut().project(); + let poll = me.file.poll_complete(cx)?; + if let Poll::Ready(pos) = poll { + me.buffer.cursor = pos; + *me.cursor = Some(pos); + return Poll::Ready(Ok(me.buffer.cursor)); + } + *me.state = AsyncFileBufState::Seeking; + Poll::Pending + } + AsyncFileBufState::Seeking => { + let poll = me.file.poll_complete(cx)?; + if let Poll::Ready(pos) = poll { + me.buffer.cursor = pos; + *me.cursor = Some(pos); + return Poll::Ready(Ok(me.buffer.cursor)); + } + *me.state = AsyncFileBufState::Seeking; + Poll::Pending + } + AsyncFileBufState::Reading { .. } => Poll::Ready(Err(io::Error::new( + io::ErrorKind::InvalidData, + "In a Reading State", + ))), + AsyncFileBufState::Writing { .. } => Poll::Ready(Err(io::Error::new( + io::ErrorKind::InvalidData, + "In a Writing State", + ))), + AsyncFileBufState::Closing => { + Poll::Ready(Err(io::Error::new(io::ErrorKind::InvalidData, "Closed"))) + } + } + } +} - self.cursor += to_write as u64; - offset += to_write; +impl FileBufferAsync { + fn internal_start_write_block( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + block: u64, + ) -> Poll> { + let state = mem::take(self.as_mut().project().state); + let block_size = self.buffer.block_size; + match state { + AsyncFileBufState::Normal => { + if self + .as_mut() + .internal_cursor_try_seek(cx, SeekFrom::Start(block * block_size))? + .is_pending() + { + return Poll::Pending; + } - self.blocks.insert( - sect, - Block { - data: buffer, - access: self.access_count, - }, - ); - self.access_count += 1; - self.modified.insert(sect); + let me = self.as_mut().project(); + let data = me.buffer.blocks.get(&block).ok_or_else(|| { + io::Error::new(io::ErrorKind::InvalidData, "Block does not exist") + })?; + *me.state = AsyncFileBufState::Writing { + block, + buf: data.data.clone(), + written: 0, + }; + self.internal_poll_write_block(cx) + } + AsyncFileBufState::Reading { .. 
} => Poll::Ready(Err(io::Error::new( + io::ErrorKind::InvalidData, + "In a Reading State", + ))), + AsyncFileBufState::Writing { .. } => Poll::Ready(Err(io::Error::new( + io::ErrorKind::InvalidData, + "In a Writing State", + ))), + AsyncFileBufState::Closing => { + Poll::Ready(Err(io::Error::new(io::ErrorKind::InvalidData, "Closed"))) + } + state => { + let me = self.as_mut().project(); + *me.state = state; - sect += 1; - sect_index = 0; + let poll = self.as_mut().internal_cursor_poll_complete(cx)?; + if poll.is_ready() { + return self.internal_poll_write_block(cx); + } + Poll::Pending + } } - - if self.blocks.len() > self.max_blocks { - self.flush_blocks()?; + } + fn internal_poll_write_block( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + ) -> Poll> { + let state = mem::take(self.as_mut().project().state); + let block_size = self.buffer.block_size; + + match state { + AsyncFileBufState::Writing { + block, + buf, + written, + } => { + let me = self.as_mut().project(); + debug_assert_eq!(*me.cursor, Some(block * block_size + written as u64)); + + let poll = me.file.poll_write(cx, &buf[written..])?; + if let Poll::Ready(w) = poll { + let written = written + w; + *me.cursor = Some(block * block_size + written as u64); + if written == buf.len() { + me.buffer.mark_clean(block); + return Poll::Ready(Ok(())); + } + + *me.state = AsyncFileBufState::Writing { + block, + buf, + written, + }; + return self.internal_poll_write_block(cx); + } + *me.state = AsyncFileBufState::Writing { + block, + buf, + written, + }; + Poll::Pending + } + _ => Poll::Ready(Err(io::Error::new( + io::ErrorKind::InvalidData, + "In an Invalid State", + ))), } - - Ok(offset) } +} - fn flush(&mut self) -> io::Result<()> { - self.flush_blocks() +impl AsyncWrite for FileBufferAsync { + fn poll_write( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + buf: &[u8], + ) -> Poll> { + let mut written = 0; + + let block_size = self.buffer.block_size as usize; + while written < buf.len() { + let me = 
self.as_mut().project(); + match me.buffer.get_next_block_modify() { + Ok((offset, data)) => { + let block = &mut data[offset..]; + let b = &buf[written..]; + + let to_write = block.len().min(b.len()); + (&mut block[..to_write]).copy_from_slice(&b[..to_write]); + written += to_write; + + if data.len() < block_size { + let b = &buf[written..]; + let to_extend = (block_size - data.len()).min(b.len()); + data.extend_from_slice(&b[..to_extend]); + me.buffer.cursor += to_extend as u64; + } + + me.buffer.cursor += to_write as u64; + + continue; + } + Err((offset, block)) => { + let me = self.as_mut().project(); + let buf = &buf[written..]; + + if offset == 0 && buf.len() > block_size { + // Overwrite the whole block without reading it + me.buffer.insert(block, Vec::from(&buf[..block_size])); + me.buffer.mark_modified(block); + written += block_size; + me.buffer.cursor += block_size as u64; + continue; + } + + // Return the number of successful bytes written if any + // before making a call to the file + if written > 0 { + return Poll::Ready(Ok(written)); + } + + if self + .as_mut() + .internal_poll_read_block(cx, block)? + .is_pending() + { + return Poll::Pending; + } + continue; + } + } + } + + Poll::Ready(Ok(written)) } -} -impl Seek for BufferedReadWrite -where - F: Seek, -{ - fn seek(&mut self, pos: SeekFrom) -> io::Result { - match pos { - SeekFrom::Start(p) => self.cursor = p, - SeekFrom::End(_) => { - self.cursor = self.file.seek(pos)?; + fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + if self.state.is_writing() { + if self.as_mut().internal_poll_write_block(cx)?.is_pending() { + return Poll::Pending; } - SeekFrom::Current(o) => { - self.cursor = self.cursor.checked_add_signed(o).ok_or_else(|| { - io::Error::new( - io::ErrorKind::InvalidInput, - "Cannot seek to a negative position", - ) - })? + } + while let Some(block) = self.buffer.get_next_modified_block() { + if self + .as_mut() + .internal_start_write_block(cx, block)? 
+ .is_pending() + { + return Poll::Pending; } } - Ok(self.cursor) + + let me = self.as_mut().project(); + if me.file.poll_flush(cx)?.is_pending() { + return Poll::Pending; + } + + Poll::Ready(Ok(())) + } + + fn poll_shutdown( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + ) -> Poll> { + if self.state.is_closing() { + return self.project().file.poll_shutdown(cx); + } + if self.as_mut().poll_flush(cx)?.is_pending() { + return Poll::Pending; + } + let me = self.project(); + *me.state = AsyncFileBufState::Closing; + me.file.poll_shutdown(cx) } } @@ -542,7 +1077,7 @@ where mod test { use rand::{rngs::StdRng, Rng as _, SeedableRng}; use std::{ - io::{Cursor, Read, Seek, Write}, + io::{Cursor, Seek, Write}, slice, }; @@ -562,9 +1097,9 @@ Nec ullamcorper sit amet risus nullam eget felis. Vestibulum mattis ullamcorper #[test] fn test_read() { let mut cursor = Cursor::new(FILE); - let mut f = BufferedReadWrite::new(&mut cursor) + let mut f = FileBufferBuilder::new() .with_block_size(13) - .build(); + .build_sync(&mut cursor); let mut buf = [0u8; 100]; for i in 0..(FILE.len() / 100) { @@ -581,9 +1116,9 @@ Nec ullamcorper sit amet risus nullam eget felis. Vestibulum mattis ullamcorper #[test] fn test_write() { let mut cursor = Cursor::new(Vec::::new()); - let mut f = BufferedReadWrite::new(&mut cursor) + let mut f = FileBufferBuilder::new() .with_block_size(13) - .build(); + .build_sync(&mut cursor); let count = f.write(SHORT).unwrap(); assert_eq!(count, SHORT.len()); @@ -600,9 +1135,9 @@ Nec ullamcorper sit amet risus nullam eget felis. Vestibulum mattis ullamcorper #[test] fn test_replace() { let mut cursor = Cursor::new(Vec::from(FILE)); - let mut f = BufferedReadWrite::new(&mut cursor) + let mut f = FileBufferBuilder::new() .with_block_size(13) - .build(); + .build_sync(&mut cursor); let written = f.write(b"Hello World").unwrap(); assert_eq!(written, 11); @@ -619,9 +1154,9 @@ Nec ullamcorper sit amet risus nullam eget felis. 
Vestibulum mattis ullamcorper #[test] fn test_append() { let mut cursor = Cursor::new(Vec::from(SHORT)); - let mut f = BufferedReadWrite::new(&mut cursor) + let mut f = FileBufferBuilder::new() .with_block_size(13) - .build(); + .build_sync(&mut cursor); let written = f.write(FILE).unwrap(); assert_eq!(written, FILE.len()); @@ -638,9 +1173,9 @@ Nec ullamcorper sit amet risus nullam eget felis. Vestibulum mattis ullamcorper #[test] fn test_replace_arbitrary() { let mut cursor = Cursor::new(Vec::from(FILE)); - let mut f = BufferedReadWrite::new(&mut cursor) + let mut f = FileBufferBuilder::new() .with_block_size(13) - .build(); + .build_sync(&mut cursor); f.seek(SeekFrom::Start(9)).unwrap(); let written = f.write(b"Hello World").unwrap(); @@ -658,17 +1193,17 @@ Nec ullamcorper sit amet risus nullam eget felis. Vestibulum mattis ullamcorper #[test] fn test_read_cache_limit() { let mut cursor = Cursor::new(FILE); - let mut f = BufferedReadWrite::new(&mut cursor) + let mut f = FileBufferBuilder::new() .with_block_size(13) .with_max_blocks(13) - .build(); + .build_sync(&mut cursor); let mut buf = [0u8; 100]; for i in 0..(FILE.len() / 100) { let count = f.read(&mut buf).unwrap(); assert_eq!(count, 100); assert_eq!(buf.as_slice(), &FILE[i * 100..(i + 1) * 100]); - assert!(f.blocks.len() <= 13); + assert!(f.buffer.blocks.len() <= 13); } let count = f.read(&mut buf).unwrap(); @@ -679,9 +1214,9 @@ Nec ullamcorper sit amet risus nullam eget felis. Vestibulum mattis ullamcorper #[test] fn test_read_after_end() { let mut cursor = Cursor::new(FILE); - let mut f = BufferedReadWrite::new(&mut cursor) + let mut f = FileBufferBuilder::new() .with_block_size(13) - .build(); + .build_sync(&mut cursor); let mut buf = [0u8; 100]; f.seek(SeekFrom::End(100)).unwrap(); @@ -693,10 +1228,10 @@ Nec ullamcorper sit amet risus nullam eget felis. 
Vestibulum mattis ullamcorper fn test_random_writes() { let mut file = Vec::from(FILE); let mut cursor = Cursor::new(&mut file); - let mut f = BufferedReadWrite::new(&mut cursor) + let mut f = FileBufferBuilder::new() .with_block_size(13) .with_max_blocks(13) - .build(); + .build_sync(&mut cursor); let mut rng = StdRng::from_seed(*b"Hiya World This is a random seed"); // let mut rng = StdRng::from_entropy(); diff --git a/src/channel.rs b/src/channel.rs index dcd42b6..1e08f5a 100644 --- a/src/channel.rs +++ b/src/channel.rs @@ -1,3 +1,9 @@ +//! Redefinitions of channel +//! +//! When the feature `async` is enabled, then [async_channel] will be used. +//! +//! When the feature `sync` is enabled, then [std::sync::mpsc] will be used. + use maybe_async::maybe_async; #[cfg(feature = "async")] @@ -5,7 +11,9 @@ use async_channel::{unbounded as new_channel, Receiver, RecvError, SendError, Se #[cfg(feature = "sync")] use std::sync::mpsc::{channel as new_channel, Receiver, RecvError, SendError, Sender}; +/// The sending half of a channel pub struct ChanSend(pub Sender); +/// The receiving half of a channel pub struct ChanRecv(pub Receiver); impl Clone for ChanSend { @@ -22,17 +30,24 @@ pub fn channel() -> (ChanSend, ChanRecv) { #[maybe_async] impl ChanSend { + /// Send a message to the receiver #[inline] pub async fn send(&self, data: T) -> Result<(), SendError> { self.0.send(data).await } + /// Send a message to the receiver + /// + /// This is the same as [Self::send] when feature `sync` is set #[cfg(feature = "async")] #[inline] pub fn send_blocking(&self, data: T) -> Result<(), SendError> { self.0.send_blocking(data) } + /// Send a message to the receiver + /// + /// This is the same as [Self::send] when feature `sync` is set #[cfg(feature = "sync")] #[inline] pub fn send_blocking(&self, data: T) -> Result<(), SendError> { @@ -41,17 +56,30 @@ impl ChanSend { } #[maybe_async] impl ChanRecv { + /// Receive a message from a sender. 
+ /// + /// This will wait until a message is ready #[inline] pub async fn recv(&self) -> Result { self.0.recv().await } + /// Receive a message from a sender. + /// + /// This will wait until a message is ready + /// + /// This is the same as [Self::recv] when feature `sync` is set #[cfg(feature = "async")] #[inline] pub fn recv_blocking(&self) -> Result { self.0.recv_blocking() } + /// Receive a message from a sender. + /// + /// This will wait until a message is ready + /// + /// This is the same as [Self::recv] when feature `sync` is set #[cfg(feature = "sync")] #[inline] pub fn recv_blocking(&self) -> Result { diff --git a/src/engine.rs b/src/engine.rs index 4a09801..7bdaecd 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -4,16 +4,18 @@ use std::{ fs::File, io::{self, Read, Seek, SeekFrom}, path::Path, - thread::JoinHandle, }; #[cfg(feature = "async")] use std::sync::{Arc, Mutex, MutexGuard}; #[cfg(feature = "async-tokio")] -use tokio::task::spawn_blocking; +use tokio::task::{spawn_blocking, JoinHandle}; + +#[cfg(feature = "sync")] +use std::thread::JoinHandle; use crate::{ - buffer::BufferedReadWrite, + buffer::{FileBuffer, FileBufferBuilder}, concurrent::{ConcurrentEngineClient, ConcurrentEngineWrapper}, data::{Data, PartialItem}, errors::{MbonError, MbonResult}, @@ -54,9 +56,9 @@ pub trait MbonParserRead { } #[cfg(feature = "sync")] -type Reader = BufferedReadWrite; +type Reader = FileBuffer; #[cfg(feature = "async")] -type Reader = Arc>>; +type Reader = Arc>>; /// Mbon Engine /// @@ -95,12 +97,12 @@ impl Engine { #[cfg(feature = "async")] impl Engine { #[inline] - fn get_file(&mut self) -> MutexGuard> { + fn get_file(&mut self) -> MutexGuard> { self.file.lock().unwrap() } #[inline] - fn new_file(f: BufferedReadWrite) -> Arc>> { + fn new_file(f: FileBuffer) -> Arc>> { Arc::new(Mutex::new(f)) } } @@ -108,11 +110,11 @@ impl Engine { #[cfg(feature = "sync")] impl Engine { #[inline] - fn get_file(&mut self) -> &mut BufferedReadWrite { + fn get_file(&mut self) 
-> &mut FileBuffer { &mut self.file } #[inline] - fn new_file(f: BufferedReadWrite) -> BufferedReadWrite { + fn new_file(f: FileBuffer) -> FileBuffer { f } } @@ -140,7 +142,7 @@ where /// Create a new engine from a file pub fn new(file: F) -> Self { Self { - file: Self::new_file(BufferedReadWrite::new(file).build()), + file: Self::new_file(FileBufferBuilder::new().build_sync(file)), } } From 392075f5935a7273ba90355e8e02e313e005c7ed Mon Sep 17 00:00:00 2001 From: Benjamin Jacobs Date: Mon, 4 Mar 2024 01:14:34 -0700 Subject: [PATCH 06/10] Get FileBufferAsync Working --- Cargo.lock | 312 ++++++++- Cargo.toml | 7 +- resources/test/lorem_ipsom.txt | 9 + src/buffer.rs | 1184 ++++++-------------------------- src/buffer/async_buf.rs | 841 +++++++++++++++++++++++ src/buffer/sync_buf.rs | 539 +++++++++++++++ src/engine.rs | 4 +- test.sh | 4 + 8 files changed, 1922 insertions(+), 978 deletions(-) create mode 100644 resources/test/lorem_ipsom.txt create mode 100644 src/buffer/async_buf.rs create mode 100644 src/buffer/sync_buf.rs create mode 100755 test.sh diff --git a/Cargo.lock b/Cargo.lock index 17e12bd..50dabdd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -58,6 +58,12 @@ dependencies = [ "syn", ] +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + [[package]] name = "backtrace" version = "0.3.69" @@ -73,6 +79,12 @@ dependencies = [ "rustc-demangle", ] +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + [[package]] name = "byteorder" version = "1.4.3" @@ -145,11 +157,94 @@ dependencies = [ "pin-project-lite", ] +[[package]] +name = "futures" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"645c6916888f6cb6350d2550b80fb63e734897a8498abe35cfb732b6487804b0" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" +dependencies = [ + "futures-core", + "futures-sink", +] + [[package]] name = "futures-core" -version = "0.3.25" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" + +[[package]] +name = "futures-executor" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a576fc72ae164fca6b9db127eaa9a9dda0d61316034f33a0a0d4eda41f02b01d" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" + +[[package]] +name = "futures-macro" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "futures-sink" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" + +[[package]] +name = "futures-task" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "04909a7a7e4633ae6c4a9ab280aeb86da1236243a77b694a49eacd659a4bd3ac" +checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" + +[[package]] +name = "futures-util" +version = "0.3.30" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] [[package]] name = "getrandom" @@ -186,6 +281,16 @@ version = "0.2.153" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" +[[package]] +name = "lock_api" +version = "0.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45" +dependencies = [ + "autocfg", + "scopeguard", +] + [[package]] name = "maybe-async" version = "0.2.10" @@ -207,9 +312,11 @@ dependencies = [ "async-trait", "byteorder", "enum-as-inner", + "futures", "maybe-async", "pin-project", "rand", + "strum", "thiserror", "tokio", ] @@ -229,6 +336,17 @@ dependencies = [ "adler", ] +[[package]] +name = "mio" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f3d0b296e374a4e6f3c7b0a1f5a51d748a0d34c85e7dc48fc3fa9a87657fe09" +dependencies = [ + "libc", + "wasi", + "windows-sys", +] + [[package]] name = "num_cpus" version = "1.16.0" @@ -254,6 +372,29 @@ version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bb813b8af86854136c6922af0598d719255ecb2179515e6e7730d468f05c9cae" +[[package]] +name = "parking_lot" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets", +] + [[package]] name = "pin-project" version = "1.1.4" @@ -280,6 +421,12 @@ version = "0.2.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58" +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + [[package]] name = "ppv-lite86" version = "0.2.17" @@ -334,12 +481,89 @@ dependencies = [ "getrandom", ] +[[package]] +name = "redox_syscall" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" +dependencies = [ + "bitflags", +] + [[package]] name = "rustc-demangle" version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" +[[package]] +name = "rustversion" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4" + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "signal-hook-registry" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8229b473baa5980ac72ef434c4415e70c4b5e71b423043adb4ba059f89c99a1" +dependencies = [ + "libc", +] + +[[package]] +name = "slab" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" +dependencies = [ + 
"autocfg", +] + +[[package]] +name = "smallvec" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7" + +[[package]] +name = "socket2" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b5fac59a5cb5dd637972e5fca70daf0523c9067fcdc4842f053dae04a18f8e9" +dependencies = [ + "libc", + "windows-sys", +] + +[[package]] +name = "strum" +version = "0.26.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "723b93e8addf9aa965ebe2d11da6d7540fa2283fcea14b3371ff055f7ba13f5f" +dependencies = [ + "strum_macros", +] + +[[package]] +name = "strum_macros" +version = "0.26.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a3417fc93d76740d974a01654a09777cb500428cc874ca9f45edfe0c4d4cd18" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "rustversion", + "syn", +] + [[package]] name = "syn" version = "2.0.50" @@ -379,8 +603,26 @@ checksum = "61285f6515fa018fb2d1e46eb21223fff441ee8db5d0f1435e8ab4f5cdb80931" dependencies = [ "backtrace", "bytes", + "libc", + "mio", "num_cpus", + "parking_lot", "pin-project-lite", + "signal-hook-registry", + "socket2", + "tokio-macros", + "windows-sys", +] + +[[package]] +name = "tokio-macros" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" +dependencies = [ + "proc-macro2", + "quote", + "syn", ] [[package]] @@ -394,3 +636,69 @@ name = "wasi" version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" diff --git 
a/Cargo.toml b/Cargo.toml index edeff89..0f87840 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,9 +26,12 @@ async-channel = { version = "2", optional = true } tokio = { version = "1", features = ["rt-multi-thread", "io-util"], optional = true } async-trait = { version = "0.1", optional = true } pin-project = "1.1.4" +strum = { version = "0.26.1", features = ["derive"] } [dev-dependencies] +futures = "0.3.30" rand = "0.8" +tokio = { version = "1", features = ["full"] } # serde = "1.0" # [dev-dependencies] @@ -36,7 +39,7 @@ rand = "0.8" [features] # default = ["sync"] -default = ["async-tokio"] +# default = ["async-tokio"] sync = ["maybe-async/is_sync"] @@ -45,4 +48,4 @@ async-tokio = ["async", "dep:tokio"] [package.metadata.docs.rs] -all-features = true +features = ["async-tokio"] diff --git a/resources/test/lorem_ipsom.txt b/resources/test/lorem_ipsom.txt new file mode 100644 index 0000000..2fbcff8 --- /dev/null +++ b/resources/test/lorem_ipsom.txt @@ -0,0 +1,9 @@ +Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Nisl pretium fusce id velit ut tortor pretium viverra. Tincidunt nunc pulvinar sapien et ligula ullamcorper malesuada proin. Gravida neque convallis a cras semper auctor neque vitae tempus. Cursus eget nunc scelerisque viverra mauris in aliquam. Viverra maecenas accumsan lacus vel facilisis volutpat est velit. Pulvinar mattis nunc sed blandit libero volutpat sed cras ornare. Massa eget egestas purus viverra accumsan in nisl nisi scelerisque. Ornare massa eget egestas purus viverra accumsan in nisl. Sed risus ultricies tristique nulla aliquet enim tortor. Laoreet suspendisse interdum consectetur libero id faucibus nisl tincidunt. Nisl tincidunt eget nullam non. Pretium quam vulputate dignissim suspendisse in est. Non enim praesent elementum facilisis. Nibh mauris cursus mattis molestie a. Iaculis nunc sed augue lacus viverra vitae. In mollis nunc sed id semper risus. 
Augue neque gravida in fermentum et sollicitudin ac. Pellentesque pulvinar pellentesque habitant morbi tristique senectus. Libero nunc consequat interdum varius sit. + +Iaculis eu non diam phasellus vestibulum lorem sed risus ultricies. Vitae ultricies leo integer malesuada nunc. Enim lobortis scelerisque fermentum dui faucibus in ornare. Et netus et malesuada fames. Dignissim enim sit amet venenatis urna cursus. Volutpat maecenas volutpat blandit aliquam etiam erat velit scelerisque in. Viverra nibh cras pulvinar mattis nunc sed blandit libero. Condimentum id venenatis a condimentum. Blandit cursus risus at ultrices. Auctor eu augue ut lectus arcu. Felis imperdiet proin fermentum leo vel. Imperdiet dui accumsan sit amet nulla facilisi morbi tempus. Sed velit dignissim sodales ut eu sem integer vitae. Auctor urna nunc id cursus metus. Mattis pellentesque id nibh tortor id aliquet. Vitae auctor eu augue ut lectus arcu bibendum. Nisl condimentum id venenatis a condimentum vitae. Fusce id velit ut tortor pretium. Dignissim enim sit amet venenatis urna cursus eget. Sit amet mauris commodo quis. + +Aliquam nulla facilisi cras fermentum odio eu feugiat pretium nibh. Tellus id interdum velit laoreet id donec ultrices tincidunt. Facilisis leo vel fringilla est ullamcorper eget. Orci phasellus egestas tellus rutrum tellus pellentesque. Enim nunc faucibus a pellentesque sit amet porttitor eget dolor. Cursus risus at ultrices mi tempus. Vitae auctor eu augue ut lectus arcu bibendum. Adipiscing elit duis tristique sollicitudin nibh sit amet commodo. Cursus mattis molestie a iaculis at erat pellentesque adipiscing. Suspendisse in est ante in nibh mauris. Scelerisque in dictum non consectetur a erat nam at lectus. Amet tellus cras adipiscing enim eu. + +Sem nulla pharetra diam sit amet nisl suscipit adipiscing bibendum. Quam pellentesque nec nam aliquam sem et tortor consequat id. In nibh mauris cursus mattis molestie. Fermentum et sollicitudin ac orci phasellus egestas tellus. 
Volutpat maecenas volutpat blandit aliquam etiam erat velit scelerisque. Sollicitudin aliquam ultrices sagittis orci a scelerisque purus. Molestie nunc non blandit massa enim nec dui nunc. Ac ut consequat semper viverra nam libero. Quam elementum pulvinar etiam non quam. In hac habitasse platea dictumst vestibulum rhoncus est. Volutpat est velit egestas dui id ornare. Sed sed risus pretium quam vulputate dignissim suspendisse. Lorem sed risus ultricies tristique. Nibh sit amet commodo nulla facilisi nullam vehicula. Vel pretium lectus quam id leo in vitae turpis massa. + +Nec ullamcorper sit amet risus nullam eget felis. Vestibulum mattis ullamcorper velit sed ullamcorper morbi. Interdum velit euismod in pellentesque massa placerat. Phasellus faucibus scelerisque eleifend donec pretium vulputate. Amet nisl suscipit adipiscing bibendum. Quam viverra orci sagittis eu volutpat odio facilisis mauris. Gravida dictum fusce ut placerat. Eget duis at tellus at urna condimentum mattis pellentesque. Est pellentesque elit ullamcorper dignissim cras. Iaculis nunc sed augue lacus viverra vitae congue eu consequat. diff --git a/src/buffer.rs b/src/buffer.rs index 7bebed2..d4a18d2 100644 --- a/src/buffer.rs +++ b/src/buffer.rs @@ -1,33 +1,62 @@ -//! Contains [FileBuffer], which is a wrapper for files. +//! Contains [FileBuffer] and [FileBufferAsync], which is a wrapper for files. //! -//! There is now an asynchronous implementation of [FileBuffer]: -//! [FileBufferAsync]. It has not been tested yet, but I like they way it is -//! implemented and should be implemented in a similar way for [FileBuffer] -//! (Rather than doing work upfront, FileBuffer should instead perform actions -//! as it goes). +//! [FileBuffer] and [FileBufferAsync] wraps another Reader/Writer and is able +//! to hold a large buffer of the file and allows for seeks without clearing the +//! buffer. The buffer has a limited capacity which can be set with +//! 
[FileBufferOptions::with_max_cache()]/[FileBufferOptions::with_max_blocks()]. //! -//! The Buffer helper struct also needs to be majorly cleaned up. I'm tired, -//! good night. 😴 - +//! It does this by internally storing a series of blocks each of a +//! predetermined size ([FileBufferOptions::with_block_size()]). When the buffer +//! gets too big, then the least recently used blocks will be removed from the +//! cache. +//! +//! This wrapper is most useful for applications where the file is seeked often +//! and many reads/writes happen close together. +//! +//! In order to create a [FileBuffer] or [FileBufferAsync], the +//! [FileBufferOptions] must be used. +//! +//! [FileBufferAsync] is only available when the feature `async-tokio` is +//! enabled. +//! +//! ```no_run +//! use mbon::buffer::FileBufferOptions; +//! use std::fs::File; +//! +//! let file = File::options() +//! .read(true) +//! .write(true) +//! .open("my_file.mbon").unwrap(); +//! +//! let fb = FileBufferOptions::new() +//! .with_block_size(4096) +//! .with_max_cache(1_000_000) +//! .build(file); +//! ``` use std::{ collections::{BTreeSet, BinaryHeap, HashMap}, mem, - ops::Range, - pin::Pin, - task::{Context, Poll}, }; -use std::io::{self, Read, Seek, SeekFrom, Write}; +pub mod async_buf; +pub mod sync_buf; + +use std::io::{Read, Seek}; -use enum_as_inner::EnumAsInner; -use pin_project::pin_project; -use tokio::io::{AsyncRead, AsyncSeek, AsyncWrite, ReadBuf}; +#[cfg(feature = "async-tokio")] +use tokio::io::{AsyncRead, AsyncSeek}; + +pub use self::sync_buf::FileBuffer; + +#[cfg(feature = "async-tokio")] +pub use self::async_buf::FileBufferAsync; struct Block { data: Vec, access: u64, } +/// The internal buffer used by [FileBuffer] and [FileBufferAsync]. struct Buffer { blocks: HashMap, modified: BTreeSet, @@ -62,6 +91,10 @@ macro_rules! get_block { } impl Buffer { + /// Remove the least recently used blocks that have not been marked as + /// modified. 
+ /// + /// This will reduce the blocks down to [Self::ideal_blocks]. fn purge_least_recently_used(&mut self) { let n_blocks = self.blocks.len() - self.ideal_blocks; let mut to_delete = BinaryHeap::new(); @@ -88,40 +121,22 @@ impl Buffer { } } - /// Read from the buffer. + /// Purge the cache only if the cache is full. /// - /// The buffer must be pre-loaded in order for this to work. - fn read_buffered(&mut self, buf: &mut [u8]) -> io::Result { - let mut offset = 0; - let mut sect = self.cursor / self.block_size; - let mut sect_index = self.cursor % self.block_size; - let mut block = get_block!(self, sect); - - while let Some(buffer) = block { - let buffer = &buffer[sect_index as usize..]; - let b = &mut buf[offset..]; - let to_read = buffer.len().min(b.len()); - - let b = &mut b[..to_read]; - b.copy_from_slice(&buffer[..to_read]); - offset += to_read; - self.cursor += to_read as u64; - if offset == buf.len() { - break; - } - - sect += 1; - sect_index = 0; - block = get_block!(self, sect); - } - - if self.blocks.len() > self.max_blocks { + /// The cache is considered full when there are more than [Self::max_blocks]. + fn purge_if_full(&mut self) { + if self.is_full() { self.purge_least_recently_used(); } - - Ok(offset) } + /// Get next block the cursor is pointing to. + /// + /// If the block exists, then [Ok] will be returned with the offset within + /// the block and the contents of the block. + /// + /// If the block doesn't exist, then [Err] will be returned with the offset + /// within the block and the block id. fn get_next_block(&mut self) -> Result<(usize, &mut Vec), (usize, u64)> { let block = self.cursor / self.block_size; let offset = (self.cursor % self.block_size) as usize; @@ -131,6 +146,13 @@ impl Buffer { } } + /// Get next block the cursor is pointing to and mark it as modified. + /// + /// If the block exists, then [Ok] will be returned with the offset within + /// the block and the contents of the block. 
+ /// + /// If the block doesn't exist, then [Err] will be returned with the offset + /// within the block and the block id. fn get_next_block_modify(&mut self) -> Result<(usize, &mut Vec), (usize, u64)> { let block = self.cursor / self.block_size; let offset = (self.cursor % self.block_size) as usize; @@ -143,129 +165,34 @@ impl Buffer { } } + /// Get the next block id that has been marked as modified. fn get_next_modified_block(&self) -> Option { self.modified.first().copied() } - fn iter_blocks(&self, position: u64, len: u64) -> Range { - let end = position + len; - let block = position / self.block_size; - let end_block = (end + self.block_size - 1) / self.block_size; - let num_blocks = end_block - block; - - block..block + num_blocks - } - - fn to_load(&self, position: u64, len: u64) -> Vec { - let mut to_load = Vec::new(); - - for sect in self.iter_blocks(position, len) { - if !self.blocks.contains_key(§) { - to_load.push(sect); - } - } - - to_load - } - - /// Write to the internal buffer - /// - /// Any pre-existing blocks must already be loaded for this to work. 
- /// - /// No writes to the disk will occur - fn write(&mut self, buf: &[u8]) -> usize { - let mut offset = 0; - let mut sect = self.cursor / self.block_size; - let mut sect_index = self.cursor % self.block_size; - let block_size = self.block_size; - let mut block = get_block!(mut self, sect); - - while let Some(buffer) = block { - let write = &mut buffer[sect_index as usize..]; - let read = &buf[offset..]; - let to_write = write.len().min(read.len()); - - let write = &mut write[..to_write]; - write.copy_from_slice(&read[..to_write]); - self.modified.insert(sect); - - self.cursor += to_write as u64; - offset += to_write; - - if offset == buf.len() { - break; - } - - if (buffer.len() as u64) < block_size { - // If the block isn't a full block, write to the end of the block - - let mut write = vec![0u8; block_size as usize - buffer.len()]; - let read = &buf[offset..]; - let to_write = write.len().min(read.len()); - - let write = &mut write[..to_write]; - write.copy_from_slice(&read[..to_write]); - buffer.extend_from_slice(write); - self.cursor += to_write as u64; - - offset += to_write; - - if offset == buf.len() { - break; - } - } - - sect += 1; - sect_index = 0; - block = get_block!(mut self, sect); - } - - while offset < buf.len() { - // There are new blocks to write - - let read = &buf[offset..]; - let max_write = self.max_blocks - sect_index as usize; - let to_write = max_write.min(read.len()); - - let mut buffer = vec![0u8; sect_index as usize + to_write]; - - let write = &mut buffer[sect_index as usize..]; - write.copy_from_slice(read); - - self.cursor += to_write as u64; - offset += to_write; - - self.blocks.insert( - sect, - Block { - data: buffer, - access: self.access_count, - }, - ); - self.access_count += 1; - self.modified.insert(sect); - - sect += 1; - sect_index = 0; - } - - offset - } - + /// reset all blocks to be unmodified and return all that were previously + /// marked as modified fn take_modified(&mut self) -> Vec { let mut modified: Vec<_> = 
mem::take(&mut self.modified).into_iter().collect(); modified.sort_unstable(); modified } + /// Get the data from a block id. fn get_block_mut(&mut self, block: u64) -> Option<&mut Vec> { get_block!(mut self, block) } + /// Check if the cache is full. + /// + /// The cache is considered full when there are more than [Self::max_blocks]. fn is_full(&self) -> bool { self.blocks.len() > self.max_blocks } + /// Insert a new block into the buffer. + /// + /// The data is inserted into `block` id as is. fn insert(&mut self, block: u64, data: Vec) { self.blocks.insert( block, @@ -277,36 +204,29 @@ impl Buffer { self.access_count += 1; } + /// Mark a block as modified #[inline] fn mark_modified(&mut self, block: u64) { self.modified.insert(block); } + /// Mark a block as unmodified #[inline] fn mark_clean(&mut self, block: u64) { self.modified.remove(&block); } } -/// A wrapper for files which holds a buffer for the file. -/// -/// This buffer can hold the entire file in memory and has an option to limit -/// how much data is stored in memory (the default limit is 1GiB). +/// Set options for a [FileBuffer] or [FileBufferAsync]. /// -/// Reads and writes are performed in blocks (the default block size is 512 -/// bytes). +/// There are three options that can be set. /// -/// This struct is designed to work with simultaneous read/write operations. +/// * [Self::with_block_size()] Set the number of bytes in a block (default: 512) +/// * [Self::with_max_blocks()]/[Self::with_max_cache()] Set the max number of blocks in cache (default: 1GiB) +/// * [Self::with_ideal_blocks()]/[Self::with_ideal_cache()] Set the number of blocks to reduce by (default: 1MiB) /// -/// No writes occur to the underlying file until either flush is called, or the -/// cache limit has been met. -pub struct FileBuffer { - buffer: Buffer, - file: F, -} - -/// Builder for [FileBuffer]. 
-pub struct FileBufferBuilder { +#[derive(Default, Clone, PartialEq, Eq)] +pub struct FileBufferOptions { block_size: Option, max_blocks: Option, ideal_blocks: Option, @@ -314,21 +234,17 @@ pub struct FileBufferBuilder { ideal_cache: Option, } -impl FileBufferBuilder { +impl FileBufferOptions { + /// Create a new [FileBufferOptions] object + #[inline] pub fn new() -> Self { - FileBufferBuilder { - block_size: None, - max_blocks: None, - max_cache: None, - ideal_cache: None, - ideal_blocks: None, - } + FileBufferOptions::default() } /// Set the number of bytes in a block. /// /// The default is 512 bytes. - pub fn with_block_size(mut self, block_size: u64) -> Self { + pub fn with_block_size(&mut self, block_size: u64) -> &mut Self { self.block_size = Some(block_size); self } @@ -342,7 +258,7 @@ impl FileBufferBuilder { /// /// Note that during a single read, the cache may become larger than the /// maximum cache for the duration of the read. - pub fn with_max_blocks(mut self, max_blocks: usize) -> Self { + pub fn with_max_blocks(&mut self, max_blocks: usize) -> &mut Self { self.max_blocks = Some(max_blocks); self.max_cache = None; self @@ -356,7 +272,7 @@ impl FileBufferBuilder { /// /// Note that during a single read, the cache may become larger than the /// maximum cache for the duration of the read. - pub fn with_max_cache(mut self, max_cache: u64) -> Self { + pub fn with_max_cache(&mut self, max_cache: u64) -> &mut Self { self.max_cache = Some(max_cache); self.max_blocks = None; self @@ -369,8 +285,8 @@ impl FileBufferBuilder { /// /// The default value is `max_cache - (1MiB, 1KiB, or max_cache / 5 /// /* Which ever is the largest value smaller than max_cache*/)`. 
- pub fn with_ideal_blocks(mut self, max_blocks: usize) -> Self { - self.ideal_blocks = Some(max_blocks); + pub fn with_ideal_blocks(&mut self, ideal_blocks: usize) -> &mut Self { + self.ideal_blocks = Some(ideal_blocks); self.ideal_cache = None; self } @@ -382,13 +298,14 @@ impl FileBufferBuilder { /// /// The default value is `max_cache - (1MiB, 1KiB, or max_cache / 5 /// /* Which ever is the largest value smaller than max_cache*/)`. - pub fn with_ideal_cache(mut self, max_cache: u64) -> Self { - self.ideal_cache = Some(max_cache); + pub fn with_ideal_cache(&mut self, ideal_cache: u64) -> &mut Self { + self.ideal_cache = Some(ideal_cache); self.ideal_blocks = None; self } - fn build(self) -> Buffer { + /// Build the Buffer + fn internal_build(&self) -> Buffer { let block_size = self.block_size.unwrap_or(512); let max_blocks = self .max_blocks @@ -418,837 +335,160 @@ impl FileBufferBuilder { } } - /// Create the FileBuffer object - pub fn build_sync(self, f: F) -> FileBuffer { - let buffer = self.build(); - - FileBuffer { file: f, buffer } - } - - pub fn build_async(self, f: F) -> FileBufferAsync { - let buffer = self.build(); - - FileBufferAsync { - file: f, - buffer, - cursor: None, - state: AsyncFileBufState::default(), - } - } -} - -impl FileBuffer -where - F: Write + Seek, -{ - fn flush_blocks(&mut self) -> io::Result<()> { - let modified = self.buffer.take_modified(); - - let mut position = match modified.first() { - Some(sect) => self - .file - .seek(SeekFrom::Start(sect * self.buffer.block_size))?, - None => self.file.seek(SeekFrom::Current(0))?, - }; - let block_size = self.buffer.block_size; - - for block in modified { - let buf = match self.buffer.get_block_mut(block) { - Some(b) => b, - None => continue, - }; - let pos = block * block_size; - if position != pos { - self.file.seek(SeekFrom::Start(pos))?; - position = pos; - } - - self.file.write_all(buf.as_slice())?; - position += buf.len() as u64; - } - self.file.flush()?; - - if self.buffer.is_full() 
{ - self.buffer.purge_least_recently_used(); - } + /// Build a [FileBuffer] with a given stream. + /// + /// The stream must be at least a [Read] + [Seek] + pub fn build(&self, f: F) -> FileBuffer { + let buffer = self.internal_build(); - Ok(()) + FileBuffer::new(buffer, f) } - /// Clear the cache + /// Build a [FileBufferAsync] with a given async stream. /// - /// If there are any modified changes, they will be written to disk before - /// clearing the cache. - pub fn clear_cache(&mut self) -> io::Result<()> { - self.flush_blocks()?; - self.buffer.blocks.clear(); - Ok(()) - } -} - -impl FileBuffer -where - F: Read + Seek, -{ - fn load_blocks(&mut self, position: u64, len: u64) -> io::Result<()> { - let to_load = self.buffer.to_load(position, len); - - let mut position = self.file.seek(SeekFrom::Current(0))?; - for sect in to_load { - let pos = sect * self.buffer.block_size; - if position != pos { - self.file.seek(SeekFrom::Start(pos))?; - position = pos; - } - let mut buf = vec![0u8; self.buffer.block_size as usize]; - let mut offset = 0; - let mut eof = false; - - while offset < buf.len() { - let read = match self.file.read(&mut buf[offset..]) { - Ok(n) => n, - Err(err) => match err.kind() { - io::ErrorKind::Interrupted => { - continue; - } - _ => return Err(err), - }, - }; - if read == 0 { - eof = true; - break; - } - position += read as u64; - offset += read; - } - for i in (offset..buf.len()).rev() { - buf.remove(i); - } - - self.buffer.insert(sect, buf); - - if eof { - break; - } - } - - Ok(()) - } -} + /// The stream must be at least a [AsyncRead] + [AsyncSeek] + /// + /// This function is only available with the feature `async-tokio` enabled. 
+ #[cfg(feature = "async-tokio")] + pub fn build_async(&self, f: F) -> FileBufferAsync { + let buffer = self.internal_build(); -impl Read for FileBuffer -where - F: Read + Seek, -{ - fn read(&mut self, buf: &mut [u8]) -> io::Result { - self.load_blocks(self.buffer.cursor, buf.len() as u64)?; - self.buffer.read_buffered(buf) + FileBufferAsync::new(buffer, f) } } -impl Write for FileBuffer -where - F: Read + Write + Seek, -{ - fn write(&mut self, buf: &[u8]) -> io::Result { - self.load_blocks(self.buffer.cursor, buf.len() as u64)?; +#[cfg(test)] +mod test_suite { + use std::{ + fs, + io::Read, + panic::{self, UnwindSafe}, + path::{Path, PathBuf}, + }; - let offset = self.buffer.write(buf); + use rand::{thread_rng, Rng as _}; - if self.buffer.is_full() { - self.flush_blocks()?; - } + const FILES: &'static str = concat!(env!("CARGO_MANIFEST_DIR"), "/resources/test"); - Ok(offset) + pub fn lorem_ipsom() -> PathBuf { + Path::new(FILES).join("lorem_ipsom.txt") } - fn flush(&mut self) -> io::Result<()> { - self.flush_blocks() + pub fn lorem_ipsom_content() -> Vec { + let mut buf = Vec::new(); + fs::File::open(lorem_ipsom()) + .unwrap() + .read_to_end(&mut buf) + .unwrap(); + buf } -} -impl Seek for FileBuffer -where - F: Seek, -{ - fn seek(&mut self, pos: SeekFrom) -> io::Result { - match pos { - SeekFrom::Start(p) => self.buffer.cursor = p, - SeekFrom::End(_) => { - self.buffer.cursor = self.file.seek(pos)?; - } - SeekFrom::Current(o) => { - self.buffer.cursor = self.buffer.cursor.checked_add_signed(o).ok_or_else(|| { - io::Error::new( - io::ErrorKind::InvalidInput, - "Cannot seek to a negative position", - ) - })? 
- } - } - Ok(self.buffer.cursor) + pub fn file_name(base: &str, id: &str, ext: &str) -> PathBuf { + Path::new(FILES).join(format!("{base}_{id}.{ext}")) } -} - -#[derive(EnumAsInner)] -enum AsyncFileBufState { - Normal, - StartSeek(SeekFrom), - Seeking, - Reading { - block: u64, - buf: Vec, - read: usize, - }, - Writing { - block: u64, - buf: Vec, - written: usize, - }, - Closing, -} -impl Default for AsyncFileBufState { - fn default() -> Self { - Self::Normal + pub fn copy_lorem_ipsom(id: &str) -> PathBuf { + let foo = file_name("lorem_ipsom_copy", id, "txt"); + fs::copy(Path::new(FILES).join("lorem_ipsom.txt"), &foo).unwrap(); + foo } -} -#[pin_project] -pub struct FileBufferAsync { - buffer: Buffer, - #[pin] - file: F, - cursor: Option, - state: AsyncFileBufState, -} -impl FileBufferAsync { - fn internal_poll_read_block( - mut self: Pin<&mut Self>, - cx: &mut Context<'_>, - block: u64, - ) -> Poll> { - let state = mem::take(self.as_mut().project().state); - let block_size = self.buffer.block_size; - match state { - AsyncFileBufState::Normal => { - if self - .as_mut() - .internal_cursor_try_seek(cx, SeekFrom::Start(block * block_size))? 
- .is_pending() - { - return Poll::Pending; - } - - let me = self.as_mut().project(); - *me.state = AsyncFileBufState::Reading { - block, - buf: vec![0u8; block_size as usize], - read: 0, - }; - self.internal_poll_read_block(cx, block) - } - AsyncFileBufState::Reading { - block, - mut buf, - read, - } => { - let me = self.as_mut().project(); - debug_assert_eq!(*me.cursor, Some(block * block_size + read as u64)); - - let mut b = ReadBuf::new(buf.as_mut_slice()); - b.set_filled(read); - if me.file.poll_read(cx, &mut b)?.is_pending() { - return Poll::Pending; - } - let read = b.filled().len(); - *me.cursor = Some(block * me.buffer.block_size + read as u64); - if b.remaining() == 0 { - me.buffer.insert(block, buf); - return Poll::Ready(Ok(())); - } - *me.state = AsyncFileBufState::Reading { block, buf, read }; - self.internal_poll_read_block(cx, block) - } - AsyncFileBufState::Writing { .. } => Poll::Ready(Err(io::Error::new( - io::ErrorKind::InvalidData, - "In a Writing State", - ))), - AsyncFileBufState::Closing => { - Poll::Ready(Err(io::Error::new(io::ErrorKind::InvalidData, "Closed"))) + fn find_all_files(path: &Path) -> Vec { + let mut ents = Vec::new(); + for ent in fs::read_dir(path).unwrap() { + if ent.is_err() { + continue; } - state => { - let me = self.as_mut().project(); - *me.state = state; - - let poll = self.as_mut().internal_cursor_poll_complete(cx)?; - if poll.is_ready() { - return self.internal_poll_read_block(cx, block); - } - Poll::Pending + let ent = ent.unwrap(); + let meta = ent.metadata(); + if meta.is_err() { + continue; } - } - } -} + let meta = meta.unwrap(); -impl AsyncRead for FileBufferAsync { - fn poll_read( - mut self: Pin<&mut Self>, - cx: &mut Context<'_>, - buf: &mut ReadBuf<'_>, - ) -> Poll> { - while buf.remaining() > 0 { - let me = self.as_mut().project(); - match me.buffer.get_next_block() { - Ok((offset, block)) => { - let block = &block[offset..]; - let to_read = block.len().min(buf.remaining()); - 
buf.put_slice(&block[..to_read]); - me.buffer.cursor += to_read as u64; - continue; - } - Err((_offset, block)) => { - if self - .as_mut() - .internal_poll_read_block(cx, block)? - .is_pending() - { - return Poll::Pending; - } - } + if meta.is_dir() { + ents.extend(find_all_files(ent.path().as_ref())); + } else { + ents.push(ent.path()); } } - Poll::Ready(Ok(())) - } -} - -impl FileBufferAsync { - fn internal_cursor_try_seek( - mut self: Pin<&mut Self>, - cx: &mut Context<'_>, - position: SeekFrom, - ) -> Poll> { - if self.state.is_normal() { - self.as_mut().internal_cursor_start_seek(position)?; - } - self.internal_cursor_poll_complete(cx) + ents } - fn internal_cursor_start_seek(self: Pin<&mut Self>, position: SeekFrom) -> io::Result<()> { - let me = self.project(); - *me.state = AsyncFileBufState::StartSeek(position); - Ok(()) - } + pub fn run_test(test: impl FnOnce(&str) + UnwindSafe) { + let next_byte = || { + const CHOICES: &'static str = + "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; + CHOICES + .chars() + .nth(thread_rng().gen_range(0..CHOICES.len())) + .unwrap() + }; - fn internal_cursor_poll_complete( - mut self: Pin<&mut Self>, - cx: &mut Context<'_>, - ) -> Poll> { - let state = mem::take(self.as_mut().project().state); - match state { - AsyncFileBufState::Normal => Poll::Ready(Ok(0)), - AsyncFileBufState::StartSeek(seek) => match seek { - SeekFrom::Start(pos) => { - let me = self.as_mut().project(); - if let Some(actual) = me.cursor { - if *actual == pos { - return Poll::Ready(Ok(*actual)); - } - } - me.file.start_seek(SeekFrom::Start(pos))?; - let me = self.as_mut().project(); - let poll = me.file.poll_complete(cx)?; - if let Poll::Ready(pos) = poll { - *me.cursor = Some(pos); - return Poll::Ready(Ok(pos)); - } - *me.state = AsyncFileBufState::Seeking; - Poll::Pending - } - seek => { - let me = self.as_mut().project(); - me.file.start_seek(seek)?; - let me = self.as_mut().project(); - let poll = me.file.poll_complete(cx)?; - if 
let Poll::Ready(pos) = poll { - *me.cursor = Some(pos); - return Poll::Ready(Ok(pos)); - } - *me.state = AsyncFileBufState::Seeking; - Poll::Pending - } - }, - AsyncFileBufState::Seeking => { - let me = self.as_mut().project(); - let poll = me.file.poll_complete(cx)?; - if let Poll::Ready(pos) = poll { - *me.cursor = Some(pos); - return Poll::Ready(Ok(pos)); - } - *me.state = AsyncFileBufState::Seeking; - Poll::Pending - } - AsyncFileBufState::Reading { .. } => Poll::Ready(Err(io::Error::new( - io::ErrorKind::InvalidData, - "In a Reading State", - ))), - AsyncFileBufState::Writing { .. } => Poll::Ready(Err(io::Error::new( - io::ErrorKind::InvalidData, - "In a Writing State", - ))), - AsyncFileBufState::Closing => { - Poll::Ready(Err(io::Error::new(io::ErrorKind::InvalidData, "Closed"))) - } + let mut buf = [' '; 16]; + for i in 0..buf.len() { + buf[i] = next_byte(); } - } -} + let id = buf.into_iter().collect::(); -impl AsyncSeek for FileBufferAsync { - fn start_seek(self: Pin<&mut Self>, position: SeekFrom) -> io::Result<()> { - let me = self.project(); - *me.state = AsyncFileBufState::StartSeek(position); - Ok(()) - } + println!("Setup"); - fn poll_complete(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { - let me = self.as_mut().project(); - let state = mem::take(me.state); - match state { - AsyncFileBufState::Normal => Poll::Ready(Ok(me.buffer.cursor)), - AsyncFileBufState::StartSeek(SeekFrom::Start(position)) => { - me.buffer.cursor = position; - Poll::Ready(Ok(me.buffer.cursor)) - } - AsyncFileBufState::StartSeek(SeekFrom::Current(offset)) => { - me.buffer.cursor = match me.buffer.cursor.checked_add_signed(offset) { - Some(v) => v, - None => { - return Poll::Ready(Err(io::Error::new( - io::ErrorKind::InvalidInput, - "Cannot seek to a negative position", - ))) - } - }; - Poll::Ready(Ok(me.buffer.cursor)) - } - AsyncFileBufState::StartSeek(seek) => { - me.file.start_seek(seek)?; - let me = self.as_mut().project(); - let poll = 
me.file.poll_complete(cx)?; - if let Poll::Ready(pos) = poll { - me.buffer.cursor = pos; - *me.cursor = Some(pos); - return Poll::Ready(Ok(me.buffer.cursor)); - } - *me.state = AsyncFileBufState::Seeking; - Poll::Pending - } - AsyncFileBufState::Seeking => { - let poll = me.file.poll_complete(cx)?; - if let Poll::Ready(pos) = poll { - me.buffer.cursor = pos; - *me.cursor = Some(pos); - return Poll::Ready(Ok(me.buffer.cursor)); - } - *me.state = AsyncFileBufState::Seeking; - Poll::Pending - } - AsyncFileBufState::Reading { .. } => Poll::Ready(Err(io::Error::new( - io::ErrorKind::InvalidData, - "In a Reading State", - ))), - AsyncFileBufState::Writing { .. } => Poll::Ready(Err(io::Error::new( - io::ErrorKind::InvalidData, - "In a Writing State", - ))), - AsyncFileBufState::Closing => { - Poll::Ready(Err(io::Error::new(io::ErrorKind::InvalidData, "Closed"))) - } - } - } -} + // let err = if _sync { + let err = panic::catch_unwind(|| test(&id)).err(); + // } else { + // test(id.clone()).catch_unwind().await.err() + // }; -impl FileBufferAsync { - fn internal_start_write_block( - mut self: Pin<&mut Self>, - cx: &mut Context<'_>, - block: u64, - ) -> Poll> { - let state = mem::take(self.as_mut().project().state); - let block_size = self.buffer.block_size; - match state { - AsyncFileBufState::Normal => { - if self - .as_mut() - .internal_cursor_try_seek(cx, SeekFrom::Start(block * block_size))? - .is_pending() - { - return Poll::Pending; - } - - let me = self.as_mut().project(); - let data = me.buffer.blocks.get(&block).ok_or_else(|| { - io::Error::new(io::ErrorKind::InvalidData, "Block does not exist") - })?; - *me.state = AsyncFileBufState::Writing { - block, - buf: data.data.clone(), - written: 0, - }; - self.internal_poll_write_block(cx) - } - AsyncFileBufState::Reading { .. } => Poll::Ready(Err(io::Error::new( - io::ErrorKind::InvalidData, - "In a Reading State", - ))), - AsyncFileBufState::Writing { .. 
} => Poll::Ready(Err(io::Error::new( - io::ErrorKind::InvalidData, - "In a Writing State", - ))), - AsyncFileBufState::Closing => { - Poll::Ready(Err(io::Error::new(io::ErrorKind::InvalidData, "Closed"))) - } - state => { - let me = self.as_mut().project(); - *me.state = state; + println!("Teardown"); - let poll = self.as_mut().internal_cursor_poll_complete(cx)?; - if poll.is_ready() { - return self.internal_poll_write_block(cx); + for file in find_all_files(FILES.as_ref()) { + if let Some(name) = file.file_name() { + if name.to_string_lossy().find(&id).is_some() { + println!("Removing {file:?}"); + fs::remove_file(file).unwrap(); } - Poll::Pending } } - } - fn internal_poll_write_block( - mut self: Pin<&mut Self>, - cx: &mut Context<'_>, - ) -> Poll> { - let state = mem::take(self.as_mut().project().state); - let block_size = self.buffer.block_size; - - match state { - AsyncFileBufState::Writing { - block, - buf, - written, - } => { - let me = self.as_mut().project(); - debug_assert_eq!(*me.cursor, Some(block * block_size + written as u64)); - - let poll = me.file.poll_write(cx, &buf[written..])?; - if let Poll::Ready(w) = poll { - let written = written + w; - *me.cursor = Some(block * block_size + written as u64); - if written == buf.len() { - me.buffer.mark_clean(block); - return Poll::Ready(Ok(())); - } - - *me.state = AsyncFileBufState::Writing { - block, - buf, - written, - }; - return self.internal_poll_write_block(cx); - } - *me.state = AsyncFileBufState::Writing { - block, - buf, - written, - }; - Poll::Pending - } - _ => Poll::Ready(Err(io::Error::new( - io::ErrorKind::InvalidData, - "In an Invalid State", - ))), - } - } -} - -impl AsyncWrite for FileBufferAsync { - fn poll_write( - mut self: Pin<&mut Self>, - cx: &mut Context<'_>, - buf: &[u8], - ) -> Poll> { - let mut written = 0; - - let block_size = self.buffer.block_size as usize; - while written < buf.len() { - let me = self.as_mut().project(); - match me.buffer.get_next_block_modify() { - 
Ok((offset, data)) => { - let block = &mut data[offset..]; - let b = &buf[written..]; - let to_write = block.len().min(b.len()); - (&mut block[..to_write]).copy_from_slice(&b[..to_write]); - written += to_write; - - if data.len() < block_size { - let b = &buf[written..]; - let to_extend = (block_size - data.len()).min(b.len()); - data.extend_from_slice(&b[..to_extend]); - me.buffer.cursor += to_extend as u64; - } - - me.buffer.cursor += to_write as u64; - - continue; - } - Err((offset, block)) => { - let me = self.as_mut().project(); - let buf = &buf[written..]; - - if offset == 0 && buf.len() > block_size { - // Overwrite the whole block without reading it - me.buffer.insert(block, Vec::from(&buf[..block_size])); - me.buffer.mark_modified(block); - written += block_size; - me.buffer.cursor += block_size as u64; - continue; - } - - // Return the number of successful bytes written if any - // before making a call to the file - if written > 0 { - return Poll::Ready(Ok(written)); - } - - if self - .as_mut() - .internal_poll_read_block(cx, block)? - .is_pending() - { - return Poll::Pending; - } - continue; - } - } + if let Some(err) = err { + panic::panic_any(err) } - - Poll::Ready(Ok(written)) } - fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { - if self.state.is_writing() { - if self.as_mut().internal_poll_write_block(cx)?.is_pending() { - return Poll::Pending; + /// Call [run_test()] as a wrapper so that any created files will be removed + /// afterwards, even if the test fails. + /// + /// ``` + /// use crate::file_test; + /// + /// file_test!(fn my_test() { + /// assert_eq!(1, 1); + /// }) + /// + /// file_test!(async fn my_test() { + /// assert_eq!(1, 1); + /// }) + /// ``` + #[macro_export] + macro_rules! 
file_test { + (fn $test:ident($($arg:ident: $type:ty),*) $body:expr) => { + #[test] + fn $test() { + run_test(|$($arg: $type),*| $body) } - } - while let Some(block) = self.buffer.get_next_modified_block() { - if self - .as_mut() - .internal_start_write_block(cx, block)? - .is_pending() - { - return Poll::Pending; + }; + (async fn $test:ident($($arg:ident: $type:ty),*) $body:expr) => { + #[test] + fn $test() { + run_test(|$($arg: $type),*| { + tokio::runtime::Builder::new_multi_thread() + .enable_all() + .build() + .unwrap() + .block_on(async move {$body}); + }) } - } - - let me = self.as_mut().project(); - if me.file.poll_flush(cx)?.is_pending() { - return Poll::Pending; - } - - Poll::Ready(Ok(())) - } - - fn poll_shutdown( - mut self: Pin<&mut Self>, - cx: &mut Context<'_>, - ) -> Poll> { - if self.state.is_closing() { - return self.project().file.poll_shutdown(cx); - } - if self.as_mut().poll_flush(cx)?.is_pending() { - return Poll::Pending; - } - let me = self.project(); - *me.state = AsyncFileBufState::Closing; - me.file.poll_shutdown(cx) - } -} - -#[cfg(test)] -mod test { - use rand::{rngs::StdRng, Rng as _, SeedableRng}; - use std::{ - io::{Cursor, Seek, Write}, - slice, - }; - - use super::*; - const FILE: &[u8] = b"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Nisl pretium fusce id velit ut tortor pretium viverra. Tincidunt nunc pulvinar sapien et ligula ullamcorper malesuada proin. Gravida neque convallis a cras semper auctor neque vitae tempus. Cursus eget nunc scelerisque viverra mauris in aliquam. Viverra maecenas accumsan lacus vel facilisis volutpat est velit. Pulvinar mattis nunc sed blandit libero volutpat sed cras ornare. Massa eget egestas purus viverra accumsan in nisl nisi scelerisque. Ornare massa eget egestas purus viverra accumsan in nisl. Sed risus ultricies tristique nulla aliquet enim tortor. 
Laoreet suspendisse interdum consectetur libero id faucibus nisl tincidunt. Nisl tincidunt eget nullam non. Pretium quam vulputate dignissim suspendisse in est. Non enim praesent elementum facilisis. Nibh mauris cursus mattis molestie a. Iaculis nunc sed augue lacus viverra vitae. In mollis nunc sed id semper risus. Augue neque gravida in fermentum et sollicitudin ac. Pellentesque pulvinar pellentesque habitant morbi tristique senectus. Libero nunc consequat interdum varius sit. - -Iaculis eu non diam phasellus vestibulum lorem sed risus ultricies. Vitae ultricies leo integer malesuada nunc. Enim lobortis scelerisque fermentum dui faucibus in ornare. Et netus et malesuada fames. Dignissim enim sit amet venenatis urna cursus. Volutpat maecenas volutpat blandit aliquam etiam erat velit scelerisque in. Viverra nibh cras pulvinar mattis nunc sed blandit libero. Condimentum id venenatis a condimentum. Blandit cursus risus at ultrices. Auctor eu augue ut lectus arcu. Felis imperdiet proin fermentum leo vel. Imperdiet dui accumsan sit amet nulla facilisi morbi tempus. Sed velit dignissim sodales ut eu sem integer vitae. Auctor urna nunc id cursus metus. Mattis pellentesque id nibh tortor id aliquet. Vitae auctor eu augue ut lectus arcu bibendum. Nisl condimentum id venenatis a condimentum vitae. Fusce id velit ut tortor pretium. Dignissim enim sit amet venenatis urna cursus eget. Sit amet mauris commodo quis. - -Aliquam nulla facilisi cras fermentum odio eu feugiat pretium nibh. Tellus id interdum velit laoreet id donec ultrices tincidunt. Facilisis leo vel fringilla est ullamcorper eget. Orci phasellus egestas tellus rutrum tellus pellentesque. Enim nunc faucibus a pellentesque sit amet porttitor eget dolor. Cursus risus at ultrices mi tempus. Vitae auctor eu augue ut lectus arcu bibendum. Adipiscing elit duis tristique sollicitudin nibh sit amet commodo. Cursus mattis molestie a iaculis at erat pellentesque adipiscing. Suspendisse in est ante in nibh mauris. 
Scelerisque in dictum non consectetur a erat nam at lectus. Amet tellus cras adipiscing enim eu. - -Sem nulla pharetra diam sit amet nisl suscipit adipiscing bibendum. Quam pellentesque nec nam aliquam sem et tortor consequat id. In nibh mauris cursus mattis molestie. Fermentum et sollicitudin ac orci phasellus egestas tellus. Volutpat maecenas volutpat blandit aliquam etiam erat velit scelerisque. Sollicitudin aliquam ultrices sagittis orci a scelerisque purus. Molestie nunc non blandit massa enim nec dui nunc. Ac ut consequat semper viverra nam libero. Quam elementum pulvinar etiam non quam. In hac habitasse platea dictumst vestibulum rhoncus est. Volutpat est velit egestas dui id ornare. Sed sed risus pretium quam vulputate dignissim suspendisse. Lorem sed risus ultricies tristique. Nibh sit amet commodo nulla facilisi nullam vehicula. Vel pretium lectus quam id leo in vitae turpis massa. - -Nec ullamcorper sit amet risus nullam eget felis. Vestibulum mattis ullamcorper velit sed ullamcorper morbi. Interdum velit euismod in pellentesque massa placerat. Phasellus faucibus scelerisque eleifend donec pretium vulputate. Amet nisl suscipit adipiscing bibendum. Quam viverra orci sagittis eu volutpat odio facilisis mauris. Gravida dictum fusce ut placerat. Eget duis at tellus at urna condimentum mattis pellentesque. Est pellentesque elit ullamcorper dignissim cras. Iaculis nunc sed augue lacus viverra vitae congue eu consequat."; - - const SHORT: &[u8] = b"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Nisl pretium fusce id velit ut tortor pretium viverra. Tincidunt nunc pulvinar sapien et ligula ullamcorper malesuada proin. Gravida neque convallis a cras semper auctor neque vitae tempus. Cursus eget nunc scelerisque viverra mauris in aliquam. Viverra maecenas accumsan lacus vel facilisis volutpat est velit. Pulvinar mattis nunc sed blandit libero volutpat sed cras ornare. 
Massa eget egestas purus viverra accumsan in nisl nisi scelerisque. Ornare massa eget egestas purus viverra accumsan in nisl. Sed risus ultricies tristique nulla aliquet enim tortor. Laoreet suspendisse interdum consectetur libero id faucibus nisl tincidunt. Nisl tincidunt eget nullam non. Pretium quam vulputate dignissim suspendisse in est. Non enim praesent elementum facilisis. Nibh mauris cursus mattis molestie a. Iaculis nunc sed augue lacus viverra vitae. In mollis nunc sed id semper risus. Augue neque gravida in fermentum et sollicitudin ac. Pellentesque pulvinar pellentesque habitant morbi tristique senectus. Libero nunc consequat interdum varius sit."; - - #[test] - fn test_read() { - let mut cursor = Cursor::new(FILE); - let mut f = FileBufferBuilder::new() - .with_block_size(13) - .build_sync(&mut cursor); - - let mut buf = [0u8; 100]; - for i in 0..(FILE.len() / 100) { - let count = f.read(&mut buf).unwrap(); - assert_eq!(count, 100); - assert_eq!(buf.as_slice(), &FILE[i * 100..(i + 1) * 100]); - } - - let count = f.read(&mut buf).unwrap(); - assert_eq!(count, 12); - assert_eq!(&buf[..count], &FILE[4100..4112]); - } - - #[test] - fn test_write() { - let mut cursor = Cursor::new(Vec::::new()); - let mut f = FileBufferBuilder::new() - .with_block_size(13) - .build_sync(&mut cursor); - - let count = f.write(SHORT).unwrap(); - assert_eq!(count, SHORT.len()); - f.flush().unwrap(); - - cursor.rewind().unwrap(); - - let mut buf = vec![0u8; SHORT.len()]; - let read = cursor.read(&mut buf).unwrap(); - assert_eq!(read, SHORT.len()); - assert_eq!(buf.as_slice(), SHORT); - } - - #[test] - fn test_replace() { - let mut cursor = Cursor::new(Vec::from(FILE)); - let mut f = FileBufferBuilder::new() - .with_block_size(13) - .build_sync(&mut cursor); - - let written = f.write(b"Hello World").unwrap(); - assert_eq!(written, 11); - f.flush().unwrap(); - - cursor.rewind().unwrap(); - - let mut buf = vec![0u8; 20]; - let read = cursor.read(&mut buf).unwrap(); - 
assert_eq!(read, 20); - assert_eq!(buf.as_slice(), b"Hello World dolor si"); - } - - #[test] - fn test_append() { - let mut cursor = Cursor::new(Vec::from(SHORT)); - let mut f = FileBufferBuilder::new() - .with_block_size(13) - .build_sync(&mut cursor); - - let written = f.write(FILE).unwrap(); - assert_eq!(written, FILE.len()); - f.flush().unwrap(); - - cursor.rewind().unwrap(); - - let mut buf = vec![0u8; FILE.len()]; - let read = cursor.read(&mut buf).unwrap(); - assert_eq!(read, FILE.len()); - assert_eq!(buf.as_slice(), FILE); - } - - #[test] - fn test_replace_arbitrary() { - let mut cursor = Cursor::new(Vec::from(FILE)); - let mut f = FileBufferBuilder::new() - .with_block_size(13) - .build_sync(&mut cursor); - - f.seek(SeekFrom::Start(9)).unwrap(); - let written = f.write(b"Hello World").unwrap(); - assert_eq!(written, 11); - f.flush().unwrap(); - - cursor.rewind().unwrap(); - - let mut buf = vec![0u8; 30]; - let read = cursor.read(&mut buf).unwrap(); - assert_eq!(read, 30); - assert_eq!(buf.as_slice(), b"Lorem ipsHello Worldt amet, co"); - } - - #[test] - fn test_read_cache_limit() { - let mut cursor = Cursor::new(FILE); - let mut f = FileBufferBuilder::new() - .with_block_size(13) - .with_max_blocks(13) - .build_sync(&mut cursor); - - let mut buf = [0u8; 100]; - for i in 0..(FILE.len() / 100) { - let count = f.read(&mut buf).unwrap(); - assert_eq!(count, 100); - assert_eq!(buf.as_slice(), &FILE[i * 100..(i + 1) * 100]); - assert!(f.buffer.blocks.len() <= 13); - } - - let count = f.read(&mut buf).unwrap(); - assert_eq!(count, 12); - assert_eq!(&buf[..count], &FILE[4100..4112]); - } - - #[test] - fn test_read_after_end() { - let mut cursor = Cursor::new(FILE); - let mut f = FileBufferBuilder::new() - .with_block_size(13) - .build_sync(&mut cursor); - - let mut buf = [0u8; 100]; - f.seek(SeekFrom::End(100)).unwrap(); - let read = f.read(&mut buf).unwrap(); - assert_eq!(read, 0); - } - - #[test] - fn test_random_writes() { - let mut file = Vec::from(FILE); - 
let mut cursor = Cursor::new(&mut file); - let mut f = FileBufferBuilder::new() - .with_block_size(13) - .with_max_blocks(13) - .build_sync(&mut cursor); - - let mut rng = StdRng::from_seed(*b"Hiya World This is a random seed"); - // let mut rng = StdRng::from_entropy(); - - for _ in 0..1000 { - let i = rng.gen_range(0..FILE.len()); - let c = rng.gen_range(0u8..255); - - f.seek(SeekFrom::Start(i as u64)).unwrap(); - f.write(slice::from_ref(&c)).unwrap(); - } - f.flush().unwrap(); - - let mut buf = vec![0u8; FILE.len()]; - f.rewind().unwrap(); - f.read_exact(buf.as_mut_slice()).unwrap(); - - assert_eq!(file, buf); + }; } } diff --git a/src/buffer/async_buf.rs b/src/buffer/async_buf.rs new file mode 100644 index 0000000..7f0fdae --- /dev/null +++ b/src/buffer/async_buf.rs @@ -0,0 +1,841 @@ +#![cfg(feature = "async-tokio")] + +use super::Buffer; + +use std::io::{self, SeekFrom}; +use std::{ + mem, + pin::Pin, + task::{Context, Poll}, +}; + +use enum_as_inner::EnumAsInner; +use pin_project::pin_project; +use strum::AsRefStr; +use tokio::io::{AsyncRead, AsyncSeek, AsyncWrite, ReadBuf}; + +#[derive(EnumAsInner, AsRefStr)] +enum AsyncFileBufState { + Normal, + StartSeek(SeekFrom), + Seeking, + Reading { + block: u64, + buf: Vec, + read: usize, + }, + Writing { + block: u64, + buf: Vec, + written: usize, + }, + Closing, +} + +impl Default for AsyncFileBufState { + fn default() -> Self { + Self::Normal + } +} + +/// [FileBufferAsync] wraps another AsyncReader/AsyncWriter and is able +/// to hold a large buffer of the file and allows for seeks without clearing the +/// buffer. The buffer has a limited capacity which can be set with +/// [super::FileBufferOptions::with_max_cache()]/[super::FileBufferOptions::with_max_blocks()]. +/// +/// It does this by internally storing a series of blocks each of a +/// predetermined size ([super::FileBufferOptions::with_block_size()]). 
When the +/// buffer gets too big, then the least recently used blocks will be removed +/// from the cache. +/// +/// This wrapper is most useful for applications where the file is seeked often +/// and many reads/writes happen close together. +/// +/// In order to create a [FileBufferAsync], the [super::FileBufferOptions] must +/// be used. +/// +/// [FileBufferAsync] is only available when the feature `async-tokio` is +/// enabled. +/// +/// ```no_run +/// # async { +/// use mbon::buffer::FileBufferOptions; +/// use tokio::fs::File; +/// +/// let file = File::options() +/// .read(true) +/// .write(true) +/// .open("my_file.mbon").await.unwrap(); +/// +/// let fb = FileBufferOptions::new() +/// .with_block_size(4096) +/// .with_max_cache(1_000_000) +/// .build_async(file); +/// # }; +/// ``` +#[pin_project] +pub struct FileBufferAsync { + buffer: Buffer, + #[pin] + file: F, + cursor: Option, + state: AsyncFileBufState, +} +impl FileBufferAsync { + pub(super) fn new(buffer: Buffer, file: F) -> Self { + Self { + buffer, + file, + cursor: None, + state: AsyncFileBufState::default(), + } + } +} + +impl FileBufferAsync { + fn internal_poll_read_block( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + block: u64, + ) -> Poll> { + let state = mem::take(self.as_mut().project().state); + let block_size = self.buffer.block_size; + match state { + AsyncFileBufState::Normal => { + println!("Seeking to block {block}"); + if self + .as_mut() + .internal_cursor_try_seek(cx, SeekFrom::Start(block * block_size))? 
+ .is_pending() + { + return Poll::Pending; + } + + let me = self.as_mut().project(); + *me.state = AsyncFileBufState::Reading { + block, + buf: vec![0u8; block_size as usize], + read: 0, + }; + self.internal_poll_read_block(cx, block) + } + AsyncFileBufState::Reading { + block, + mut buf, + read, + } => { + let me = self.as_mut().project(); + debug_assert_eq!(*me.cursor, Some(block * block_size + read as u64)); + println!("Reading block {block}"); + + let mut b = ReadBuf::new(buf.as_mut_slice()); + b.set_filled(read); + if me.file.poll_read(cx, &mut b)?.is_pending() { + *me.state = AsyncFileBufState::Reading { block, buf, read }; + return Poll::Pending; + } + let filled = b.filled().len(); + let read_this_time = filled - read; + let read = filled; + *me.cursor = Some(block * me.buffer.block_size + read as u64); + if b.remaining() == 0 { + me.buffer.insert(block, buf); + return Poll::Ready(Ok(true)); + } + if read_this_time == 0 { + if read == 0 { + return Poll::Ready(Ok(false)); + } + for i in (read..block_size as usize).rev() { + buf.remove(i); + } + me.buffer.insert(block, buf); + return Poll::Ready(Ok(true)); + } + *me.state = AsyncFileBufState::Reading { block, buf, read }; + self.internal_poll_read_block(cx, block) + } + AsyncFileBufState::Writing { .. 
} => Poll::Ready(Err(io::Error::new( + io::ErrorKind::InvalidData, + "In a Writing State", + ))), + AsyncFileBufState::Closing => { + Poll::Ready(Err(io::Error::new(io::ErrorKind::InvalidData, "Closed"))) + } + state => { + let me = self.as_mut().project(); + *me.state = state; + + let poll = self.as_mut().internal_cursor_poll_complete(cx)?; + if poll.is_ready() { + return self.internal_poll_read_block(cx, block); + } + Poll::Pending + } + } + } +} + +impl AsyncRead for FileBufferAsync { + fn poll_read( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + buf: &mut ReadBuf<'_>, + ) -> Poll> { + let mut read = 0; + println!("Reading into buf"); + while buf.remaining() > 0 { + let me = self.as_mut().project(); + match me.buffer.get_next_block() { + Ok((offset, block)) => { + println!("Next block is available"); + + let block = &block[offset..]; + let to_read = block.len().min(buf.remaining()); + if to_read == 0 { + me.buffer.purge_if_full(); + println!("End of file"); + return Poll::Ready(Ok(())); + } + read += to_read; + buf.put_slice(&block[..to_read]); + me.buffer.cursor += to_read as u64; + continue; + } + Err((_offset, block)) => { + if read > 0 { + println!("! Read Partial !"); + return Poll::Ready(Ok(())); + } + println!("Need to read block {block}"); + let poll = self.as_mut().internal_poll_read_block(cx, block)?; + if let Poll::Ready(exists) = poll { + if !exists { + let me = self.as_mut().project(); + me.buffer.purge_if_full(); + println!("Doesn't exist"); + return Poll::Ready(Ok(())); + } + } else { + println!("Pending..."); + return Poll::Pending; + } + } + } + } + + self.project().buffer.purge_if_full(); + println!("! 
All Done !"); + Poll::Ready(Ok(())) + } +} + +impl FileBufferAsync { + fn internal_cursor_try_seek( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + position: SeekFrom, + ) -> Poll> { + if self.state.is_normal() { + println!("Starting seek to {position:?}"); + self.as_mut().internal_cursor_start_seek(position)?; + } + self.internal_cursor_poll_complete(cx) + } + + fn internal_cursor_start_seek(self: Pin<&mut Self>, position: SeekFrom) -> io::Result<()> { + let me = self.project(); + *me.state = AsyncFileBufState::StartSeek(position); + Ok(()) + } + + fn internal_cursor_poll_complete( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + ) -> Poll> { + let state = mem::take(self.as_mut().project().state); + match state { + AsyncFileBufState::Normal => Poll::Ready(Ok(match self.cursor { + Some(val) => val, + None => 0, + })), + AsyncFileBufState::StartSeek(seek) => match seek { + SeekFrom::Start(pos) => { + let me = self.as_mut().project(); + if let Some(actual) = me.cursor { + println!("At {actual}, want to go to {pos}"); + if *actual == pos { + println!("Already at {pos}, no action needed"); + return Poll::Ready(Ok(*actual)); + } + } + if me.file.poll_complete(cx)?.is_pending() { + *me.state = AsyncFileBufState::StartSeek(seek); + return Poll::Pending; + } + println!("!! 
Starting file seek"); + let me = self.as_mut().project(); + me.file.start_seek(SeekFrom::Start(pos))?; + let me = self.as_mut().project(); + let poll = me.file.poll_complete(cx)?; + if let Poll::Ready(pos) = poll { + *me.cursor = Some(pos); + return Poll::Ready(Ok(pos)); + } + *me.state = AsyncFileBufState::Seeking; + Poll::Pending + } + seek => { + let me = self.as_mut().project(); + me.file.start_seek(seek)?; + let me = self.as_mut().project(); + let poll = me.file.poll_complete(cx)?; + if let Poll::Ready(pos) = poll { + *me.cursor = Some(pos); + return Poll::Ready(Ok(pos)); + } + *me.state = AsyncFileBufState::Seeking; + Poll::Pending + } + }, + AsyncFileBufState::Seeking => { + let me = self.as_mut().project(); + println!("Are we there yet?"); + let poll = me.file.poll_complete(cx)?; + if let Poll::Ready(pos) = poll { + *me.cursor = Some(pos); + return Poll::Ready(Ok(pos)); + } + *me.state = AsyncFileBufState::Seeking; + Poll::Pending + } + AsyncFileBufState::Reading { block, buf, read } => { + let me = self.as_mut().project(); + *me.state = AsyncFileBufState::Reading { block, buf, read }; + Poll::Ready(Ok(match self.cursor { + Some(val) => val, + None => 0, + })) + } + AsyncFileBufState::Writing { + block, + buf, + written, + } => { + let me = self.as_mut().project(); + *me.state = AsyncFileBufState::Writing { + block, + buf, + written, + }; + Poll::Ready(Ok(match self.cursor { + Some(val) => val, + None => 0, + })) + } + AsyncFileBufState::Closing => { + Poll::Ready(Err(io::Error::new(io::ErrorKind::InvalidData, "Closed"))) + } + } + } +} + +impl AsyncSeek for FileBufferAsync { + fn start_seek(self: Pin<&mut Self>, position: SeekFrom) -> io::Result<()> { + let me = self.project(); + *me.state = AsyncFileBufState::StartSeek(position); + Ok(()) + } + + fn poll_complete(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + let me = self.as_mut().project(); + let state = mem::take(me.state); + match state { + AsyncFileBufState::Normal => 
Poll::Ready(Ok(me.buffer.cursor)), + AsyncFileBufState::StartSeek(SeekFrom::Start(position)) => { + me.buffer.cursor = position; + Poll::Ready(Ok(me.buffer.cursor)) + } + AsyncFileBufState::StartSeek(SeekFrom::Current(offset)) => { + me.buffer.cursor = match me.buffer.cursor.checked_add_signed(offset) { + Some(v) => v, + None => { + return Poll::Ready(Err(io::Error::new( + io::ErrorKind::InvalidInput, + "Cannot seek to a negative position", + ))) + } + }; + Poll::Ready(Ok(me.buffer.cursor)) + } + AsyncFileBufState::StartSeek(seek) => { + me.file.start_seek(seek)?; + let me = self.as_mut().project(); + let poll = me.file.poll_complete(cx)?; + if let Poll::Ready(pos) = poll { + me.buffer.cursor = pos; + *me.cursor = Some(pos); + return Poll::Ready(Ok(me.buffer.cursor)); + } + *me.state = AsyncFileBufState::Seeking; + Poll::Pending + } + AsyncFileBufState::Seeking => { + let poll = me.file.poll_complete(cx)?; + if let Poll::Ready(pos) = poll { + me.buffer.cursor = pos; + *me.cursor = Some(pos); + return Poll::Ready(Ok(me.buffer.cursor)); + } + *me.state = AsyncFileBufState::Seeking; + Poll::Pending + } + AsyncFileBufState::Reading { .. } => Poll::Ready(Err(io::Error::new( + io::ErrorKind::InvalidData, + "In a Reading State", + ))), + AsyncFileBufState::Writing { .. } => Poll::Ready(Err(io::Error::new( + io::ErrorKind::InvalidData, + "In a Writing State", + ))), + AsyncFileBufState::Closing => { + Poll::Ready(Err(io::Error::new(io::ErrorKind::InvalidData, "Closed"))) + } + } + } +} + +impl FileBufferAsync { + fn internal_poll_write_block( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + block: u64, + ) -> Poll> { + let state = mem::take(self.as_mut().project().state); + let block_size = self.buffer.block_size; + + match state { + AsyncFileBufState::Normal => { + println!("Going to write block {block}"); + + if self + .as_mut() + .internal_cursor_try_seek(cx, SeekFrom::Start(block * block_size))? + .is_pending() + { + println!("! 
Pending Seek !"); + return Poll::Pending; + } + + let me = self.as_mut().project(); + let data = me.buffer.blocks.get(&block).ok_or_else(|| { + io::Error::new(io::ErrorKind::InvalidData, "Block does not exist") + })?; + *me.state = AsyncFileBufState::Writing { + block, + buf: data.data.clone(), + written: 0, + }; + self.internal_poll_write_block(cx, block) + } + AsyncFileBufState::Reading { .. } => Poll::Ready(Err(io::Error::new( + io::ErrorKind::InvalidData, + "In a Reading State", + ))), + AsyncFileBufState::Writing { + block, + buf, + written, + } => { + let me = self.as_mut().project(); + debug_assert_eq!(*me.cursor, Some(block * block_size + written as u64)); + + println!("Writing to block {block}"); + let poll = me.file.poll_write(cx, &buf[written..])?; + if let Poll::Ready(w) = poll { + println!("Wrote {w} bytes"); + let written = written + w; + *me.cursor = Some(block * block_size + written as u64); + if written == buf.len() { + me.buffer.mark_clean(block); + println!("! All Done !"); + return Poll::Ready(Ok(())); + } + + *me.state = AsyncFileBufState::Writing { + block, + buf, + written, + }; + return self.internal_poll_write_block(cx, block); + } + *me.state = AsyncFileBufState::Writing { + block, + buf, + written, + }; + println!("! Pending Write !"); + Poll::Pending + } + AsyncFileBufState::Closing => { + let me = self.as_mut().project(); + *me.state = AsyncFileBufState::Closing; + Poll::Ready(Err(io::Error::new(io::ErrorKind::InvalidData, "Closed"))) + } + state => { + let me = self.as_mut().project(); + *me.state = state; + + let poll = self.as_mut().internal_cursor_poll_complete(cx)?; + if poll.is_ready() { + return self.internal_poll_write_block(cx, block); + } + println!("! 
Pending Seek !"); + Poll::Pending + } + } + } +} + +impl AsyncWrite for FileBufferAsync { + fn poll_write( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + buf: &[u8], + ) -> Poll> { + let mut written = 0; + + let block_size = self.buffer.block_size as usize; + while written < buf.len() { + let me = self.as_mut().project(); + match me.buffer.get_next_block_modify() { + Ok((offset, data)) => { + let block = &mut data[offset..]; + let b = &buf[written..]; + + let to_write = block.len().min(b.len()); + (&mut block[..to_write]).copy_from_slice(&b[..to_write]); + written += to_write; + + if data.len() < block_size { + let b = &buf[written..]; + let to_extend = (block_size - data.len()).min(b.len()); + data.extend_from_slice(&b[..to_extend]); + me.buffer.cursor += to_extend as u64; + } + + me.buffer.cursor += to_write as u64; + + continue; + } + Err((offset, block)) => { + let me = self.as_mut().project(); + let buf = &buf[written..]; + + if offset == 0 && buf.len() > block_size { + // Overwrite the whole block without reading it + me.buffer.insert(block, Vec::from(&buf[..block_size])); + me.buffer.mark_modified(block); + written += block_size; + me.buffer.cursor += block_size as u64; + continue; + } + + // Return the number of successful bytes written if any + // before making a call to the file + if written > 0 { + me.buffer.purge_if_full(); + return Poll::Ready(Ok(written)); + } + let poll = self.as_mut().internal_poll_read_block(cx, block)?; + + if let Poll::Ready(exists) = poll { + if !exists { + // Create a new block + let me = self.as_mut().project(); + let to_write = block_size.min(buf.len()); + me.buffer.insert(block, Vec::from(&buf[..to_write])); + me.buffer.mark_modified(block); + written += to_write; + me.buffer.cursor += to_write as u64; + continue; + } + } else { + return Poll::Pending; + } + continue; + } + } + } + + self.project().buffer.purge_if_full(); + Poll::Ready(Ok(written)) + } + + fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context<'_>) 
-> Poll> { + while let Some(block) = self.buffer.get_next_modified_block() { + if self + .as_mut() + .internal_poll_write_block(cx, block)? + .is_pending() + { + return Poll::Pending; + } + } + + let me = self.as_mut().project(); + if me.file.poll_flush(cx)?.is_pending() { + return Poll::Pending; + } + + me.buffer.purge_if_full(); + + Poll::Ready(Ok(())) + } + + fn poll_shutdown( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + ) -> Poll> { + if self.state.is_closing() { + return self.project().file.poll_shutdown(cx); + } + if self.as_mut().poll_flush(cx)?.is_pending() { + return Poll::Pending; + } + let me = self.project(); + *me.state = AsyncFileBufState::Closing; + me.file.poll_shutdown(cx) + } +} + +#[cfg(test)] +mod test { + use super::super::test_suite::*; + use crate::file_test; + + use super::super::FileBufferOptions; + use super::*; + + use rand::{rngs::StdRng, Rng as _, SeedableRng}; + use std::slice; + use tokio::{ + fs::File, + io::{AsyncReadExt, AsyncSeekExt, AsyncWriteExt}, + }; + + const SHORT: &[u8] = b"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Nisl pretium fusce id velit ut tortor pretium viverra. Tincidunt nunc pulvinar sapien et ligula ullamcorper malesuada proin. Gravida neque convallis a cras semper auctor neque vitae tempus. Cursus eget nunc scelerisque viverra mauris in aliquam. Viverra maecenas accumsan lacus vel facilisis volutpat est velit. Pulvinar mattis nunc sed blandit libero volutpat sed cras ornare. Massa eget egestas purus viverra accumsan in nisl nisi scelerisque. Ornare massa eget egestas purus viverra accumsan in nisl. Sed risus ultricies tristique nulla aliquet enim tortor. Laoreet suspendisse interdum consectetur libero id faucibus nisl tincidunt. Nisl tincidunt eget nullam non. Pretium quam vulputate dignissim suspendisse in est. Non enim praesent elementum facilisis. Nibh mauris cursus mattis molestie a. 
Iaculis nunc sed augue lacus viverra vitae. In mollis nunc sed id semper risus. Augue neque gravida in fermentum et sollicitudin ac. Pellentesque pulvinar pellentesque habitant morbi tristique senectus. Libero nunc consequat interdum varius sit."; + + file_test!( + async fn test_read_async(_id: &str) { + let lic = lorem_ipsom_content(); + + let file = lorem_ipsom(); + let file = File::open(file).await.unwrap(); + + let mut f = FileBufferOptions::new() + .with_block_size(13) + .build_async(file); + + let mut buf = [0u8; 100]; + for i in 0..(lic.len() / 100) { + f.read_exact(&mut buf).await.unwrap(); + assert_eq!(buf.as_slice(), &lic[i * 100..(i + 1) * 100]); + } + + let mut buf = Vec::new(); + f.read_to_end(&mut buf).await.unwrap(); + assert_eq!(&buf, &lic[(lic.len() / 100) * 100..]); + } + ); + + file_test!( + async fn test_write_async(id: &str) { + let file = file_name("lorem_ipsom_write", id, "txt"); + let mut file = File::options() + .create(true) + .read(true) + .write(true) + .open(file) + .await + .unwrap(); + + let mut f = FileBufferOptions::new() + .with_block_size(13) + .build_async(&mut file); + + f.write_all(SHORT).await.unwrap(); + f.flush().await.unwrap(); + + AsyncSeekExt::rewind(&mut file).await.unwrap(); + + let mut buf = vec![0u8; SHORT.len()]; + let read = AsyncReadExt::read(&mut file, &mut buf).await.unwrap(); + assert_eq!(read, SHORT.len()); + assert_eq!(buf.as_slice(), SHORT); + } + ); + + file_test!( + async fn test_replace_async(id: &str) { + let file = copy_lorem_ipsom(id); + let mut file = File::options() + .read(true) + .write(true) + .open(file) + .await + .unwrap(); + let mut f = FileBufferOptions::new() + .with_block_size(13) + .build_async(&mut file); + + f.write_all(b"Hello World").await.unwrap(); + f.flush().await.unwrap(); + + AsyncSeekExt::rewind(&mut file).await.unwrap(); + + let mut buf = vec![0u8; 20]; + let read = AsyncReadExt::read(&mut file, &mut buf).await.unwrap(); + assert_eq!(read, 20); + assert_eq!(buf.as_slice(), 
b"Hello World dolor si"); + } + ); + + file_test!( + async fn test_append_async(id: &str) { + let lic = lorem_ipsom_content(); + + let file = file_name("lorem_ipsom_append", id, "txt"); + let mut file = File::options() + .create(true) + .read(true) + .write(true) + .open(file) + .await + .unwrap(); + file.write_all(SHORT).await.unwrap(); + file.flush().await.unwrap(); + + let mut f = FileBufferOptions::new() + .with_block_size(13) + .build_async(&mut file); + + f.write_all(lic.as_slice()).await.unwrap(); + f.flush().await.unwrap(); + + AsyncSeekExt::rewind(&mut file).await.unwrap(); + + let mut buf = vec![0u8; lic.len()]; + let read = AsyncReadExt::read(&mut file, &mut buf).await.unwrap(); + assert_eq!(read, lic.len()); + assert_eq!(buf.as_slice(), lic); + } + ); + + file_test!( + async fn test_replace_arbitrary_async(id: &str) { + let file = copy_lorem_ipsom(id); + let mut file = File::options() + .read(true) + .write(true) + .open(file) + .await + .unwrap(); + + let mut f = FileBufferOptions::new() + .with_block_size(13) + .build_async(&mut file); + + f.seek(SeekFrom::Start(9)).await.unwrap(); + f.write_all(b"Hello World").await.unwrap(); + f.flush().await.unwrap(); + + AsyncSeekExt::rewind(&mut file).await.unwrap(); + + let mut buf = vec![0u8; 30]; + let read = AsyncReadExt::read(&mut file, &mut buf).await.unwrap(); + assert_eq!(read, 30); + assert_eq!(buf.as_slice(), b"Lorem ipsHello Worldt amet, co"); + } + ); + + file_test!( + async fn test_read_cache_limit_async(_id: &str) { + let lic = lorem_ipsom_content(); + + let file = lorem_ipsom(); + let mut file = File::open(file).await.unwrap(); + + let mut f = FileBufferOptions::new() + .with_block_size(13) + .with_max_blocks(13) + .build_async(&mut file); + + let mut buf = [0u8; 100]; + for i in 0..(lic.len() / 100) { + f.read_exact(&mut buf).await.unwrap(); + assert_eq!(buf.as_slice(), &lic[i * 100..(i + 1) * 100]); + assert!(f.buffer.blocks.len() <= 13); + } + + let mut buf = Vec::new(); + f.read_to_end(&mut 
buf).await.unwrap(); + assert_eq!(&buf, &lic[(lic.len() / 100) * 100..]); + } + ); + + file_test!( + async fn test_read_after_end_async(_id: &str) { + let file = lorem_ipsom(); + let mut file = File::open(file).await.unwrap(); + let mut f = FileBufferOptions::new() + .with_block_size(13) + .build_async(&mut file); + + let mut buf = [0u8; 100]; + f.seek(SeekFrom::End(100)).await.unwrap(); + let read = f.read(&mut buf).await.unwrap(); + assert_eq!(read, 0); + } + ); + + file_test!( + async fn test_random_writes_async(id: &str) { + let lic = lorem_ipsom_content(); + + let file = copy_lorem_ipsom(id); + let mut file = File::options() + .read(true) + .write(true) + .open(file) + .await + .unwrap(); + let mut f = FileBufferOptions::new() + .with_block_size(13) + .with_max_blocks(13) + .build_async(&mut file); + + let mut rng = StdRng::from_seed(*b"Hiya World This is a random seed"); + // let mut rng = StdRng::from_entropy(); + + for _ in 0..1000 { + let i = rng.gen_range(0..lic.len()); + let c = rng.gen_range(0u8..255); + + f.seek(SeekFrom::Start(i as u64)).await.unwrap(); + f.write_all(slice::from_ref(&c)).await.unwrap(); + } + f.flush().await.unwrap(); + + let mut buf = vec![0u8; lic.len()]; + f.rewind().await.unwrap(); + f.read_exact(buf.as_mut_slice()).await.unwrap(); + + let mut expected = Vec::new(); + file.rewind().await.unwrap(); + file.read_to_end(&mut expected).await.unwrap(); + + assert_eq!(expected, buf); + } + ); +} diff --git a/src/buffer/sync_buf.rs b/src/buffer/sync_buf.rs new file mode 100644 index 0000000..b937f81 --- /dev/null +++ b/src/buffer/sync_buf.rs @@ -0,0 +1,539 @@ +use std::io::{self, Read, Seek, SeekFrom, Write}; + +use super::Buffer; + +/// [FileBuffer] wraps another Reader/Writer and is able +/// to hold a large buffer of the file and allows for seeks without clearing the +/// buffer. The buffer has a limited capacity which can be set with +/// [super::FileBufferOptions::with_max_cache()]/[super::FileBufferOptions::with_max_blocks()]. 
+/// +/// It does this by internally storing a series of blocks each of a +/// predetermined size ([super::FileBufferOptions::with_block_size()]). When the buffer +/// gets too big, then the least recently used blocks will be removed from the +/// cache. +/// +/// This wrapper is most useful for applications where the file is seeked often +/// and many reads/writes happen close together. +/// +/// In order to create a [FileBuffer], the [super::FileBufferOptions] must be used. +/// +/// ```no_run +/// use mbon::buffer::FileBufferOptions; +/// use std::fs::File; +/// +/// let file = File::options() +/// .read(true) +/// .write(true) +/// .open("my_file.mbon").unwrap(); +/// +/// let fb = FileBufferOptions::new() +/// .with_block_size(4096) +/// .with_max_cache(1_000_000) +/// .build(file); +/// ``` +pub struct FileBuffer { + buffer: Buffer, + file: F, + cursor: Option, +} + +impl FileBuffer { + pub(super) fn new(buffer: Buffer, file: F) -> Self { + Self { + buffer, + file, + cursor: None, + } + } +} + +impl FileBuffer { + fn internal_cursor_seek(&mut self, position: SeekFrom) -> io::Result { + match position { + SeekFrom::Start(position) => { + if let Some(actual) = &self.cursor { + if *actual == position { + return Ok(*actual); + } + } + let position = self.file.seek(SeekFrom::Start(position))?; + self.cursor = Some(position); + Ok(position) + } + seek => { + let position = self.file.seek(seek)?; + self.cursor = Some(position); + Ok(position) + } + } + } +} + +impl Seek for FileBuffer { + fn seek(&mut self, pos: SeekFrom) -> io::Result { + match pos { + SeekFrom::Start(pos) => { + self.buffer.cursor = pos; + Ok(self.buffer.cursor) + } + SeekFrom::Current(offset) => { + self.buffer.cursor = match self.buffer.cursor.checked_add_signed(offset) { + Some(v) => v, + None => { + return Err(io::Error::new( + io::ErrorKind::InvalidInput, + "Cannot seek to a negative position", + )) + } + }; + Ok(self.buffer.cursor) + } + seek => { + self.buffer.cursor = 
self.internal_cursor_seek(seek)?; + Ok(self.buffer.cursor) + } + } + } +} + +impl FileBuffer { + fn internal_read_block(&mut self, block: u64) -> io::Result { + let block_size = self.buffer.block_size; + self.internal_cursor_seek(SeekFrom::Start(block * block_size))?; + + let mut buf = vec![0u8; block_size as usize]; + let mut read = 0; + + loop { + debug_assert_eq!(self.cursor, Some(block * block_size + read as u64)); + + let read_buf = &mut buf[read..]; + let just_read = self.file.read(read_buf)?; + read += just_read; + self.cursor = Some(block * block_size + read as u64); + + if read == block_size as usize { + self.buffer.insert(block, buf); + return Ok(true); + } + if just_read == 0 { + if read == 0 { + return Ok(false); + } + for i in (read..block_size as usize).rev() { + buf.remove(i); + } + self.buffer.insert(block, buf); + return Ok(true); + } + } + } +} + +impl Read for FileBuffer { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + let mut read = 0; + + while read < buf.len() { + match self.buffer.get_next_block() { + Ok((offset, data)) => { + let data = &data[offset..]; + let read_buf = &mut buf[read..]; + let to_read = data.len().min(read_buf.len()); + if to_read == 0 { + self.buffer.purge_if_full(); + return Ok(read); + } + let data = &data[..to_read]; + let read_buf = &mut read_buf[..to_read]; + read_buf.copy_from_slice(&data); + read += to_read; + self.buffer.cursor += to_read as u64; + continue; + } + Err((_offset, block)) => { + if read > 0 { + self.buffer.purge_if_full(); + return Ok(read); + } + let exists = self.internal_read_block(block)?; + if !exists { + self.buffer.purge_if_full(); + return Ok(read); + } + continue; + } + } + } + + self.buffer.purge_if_full(); + Ok(read) + } +} + +impl FileBuffer { + fn internal_write_block(&mut self, block: u64) -> io::Result<()> { + let block_size = self.buffer.block_size; + self.internal_cursor_seek(SeekFrom::Start(block * block_size))?; + + let buf = &self + .buffer + .blocks + .get(&block) + 
.ok_or_else(|| io::Error::new(io::ErrorKind::InvalidData, "Block does not exist"))? + .data; + let mut written = 0; + + loop { + debug_assert_eq!(self.cursor, Some(block * block_size + written as u64)); + + let write_buf = &buf[written..]; + let just_wrote = self.file.write(write_buf)?; + written += just_wrote; + self.cursor = Some(block * block_size + written as u64); + if written == buf.len() { + self.buffer.mark_clean(block); + return Ok(()); + } + } + } +} + +impl Write for FileBuffer { + fn write(&mut self, buf: &[u8]) -> io::Result { + let mut written = 0; + let block_size = self.buffer.block_size as usize; + + while written < buf.len() { + match self.buffer.get_next_block_modify() { + Ok((offset, data)) => { + let data_buf = &mut data[offset..]; + let b = &buf[written..]; + + let to_write = data_buf.len().min(b.len()); + (&mut data_buf[..to_write]).copy_from_slice(&b[..to_write]); + written += to_write; + + if data.len() < block_size { + let b = &buf[written..]; + let to_extend = (block_size - data.len()).min(b.len()); + data.extend_from_slice(&b[..to_extend]); + self.buffer.cursor += to_extend as u64; + } + + self.buffer.cursor += to_write as u64; + + continue; + } + Err((offset, block)) => { + let buf = &buf[written..]; + + if offset == 0 && buf.len() > block_size { + // Overwrite the whole block without reading it + self.buffer.insert(block, Vec::from(&buf[..block_size])); + self.buffer.mark_modified(block); + written += block_size; + self.buffer.cursor += block_size as u64; + continue; + } + + // Return the number of successful bytes written if any + // before making a call to the file + if written > 0 { + self.buffer.purge_if_full(); + return Ok(written); + } + + let exists = self.internal_read_block(block)?; + if !exists { + // Create a new block + let to_write = (block_size - offset).min(buf.len()); + let mut data = vec![0u8; to_write + offset]; + + let data_buf = &mut data[offset..]; + data_buf.copy_from_slice(&buf[..to_write]); + + 
self.buffer.insert(block, data); + self.buffer.mark_modified(block); + written += to_write; + self.buffer.cursor += to_write as u64; + continue; + } else { + } + + continue; + } + } + } + + self.buffer.purge_if_full(); + Ok(written) + } + + fn flush(&mut self) -> io::Result<()> { + while let Some(block) = self.buffer.get_next_modified_block() { + self.internal_write_block(block)?; + } + self.buffer.purge_if_full(); + self.file.flush() + } +} + +impl FileBuffer +where + F: Write + Seek, +{ + fn flush_blocks(&mut self) -> io::Result<()> { + let modified = self.buffer.take_modified(); + + let mut position = match modified.first() { + Some(sect) => self + .file + .seek(SeekFrom::Start(sect * self.buffer.block_size))?, + None => self.file.seek(SeekFrom::Current(0))?, + }; + let block_size = self.buffer.block_size; + + for block in modified { + let buf = match self.buffer.get_block_mut(block) { + Some(b) => b, + None => continue, + }; + let pos = block * block_size; + if position != pos { + self.file.seek(SeekFrom::Start(pos))?; + position = pos; + } + + self.file.write_all(buf.as_slice())?; + position += buf.len() as u64; + } + self.file.flush()?; + + if self.buffer.is_full() { + self.buffer.purge_least_recently_used(); + } + + Ok(()) + } + + /// Clear the cache + /// + /// If there are any modified changes, they will be written to disk before + /// clearing the cache. + pub fn clear_cache(&mut self) -> io::Result<()> { + self.flush_blocks()?; + self.buffer.blocks.clear(); + Ok(()) + } +} + +#[cfg(test)] +mod test { + use super::super::test_suite::*; + use rand::{rngs::StdRng, Rng, SeedableRng}; + use std::{ + fs::File, + io::{Seek, Write}, + slice, + }; + + use crate::{buffer::FileBufferOptions, file_test}; + + use super::*; + + const SHORT: &[u8] = b"Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Nisl pretium fusce id velit ut tortor pretium viverra. 
Tincidunt nunc pulvinar sapien et ligula ullamcorper malesuada proin. Gravida neque convallis a cras semper auctor neque vitae tempus. Cursus eget nunc scelerisque viverra mauris in aliquam. Viverra maecenas accumsan lacus vel facilisis volutpat est velit. Pulvinar mattis nunc sed blandit libero volutpat sed cras ornare. Massa eget egestas purus viverra accumsan in nisl nisi scelerisque. Ornare massa eget egestas purus viverra accumsan in nisl. Sed risus ultricies tristique nulla aliquet enim tortor. Laoreet suspendisse interdum consectetur libero id faucibus nisl tincidunt. Nisl tincidunt eget nullam non. Pretium quam vulputate dignissim suspendisse in est. Non enim praesent elementum facilisis. Nibh mauris cursus mattis molestie a. Iaculis nunc sed augue lacus viverra vitae. In mollis nunc sed id semper risus. Augue neque gravida in fermentum et sollicitudin ac. Pellentesque pulvinar pellentesque habitant morbi tristique senectus. Libero nunc consequat interdum varius sit."; + + file_test!( + fn test_read(_id: &str) { + let lic = lorem_ipsom_content(); + + let file = lorem_ipsom(); + let file = File::open(file).unwrap(); + let mut f = FileBufferOptions::new().with_block_size(13).build(file); + + let mut buf = [0u8; 100]; + for i in 0..(lic.len() / 100) { + f.read_exact(&mut buf).unwrap(); + assert_eq!(buf.as_slice(), &lic[i * 100..(i + 1) * 100]); + } + + let mut buf = Vec::new(); + f.read_to_end(&mut buf).unwrap(); + assert_eq!(&buf, &lic[(lic.len() / 100) * 100..]); + } + ); + + file_test!( + fn test_write(id: &str) { + let file = file_name("lorem_ipsom_write", id, "txt"); + let mut file = File::options() + .create(true) + .read(true) + .write(true) + .open(file) + .unwrap(); + + let mut f = FileBufferOptions::new() + .with_block_size(13) + .build(&mut file); + + f.write_all(SHORT).unwrap(); + f.flush().unwrap(); + + Seek::rewind(&mut file).unwrap(); + + let mut buf = vec![0u8; SHORT.len()]; + let read = Read::read(&mut file, &mut buf).unwrap(); + 
assert_eq!(read, SHORT.len()); + assert_eq!(buf.as_slice(), SHORT); + } + ); + + file_test!( + fn test_replace(id: &str) { + let file = copy_lorem_ipsom(id); + let mut file = File::options().read(true).write(true).open(file).unwrap(); + let mut f = FileBufferOptions::new() + .with_block_size(13) + .build(&mut file); + + let written = f.write(b"Hello World").unwrap(); + assert_eq!(written, 11); + f.flush().unwrap(); + + Seek::rewind(&mut file).unwrap(); + + let mut buf = vec![0u8; 20]; + let read = Read::read(&mut file, &mut buf).unwrap(); + assert_eq!(read, 20); + assert_eq!(buf.as_slice(), b"Hello World dolor si"); + } + ); + + file_test!( + fn test_append(id: &str) { + let lic = lorem_ipsom_content(); + + let file = file_name("lorem_ipsom_append", id, "txt"); + let mut file = File::options() + .create(true) + .read(true) + .write(true) + .open(file) + .unwrap(); + file.write_all(SHORT).unwrap(); + file.flush().unwrap(); + + let mut f = FileBufferOptions::new() + .with_block_size(13) + .build(&mut file); + + f.write_all(lic.as_slice()).unwrap(); + f.flush().unwrap(); + + Seek::rewind(&mut file).unwrap(); + + let mut buf = vec![0u8; lic.len()]; + let read = Read::read(&mut file, &mut buf).unwrap(); + assert_eq!(read, lic.len()); + assert_eq!(buf.as_slice(), lic); + } + ); + + file_test!( + fn test_replace_arbitrary(id: &str) { + let file = copy_lorem_ipsom(id); + let mut file = File::options().read(true).write(true).open(file).unwrap(); + + let mut f = FileBufferOptions::new() + .with_block_size(13) + .build(&mut file); + + f.seek(SeekFrom::Start(9)).unwrap(); + f.write_all(b"Hello World").unwrap(); + f.flush().unwrap(); + + Seek::rewind(&mut file).unwrap(); + + let mut buf = vec![0u8; 30]; + let read = Read::read(&mut file, &mut buf).unwrap(); + assert_eq!(read, 30); + assert_eq!(buf.as_slice(), b"Lorem ipsHello Worldt amet, co"); + } + ); + + file_test!( + fn test_read_cache_limit(_id: &str) { + let lic = lorem_ipsom_content(); + + let file = lorem_ipsom(); + let 
mut file = File::open(file).unwrap(); + let mut f = FileBufferOptions::new() + .with_block_size(13) + .with_max_blocks(13) + .build(&mut file); + + let mut buf = [0u8; 100]; + for i in 0..(lic.len() / 100) { + f.read_exact(&mut buf).unwrap(); + assert_eq!(buf.as_slice(), &lic[i * 100..(i + 1) * 100]); + assert!(f.buffer.blocks.len() <= 13); + } + + let mut buf = Vec::new(); + f.read_to_end(&mut buf).unwrap(); + assert_eq!(&buf, &lic[(lic.len() / 100) * 100..]); + } + ); + + file_test!( + fn test_read_after_end(_id: &str) { + let file = lorem_ipsom(); + let mut file = File::open(file).unwrap(); + let mut f = FileBufferOptions::new() + .with_block_size(13) + .build(&mut file); + + let mut buf = [0u8; 100]; + f.seek(SeekFrom::End(100)).unwrap(); + let read = f.read(&mut buf).unwrap(); + assert_eq!(read, 0); + } + ); + + file_test!( + fn test_random_writes(id: &str) { + let lic = lorem_ipsom_content(); + + let file = copy_lorem_ipsom(id); + let mut file = File::options().read(true).write(true).open(file).unwrap(); + let mut f = FileBufferOptions::new() + .with_block_size(13) + .with_max_blocks(13) + .build(&mut file); + + let mut rng = StdRng::from_seed(*b"Hiya World This is a random seed"); + // let mut rng = StdRng::from_entropy(); + + for _ in 0..1000 { + let i = rng.gen_range(0..lic.len()); + let c = rng.gen_range(0u8..255); + + f.seek(SeekFrom::Start(i as u64)).unwrap(); + f.write_all(slice::from_ref(&c)).unwrap(); + } + f.flush().unwrap(); + + let mut buf = vec![0u8; lic.len()]; + f.rewind().unwrap(); + f.read_exact(buf.as_mut_slice()).unwrap(); + + let mut expected = Vec::new(); + file.rewind().unwrap(); + file.read_to_end(&mut expected).unwrap(); + + assert_eq!(expected, buf); + } + ); +} diff --git a/src/engine.rs b/src/engine.rs index 7bdaecd..f05e19c 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -15,7 +15,7 @@ use tokio::task::{spawn_blocking, JoinHandle}; use std::thread::JoinHandle; use crate::{ - buffer::{FileBuffer, FileBufferBuilder}, + 
buffer::{FileBuffer, FileBufferOptions}, concurrent::{ConcurrentEngineClient, ConcurrentEngineWrapper}, data::{Data, PartialItem}, errors::{MbonError, MbonResult}, @@ -142,7 +142,7 @@ where /// Create a new engine from a file pub fn new(file: F) -> Self { Self { - file: Self::new_file(FileBufferBuilder::new().build_sync(file)), + file: Self::new_file(FileBufferOptions::new().build(file)), } } diff --git a/test.sh b/test.sh new file mode 100755 index 0000000..21a270a --- /dev/null +++ b/test.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +cargo test --no-default-features --features sync +cargo test --no-default-features --features async-tokio From 2cf70c68c85e061e6a284e7e9c753fb91733a1ad Mon Sep 17 00:00:00 2001 From: Benjamin Jacobs Date: Tue, 5 Mar 2024 21:55:17 -0700 Subject: [PATCH 07/10] Migrate to use either async streams or std streams --- Cargo.lock | 12 ++ Cargo.toml | 15 +-- src/buffer.rs | 45 +++---- src/buffer/async_buf.rs | 22 ++-- src/channel.rs | 40 ------ src/concurrent.rs | 29 ++--- src/data.rs | 104 +++++++-------- src/engine.rs | 273 ++++++++++++---------------------------- src/lib.rs | 57 ++++++++- src/marks.rs | 119 +++++++++--------- src/stream.rs | 98 +++++++++++++++ 11 files changed, 411 insertions(+), 403 deletions(-) create mode 100644 src/stream.rs diff --git a/Cargo.lock b/Cargo.lock index 50dabdd..7e6199c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -47,6 +47,17 @@ dependencies = [ "syn", ] +[[package]] +name = "async-recursion" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fd55a5ba1179988837d24ab4c7cc8ed6efdeff578ede0416b4225a5fca35bd0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "async-trait" version = "0.1.77" @@ -309,6 +320,7 @@ dependencies = [ "anyhow", "async-channel", "async-generic", + "async-recursion", "async-trait", "byteorder", "enum-as-inner", diff --git a/Cargo.toml b/Cargo.toml index 0f87840..29b5d8c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ 
-15,18 +15,19 @@ keywords = ["binary", "serde"] [dependencies] byteorder = "1" -enum-as-inner = "0.6" -maybe-async = { version = "0.2" } anyhow = "1" thiserror = "1" +enum-as-inner = "0.6" +strum = { version = "0.26", features = ["derive"] } +maybe-async = "0.2" +async-generic = "1.0" # Async Dependencies -async-generic = { version = "1.0.0", optional = true } async-channel = { version = "2", optional = true } -tokio = { version = "1", features = ["rt-multi-thread", "io-util"], optional = true } +tokio = { version = "1", features = ["rt-multi-thread", "io-util", "fs"], optional = true } async-trait = { version = "0.1", optional = true } -pin-project = "1.1.4" -strum = { version = "0.26.1", features = ["derive"] } +pin-project = { version = "1.1", optional = true } +async-recursion = "1.0.5" [dev-dependencies] futures = "0.3.30" @@ -43,7 +44,7 @@ tokio = { version = "1", features = ["full"] } sync = ["maybe-async/is_sync"] -async = ["dep:async-channel", "dep:async-generic", "dep:async-trait"] +async = ["dep:async-channel", "dep:async-trait", "dep:pin-project"] async-tokio = ["async", "dep:tokio"] diff --git a/src/buffer.rs b/src/buffer.rs index d4a18d2..21ccfa2 100644 --- a/src/buffer.rs +++ b/src/buffer.rs @@ -1,6 +1,6 @@ -//! Contains [FileBuffer] and [FileBufferAsync], which is a wrapper for files. +//! A wrapper for files. //! -//! [FileBuffer] and [FileBufferAsync] wraps another Reader/Writer and is able +//! [FileBuffer] wraps another Reader/Writer and is able //! to hold a large buffer of the file and allows for seeks without clearing the //! buffer. The buffer has a limited capacity which can be set with //! [FileBufferOptions::with_max_cache()]/[FileBufferOptions::with_max_blocks()]. @@ -13,50 +13,36 @@ //! This wrapper is most useful for applications where the file is seeked often //! and many reads/writes happen close together. //! -//! In order to create a [FileBuffer] or [FileBufferAsync], the +//! In order to create a [FileBuffer], the //! 
[FileBufferOptions] must be used. -//! -//! [FileBufferAsync] is only available when the feature `async-tokio` is -//! enabled. -//! -//! ```no_run -//! use mbon::buffer::FileBufferOptions; -//! use std::fs::File; -//! -//! let file = File::options() -//! .read(true) -//! .write(true) -//! .open("my_file.mbon").unwrap(); -//! -//! let fb = FileBufferOptions::new() -//! .with_block_size(4096) -//! .with_max_cache(1_000_000) -//! .build(file); -//! ``` use std::{ collections::{BTreeSet, BinaryHeap, HashMap}, mem, }; -pub mod async_buf; -pub mod sync_buf; +#[cfg(feature = "async-tokio")] +mod async_buf; +#[cfg(feature = "sync")] +mod sync_buf; +#[cfg(feature = "sync")] use std::io::{Read, Seek}; #[cfg(feature = "async-tokio")] use tokio::io::{AsyncRead, AsyncSeek}; +#[cfg(feature = "sync")] pub use self::sync_buf::FileBuffer; #[cfg(feature = "async-tokio")] -pub use self::async_buf::FileBufferAsync; +pub use self::async_buf::FileBufferAsync as FileBuffer; struct Block { data: Vec, access: u64, } -/// The internal buffer used by [FileBuffer] and [FileBufferAsync]. +/// The internal buffer used by [FileBuffer]. struct Buffer { blocks: HashMap, modified: BTreeSet, @@ -172,6 +158,7 @@ impl Buffer { /// reset all blocks to be unmodified and return all that were previously /// marked as modified + #[allow(unused)] fn take_modified(&mut self) -> Vec { let mut modified: Vec<_> = mem::take(&mut self.modified).into_iter().collect(); modified.sort_unstable(); @@ -179,6 +166,7 @@ impl Buffer { } /// Get the data from a block id. + #[allow(unused)] fn get_block_mut(&mut self, block: u64) -> Option<&mut Vec> { get_block!(mut self, block) } @@ -338,22 +326,23 @@ impl FileBufferOptions { /// Build a [FileBuffer] with a given stream. /// /// The stream must be at least a [Read] + [Seek] + #[cfg(feature = "sync")] pub fn build(&self, f: F) -> FileBuffer { let buffer = self.internal_build(); FileBuffer::new(buffer, f) } - /// Build a [FileBufferAsync] with a given async stream. 
+ /// Build a [FileBuffer] with a given async stream. /// /// The stream must be at least a [AsyncRead] + [AsyncSeek] /// /// This function is only available with the feature `async-tokio` enabled. #[cfg(feature = "async-tokio")] - pub fn build_async(&self, f: F) -> FileBufferAsync { + pub fn build(&self, f: F) -> FileBuffer { let buffer = self.internal_build(); - FileBufferAsync::new(buffer, f) + FileBuffer::new(buffer, f) } } diff --git a/src/buffer/async_buf.rs b/src/buffer/async_buf.rs index 7f0fdae..0764705 100644 --- a/src/buffer/async_buf.rs +++ b/src/buffer/async_buf.rs @@ -1,5 +1,3 @@ -#![cfg(feature = "async-tokio")] - use super::Buffer; use std::io::{self, SeekFrom}; @@ -70,7 +68,7 @@ impl Default for AsyncFileBufState { /// let fb = FileBufferOptions::new() /// .with_block_size(4096) /// .with_max_cache(1_000_000) -/// .build_async(file); +/// .build(file); /// # }; /// ``` #[pin_project] @@ -633,9 +631,7 @@ mod test { let file = lorem_ipsom(); let file = File::open(file).await.unwrap(); - let mut f = FileBufferOptions::new() - .with_block_size(13) - .build_async(file); + let mut f = FileBufferOptions::new().with_block_size(13).build(file); let mut buf = [0u8; 100]; for i in 0..(lic.len() / 100) { @@ -662,7 +658,7 @@ mod test { let mut f = FileBufferOptions::new() .with_block_size(13) - .build_async(&mut file); + .build(&mut file); f.write_all(SHORT).await.unwrap(); f.flush().await.unwrap(); @@ -687,7 +683,7 @@ mod test { .unwrap(); let mut f = FileBufferOptions::new() .with_block_size(13) - .build_async(&mut file); + .build(&mut file); f.write_all(b"Hello World").await.unwrap(); f.flush().await.unwrap(); @@ -718,7 +714,7 @@ mod test { let mut f = FileBufferOptions::new() .with_block_size(13) - .build_async(&mut file); + .build(&mut file); f.write_all(lic.as_slice()).await.unwrap(); f.flush().await.unwrap(); @@ -744,7 +740,7 @@ mod test { let mut f = FileBufferOptions::new() .with_block_size(13) - .build_async(&mut file); + .build(&mut file); 
f.seek(SeekFrom::Start(9)).await.unwrap(); f.write_all(b"Hello World").await.unwrap(); @@ -769,7 +765,7 @@ mod test { let mut f = FileBufferOptions::new() .with_block_size(13) .with_max_blocks(13) - .build_async(&mut file); + .build(&mut file); let mut buf = [0u8; 100]; for i in 0..(lic.len() / 100) { @@ -790,7 +786,7 @@ mod test { let mut file = File::open(file).await.unwrap(); let mut f = FileBufferOptions::new() .with_block_size(13) - .build_async(&mut file); + .build(&mut file); let mut buf = [0u8; 100]; f.seek(SeekFrom::End(100)).await.unwrap(); @@ -813,7 +809,7 @@ mod test { let mut f = FileBufferOptions::new() .with_block_size(13) .with_max_blocks(13) - .build_async(&mut file); + .build(&mut file); let mut rng = StdRng::from_seed(*b"Hiya World This is a random seed"); // let mut rng = StdRng::from_entropy(); diff --git a/src/channel.rs b/src/channel.rs index 1e08f5a..6132a1a 100644 --- a/src/channel.rs +++ b/src/channel.rs @@ -35,24 +35,6 @@ impl ChanSend { pub async fn send(&self, data: T) -> Result<(), SendError> { self.0.send(data).await } - - /// Send a message to the receiver - /// - /// This is the same as [Self::send] when feature `sync` is set - #[cfg(feature = "async")] - #[inline] - pub fn send_blocking(&self, data: T) -> Result<(), SendError> { - self.0.send_blocking(data) - } - - /// Send a message to the receiver - /// - /// This is the same as [Self::send] when feature `sync` is set - #[cfg(feature = "sync")] - #[inline] - pub fn send_blocking(&self, data: T) -> Result<(), SendError> { - self.0.send(data) - } } #[maybe_async] impl ChanRecv { @@ -63,26 +45,4 @@ impl ChanRecv { pub async fn recv(&self) -> Result { self.0.recv().await } - - /// Receive a message from a sender. - /// - /// This will wait until a message is ready - /// - /// This is the same as [Self::recv] when feature `sync` is set - #[cfg(feature = "async")] - #[inline] - pub fn recv_blocking(&self) -> Result { - self.0.recv_blocking() - } - - /// Receive a message from a sender. 
- /// - /// This will wait until a message is ready - /// - /// This is the same as [Self::recv] when feature `sync` is set - #[cfg(feature = "sync")] - #[inline] - pub fn recv_blocking(&self) -> Result { - self.0.recv() - } } diff --git a/src/concurrent.rs b/src/concurrent.rs index 29cd67f..ad3f47e 100644 --- a/src/concurrent.rs +++ b/src/concurrent.rs @@ -1,16 +1,17 @@ use enum_as_inner::EnumAsInner; use maybe_async::maybe_async; -use std::io::{self, Read, Seek, SeekFrom}; - use crate::{ channel::{channel, ChanRecv, ChanSend}, data::{Data, PartialItem}, engine::{Engine, MbonParserRead}, errors::{MbonError, MbonResult}, marks::Mark, + stream::{Reader, Seeker}, }; +use std::io; + #[cfg(feature = "sync")] use std::thread::{spawn, JoinHandle}; #[cfg(feature = "async-tokio")] @@ -19,24 +20,24 @@ use tokio::task::{spawn, JoinHandle}; #[derive(EnumAsInner)] enum RequestE { ParseMark { - location: SeekFrom, + location: u64, }, ParseItem { - location: SeekFrom, + location: u64, }, ParseData { mark: Mark, - location: SeekFrom, + location: u64, }, ParseItemN { - location: SeekFrom, + location: u64, count: Option, bytes: u64, parse_data: bool, }, ParseDataN { mark: Mark, - location: SeekFrom, + location: u64, n: usize, }, Close, @@ -50,7 +51,7 @@ pub struct Request { pub enum Response { ParseMark(MbonResult<(Mark, u64)>), ParseItem(MbonResult), - ParseData(MbonResult<(Data, u64)>), + ParseData(MbonResult), ParseDataN(MbonResult>), ParseItemN(MbonResult>), Stopped, @@ -109,12 +110,12 @@ impl ConcurrentEngineClient { #[maybe_async] impl MbonParserRead for ConcurrentEngineClient { - async fn parse_mark(&mut self, location: SeekFrom) -> MbonResult<(Mark, u64)> { + async fn parse_mark(&mut self, location: u64) -> MbonResult<(Mark, u64)> { let response = self.send_request(RequestE::ParseMark { location }).await?; Self::expect(response.into_parse_mark()) } - async fn parse_data(&mut self, mark: &Mark, location: SeekFrom) -> MbonResult<(Data, u64)> { + async fn parse_data(&mut self, 
mark: &Mark, location: u64) -> MbonResult { let response = self .send_request(RequestE::ParseData { mark: mark.to_owned(), @@ -124,14 +125,14 @@ impl MbonParserRead for ConcurrentEngineClient { Self::expect(response.into_parse_data()) } - async fn parse_item(&mut self, location: SeekFrom) -> MbonResult { + async fn parse_item(&mut self, location: u64) -> MbonResult { let response = self.send_request(RequestE::ParseItem { location }).await?; Self::expect(response.into_parse_item()) } async fn parse_item_n( &mut self, - location: SeekFrom, + location: u64, count: Option, bytes: u64, parse_data: bool, @@ -150,7 +151,7 @@ impl MbonParserRead for ConcurrentEngineClient { async fn parse_data_n( &mut self, mark: &Mark, - location: SeekFrom, + location: u64, n: usize, ) -> MbonResult> { let response = self @@ -169,7 +170,7 @@ impl MbonParserRead for ConcurrentEngineClient { #[maybe_async] impl ConcurrentEngineWrapper where - F: Read + Seek + Send + 'static, + F: Reader + Seeker + Send + 'static, { pub fn new(engine: Engine) -> (Self, ConcurrentEngineClient) { let (send, recv) = channel(); diff --git a/src/data.rs b/src/data.rs index 99e9bc3..47b8040 100644 --- a/src/data.rs +++ b/src/data.rs @@ -2,18 +2,17 @@ use enum_as_inner::EnumAsInner; use maybe_async::maybe_async; use std::{ char::{self}, - io::{Read, Seek, SeekFrom}, + io::SeekFrom, mem, ops::Deref, sync::Arc, }; -use byteorder::{LittleEndian, ReadBytesExt}; - use crate::{ engine::MbonParserRead, errors::{MbonError, MbonResult}, marks::{Mark, Size}, + stream::{Reader, Seeker}, }; macro_rules! number_type { @@ -28,7 +27,8 @@ macro_rules! number_type { } } impl $name { - pub(crate) fn parse($file: &mut R) -> MbonResult { + #[maybe_async] + pub(crate) async fn parse($file: &mut R) -> MbonResult { let val = $read; Ok(Self(val)) } @@ -51,21 +51,21 @@ macro_rules! 
char_impl { }; } -number_type!(U8, u8, f: f.read_u8()?); -number_type!(U16, u16, f: f.read_u16::()?); -number_type!(U32, u32, f: f.read_u32::()?); -number_type!(U64, u64, f: f.read_u64::()?); -number_type!(I8, i8, f: f.read_i8()?); -number_type!(I16, i16, f: f.read_i16::()?); -number_type!(I32, i32, f: f.read_i32::()?); -number_type!(I64, i64, f: f.read_i64::()?); -number_type!(F32, f32, f: f.read_f32::()?); -number_type!(F64, f64, f: f.read_f64::()?); -number_type!(C8, u8, f: f.read_u8()?); +number_type!(U8, u8, f: f.read_u8().await?); +number_type!(U16, u16, f: f.read_u16_le().await?); +number_type!(U32, u32, f: f.read_u32_le().await?); +number_type!(U64, u64, f: f.read_u64_le().await?); +number_type!(I8, i8, f: f.read_i8().await?); +number_type!(I16, i16, f: f.read_i16_le().await?); +number_type!(I32, i32, f: f.read_i32_le().await?); +number_type!(I64, i64, f: f.read_i64_le().await?); +number_type!(F32, f32, f: f.read_f32_le().await?); +number_type!(F64, f64, f: f.read_f64_le().await?); +number_type!(C8, u8, f: f.read_u8().await?); char_impl!(C8); -number_type!(C16, u16, f: f.read_u16::()?); +number_type!(C16, u16, f: f.read_u16_le().await?); char_impl!(C16); -number_type!(C32, u32, f: f.read_u32::()?); +number_type!(C32, u32, f: f.read_u32_le().await?); char_impl!(C32); #[derive(Debug, Clone)] @@ -83,9 +83,10 @@ impl From for String { } } impl Str { - pub(crate) fn parse(f: &mut R, l: &Size) -> MbonResult { + #[maybe_async] + pub(crate) async fn parse(f: &mut R, l: &Size) -> MbonResult { let mut buf = vec![0u8; **l as usize]; - f.read_exact(buf.as_mut_slice())?; + f.read_exact(buf.as_mut_slice()).await?; let val = String::from_utf8(buf).map_err(|err| MbonError::InvalidData(err.into()))?; Ok(Self(val)) } @@ -132,7 +133,7 @@ impl List { let mut len = self.items.len(); loop { - let (mark, pos) = client.parse_mark(SeekFrom::Start(location)).await?; + let (mark, pos) = client.parse_mark(location).await?; let item = PartialItem::new(mark, pos); location = 
item.location + item.mark.total_len(); if location > self.end { @@ -190,9 +191,7 @@ impl Array { let len = self.mark.data_len(); let location = self.start + len * (index as u64); - let (data, _) = client - .parse_data(&self.mark, SeekFrom::Start(location)) - .await?; + let data = client.parse_data(&self.mark, location).await?; self.items[index] = Some(data); Ok(self.items[index].as_mut()) @@ -255,9 +254,7 @@ impl Struct { offset += key_len; } - let (data, _) = client - .parse_data(mark, SeekFrom::Start(self.start + offset)) - .await?; + let data = client.parse_data(mark, self.start + offset).await?; let _ = mem::replace(val, Some(data)); @@ -285,10 +282,10 @@ impl Struct { pub async fn fetch_by_key<'t, E: MbonParserRead>( &'t mut self, client: &mut E, - key: &Data, + _key: &Data, ) -> MbonResult> { for i in 0..self.items.len() { - if let Some(k) = self.fetch_key(client, i).await? { + if let Some(_k) = self.fetch_key(client, i).await? { todo!() } } @@ -319,41 +316,47 @@ pub enum Data { Struct(Struct), } +#[maybe_async] impl Data { - pub(crate) fn parse(f: &mut R, mark: &Mark) -> MbonResult { + pub(crate) async fn parse(f: &mut R, mark: &Mark) -> MbonResult { Ok(match mark { Mark::Null => Self::Null, Mark::Unsigned(b) => match b { - 1 => Self::U8(U8::parse(f)?), - 2 => Self::U16(U16::parse(f)?), - 4 => Self::U32(U32::parse(f)?), - 8 => Self::U64(U64::parse(f)?), + 1 => Self::U8(U8::parse(f).await?), + 2 => Self::U16(U16::parse(f).await?), + 4 => Self::U32(U32::parse(f).await?), + 8 => Self::U64(U64::parse(f).await?), _ => return Err(MbonError::InvalidMark), }, Mark::Signed(b) => match b { - 1 => Self::I8(I8::parse(f)?), - 2 => Self::I16(I16::parse(f)?), - 4 => Self::I32(I32::parse(f)?), - 8 => Self::I64(I64::parse(f)?), + 1 => Self::I8(I8::parse(f).await?), + 2 => Self::I16(I16::parse(f).await?), + 4 => Self::I32(I32::parse(f).await?), + 8 => Self::I64(I64::parse(f).await?), _ => return Err(MbonError::InvalidMark), }, Mark::Float(b) => match b { - 4 => 
Self::F32(F32::parse(f)?), - 8 => Self::F64(F64::parse(f)?), + 4 => Self::F32(F32::parse(f).await?), + 8 => Self::F64(F64::parse(f).await?), _ => return Err(MbonError::InvalidMark), }, Mark::Char(b) => match b { - 1 => Self::C8(C8::parse(f)?), - 2 => Self::C16(C16::parse(f)?), - 4 => Self::C32(C32::parse(f)?), + 1 => Self::C8(C8::parse(f).await?), + 2 => Self::C16(C16::parse(f).await?), + 4 => Self::C32(C32::parse(f).await?), _ => return Err(MbonError::InvalidMark), }, - Mark::String(l) => Self::String(Str::parse(f, l)?), - Mark::Array(v, n) => Self::Array(Array::new(f.stream_position()?, v.clone(), &n)?), - Mark::List(l) => Self::List(List::new(f.stream_position()?, l)?), - Mark::Struct(k, v, n) => { - Self::Struct(Struct::new(f.stream_position()?, k.clone(), v.clone(), &n)?) + Mark::String(l) => Self::String(Str::parse(f, l).await?), + Mark::Array(v, n) => { + Self::Array(Array::new(f.stream_position().await?, v.clone(), &n)?) } + Mark::List(l) => Self::List(List::new(f.stream_position().await?, l)?), + Mark::Struct(k, v, n) => Self::Struct(Struct::new( + f.stream_position().await?, + k.clone(), + v.clone(), + &n, + )?), Mark::Map(_) => todo!(), Mark::Enum(_, _) => todo!(), Mark::Space => todo!(), @@ -395,6 +398,7 @@ pub struct PartialItem { location: u64, } +#[maybe_async] impl PartialItem { pub fn new(mark: Mark, location: u64) -> Self { Self { @@ -404,9 +408,9 @@ impl PartialItem { } } - pub(crate) fn parse_data(&mut self, f: &mut R) -> MbonResult<()> { - f.seek(SeekFrom::Start(self.location))?; - let data = Data::parse(f, &self.mark)?; + pub(crate) async fn parse_data(&mut self, f: &mut R) -> MbonResult<()> { + f.seek(SeekFrom::Start(self.location)).await?; + let data = Data::parse(f, &self.mark).await?; self.data = Some(data); Ok(()) } diff --git a/src/engine.rs b/src/engine.rs index f05e19c..52d2b2c 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -1,18 +1,23 @@ use maybe_async::maybe_async; use std::{ - fs::File, - io::{self, Read, Seek, SeekFrom}, + 
io::{self, SeekFrom}, path::Path, }; -#[cfg(feature = "async")] -use std::sync::{Arc, Mutex, MutexGuard}; #[cfg(feature = "async-tokio")] -use tokio::task::{spawn_blocking, JoinHandle}; +use tokio::{ + fs::File, + io::{AsyncReadExt, AsyncSeekExt}, +}; #[cfg(feature = "sync")] use std::thread::JoinHandle; +#[cfg(feature = "async-tokio")] +use tokio::task::JoinHandle; + +#[cfg(feature = "sync")] +use std::{fs::File, io::Read, io::Seek}; use crate::{ buffer::{FileBuffer, FileBufferOptions}, @@ -20,192 +25,147 @@ use crate::{ data::{Data, PartialItem}, errors::{MbonError, MbonResult}, marks::Mark, + stream::{Reader, Seeker}, }; /// Functions that are available in an Mbon engine reader /// /// These are primarily functions that are for [crate::data] items to use for /// parsing. -/// -/// The specific functions in this trait need to be narrowed down a bit more. -/// -/// There should be functions that are specialized to the different types of -/// items that are available. -/// -/// I would also like the idea to be able to parse an item in its entirety if -/// requested. Currently, it is setup so that each item that is parsed is only -/// partially parsed. 
#[maybe_async] pub trait MbonParserRead { - async fn parse_mark(&mut self, location: SeekFrom) -> MbonResult<(Mark, u64)>; - async fn parse_data(&mut self, mark: &Mark, location: SeekFrom) -> MbonResult<(Data, u64)>; - async fn parse_item(&mut self, location: SeekFrom) -> MbonResult; + async fn parse_mark(&mut self, location: u64) -> MbonResult<(Mark, u64)>; + async fn parse_data(&mut self, mark: &Mark, location: u64) -> MbonResult; + async fn parse_item(&mut self, location: u64) -> MbonResult; async fn parse_item_n( &mut self, - location: SeekFrom, + location: u64, count: Option, bytes: u64, parse_data: bool, ) -> MbonResult>; - async fn parse_data_n( - &mut self, - mark: &Mark, - location: SeekFrom, - n: usize, - ) -> MbonResult>; + async fn parse_data_n(&mut self, mark: &Mark, location: u64, n: usize) + -> MbonResult>; } -#[cfg(feature = "sync")] -type Reader = FileBuffer; -#[cfg(feature = "async")] -type Reader = Arc>>; - /// Mbon Engine /// /// Manages I/O operations for an Mbon file. 
pub struct Engine { - file: Reader, -} - -#[cfg(feature = "async")] -impl Clone for Engine { - fn clone(&self) -> Self { - Engine { - file: self.file.clone(), - } - } + file: FileBuffer, } +#[maybe_async] impl Engine { /// Open an Mbon file in write mode - pub fn open_write(path: impl AsRef) -> io::Result { + pub async fn open_write(path: impl AsRef) -> io::Result { let f = File::options() .read(true) .write(true) .create(true) - .open(path)?; + .open(path) + .await?; Ok(Self::new(f)) } /// Open an Mbon file in read mode - pub fn open_read(path: impl AsRef) -> io::Result { - let f = File::options().read(true).open(path)?; + pub async fn open_read(path: impl AsRef) -> io::Result { + let f = File::options().read(true).open(path).await?; Ok(Self::new(f)) } } -#[cfg(feature = "async")] -impl Engine { - #[inline] - fn get_file(&mut self) -> MutexGuard> { - self.file.lock().unwrap() - } - - #[inline] - fn new_file(f: FileBuffer) -> Arc>> { - Arc::new(Mutex::new(f)) - } -} - -#[cfg(feature = "sync")] -impl Engine { - #[inline] - fn get_file(&mut self) -> &mut FileBuffer { - &mut self.file - } - #[inline] - fn new_file(f: FileBuffer) -> FileBuffer { - f - } -} - -impl Engine -where - F: Read + Seek + Send + 'static, -{ - /// Spawn a new thread to process engine requests +impl Engine { + /// Spawn a concurrent future which controls the engine and allow for + /// multiple clients to concurrently make requests of the engine. /// - /// This will return a [JoinHandle] for the new thread and an - /// [ConcurrentEngineClient] which will allow for multiple concurrent - /// requests to the engine. - pub fn spawn_client_thread(self) -> (JoinHandle>, ConcurrentEngineClient) { + /// This works in both synchronous and asynchronous mode. 
+ pub fn spawn_concurrent(self) -> (JoinHandle>, ConcurrentEngineClient) { let (wrapper, client) = ConcurrentEngineWrapper::new(self); - let handle = wrapper.spawn(); - (handle, client) + let future = wrapper.spawn(); + (future, client) } } +#[maybe_async] impl Engine where - F: Read + Seek, + F: Reader + Seeker, { /// Create a new engine from a file pub fn new(file: F) -> Self { Self { - file: Self::new_file(FileBufferOptions::new().build(file)), + file: FileBufferOptions::new().build(file), } } - /// Synchronously verify the signature - pub fn verify_signature_sync(&mut self) -> MbonResult { - #[allow(unused_mut)] - let mut file = self.get_file(); - file.rewind()?; + /// Verify that the signature of the file is correct + pub async fn verify_signature(&mut self) -> MbonResult { + let file = &mut self.file; + + file.rewind().await?; let mut buf = [0u8; 8]; - file.read_exact(&mut buf)?; + file.read_exact(&mut buf).await?; const EXPECTED: [u8; 8] = [0xEE, 0x6D, 0x62, 0x6F, 0x6E, 0x0D, 0x0A, 0x00]; - if buf != EXPECTED { - return Ok(false); - } - Ok(true) + Ok(buf == EXPECTED) + } +} + +#[maybe_async] +impl MbonParserRead for Engine +where + F: Reader + Seeker, +{ + async fn parse_mark(&mut self, location: u64) -> MbonResult<(Mark, u64)> { + let file = &mut self.file; + + file.seek(SeekFrom::Start(location)).await?; + let (m, len) = Mark::parse(&mut *file).await?; + Ok((m, location + len as u64)) } - /// Synchronously parse a mark at the given location - pub fn parse_mark_sync(&mut self, location: SeekFrom) -> MbonResult<(Mark, u64)> { - #[allow(unused_mut)] - let mut file = self.get_file(); - let pos = file.seek(location)?; - let (m, _) = Mark::parse(&mut *file)?; - Ok((m, pos)) + async fn parse_data(&mut self, mark: &Mark, location: u64) -> MbonResult { + let file = &mut self.file; + + file.seek(SeekFrom::Start(location)).await?; + let data = Data::parse(&mut *file, mark).await?; + Ok(data) } - /// Synchronously parse an item at the given location - pub fn 
parse_item_sync(&mut self, location: SeekFrom) -> MbonResult { - #[allow(unused_mut)] - let mut file = self.get_file(); - let pos = file.seek(location)?; - let (m, _) = Mark::parse(&mut *file)?; - let mut item = PartialItem::new(m, pos); - item.parse_data(&mut *file)?; + async fn parse_item(&mut self, location: u64) -> MbonResult { + let file = &mut self.file; + + file.seek(SeekFrom::Start(location)).await?; + let (m, _) = Mark::parse(&mut *file).await?; + let mut item = PartialItem::new(m, location); + item.parse_data(&mut *file).await?; Ok(item) } - /// Synchronously parse several items in a sequence - pub fn parse_item_n_sync( + async fn parse_item_n( &mut self, - location: SeekFrom, + location: u64, count: Option, bytes: u64, parse_data: bool, ) -> MbonResult> { - #[allow(unused_mut)] - let mut file = self.get_file(); + let file = &mut self.file; let mut items = Vec::new(); let mut read = 0; - let mut pos = file.seek(location)?; + let mut pos = file.seek(SeekFrom::Start(location)).await?; while count.map(|count| items.len() < count).unwrap_or(true) && read < bytes { - let (m, _) = Mark::parse(&mut *file)?; + let (m, _) = Mark::parse(&mut *file).await?; let mut item = PartialItem::new(m, pos); if parse_data { - item.parse_data(&mut *file)?; + item.parse_data(&mut *file).await?; } let len = item.mark.total_len(); read += len; - pos = file.seek(SeekFrom::Start(pos + len))?; + pos = file.seek(SeekFrom::Start(pos + len)).await?; items.push(item); } @@ -216,96 +176,25 @@ where Ok(items) } - pub fn parse_data_sync(&mut self, mark: &Mark, location: SeekFrom) -> MbonResult<(Data, u64)> { - #[allow(unused_mut)] - let mut file = self.get_file(); - let pos = file.seek(location)?; - let data = Data::parse(&mut *file, mark)?; - Ok((data, pos)) - } - - pub fn parse_data_n_sync( + async fn parse_data_n( &mut self, mark: &Mark, - location: SeekFrom, + location: u64, n: usize, ) -> MbonResult> { - #[allow(unused_mut)] - let mut file = self.get_file(); + let file = &mut 
self.file; let mut items = Vec::new(); - let start = file.seek(location)?; + let start = file.seek(SeekFrom::Start(location)).await?; let len = mark.data_len(); for i in 0..n { - file.seek(SeekFrom::Start(start + (len * i as u64)))?; - let data = Data::parse(&mut *file, mark)?; + file.seek(SeekFrom::Start(start + (len * i as u64))).await?; + let data = Data::parse(&mut *file, mark).await?; items.push(data); } Ok(items) } } - -#[cfg(feature = "async-tokio")] -macro_rules! mbon_parser_impl { - ($self:ident, $s:ident => $expr:expr) => {{ - let mut $s = $self.clone(); - spawn_blocking(move || $expr).await.unwrap() - }}; - ($self:ident, ($($to_clone:ident),*) $s:ident => $expr:expr) => {{ - let mut $s = $self.clone(); - $(let $to_clone = $to_clone.clone());*; - spawn_blocking(move || $expr).await.unwrap() - }}; -} - -#[cfg(feature = "sync")] -macro_rules! mbon_parser_impl { - ($self:ident, $s:ident => $expr:expr) => {{ - let $s = $self; - $expr - }}; - ($self:ident, ($($to_clone:ident),*) $s:ident => $expr:expr) => {{ - let $s = $self; - $expr - }}; -} - -#[maybe_async] -impl MbonParserRead for Engine -where - F: Read + Seek + Send + 'static, -{ - async fn parse_mark(&mut self, location: SeekFrom) -> MbonResult<(Mark, u64)> { - mbon_parser_impl!(self, s => s.parse_mark_sync(location)) - } - - async fn parse_data(&mut self, mark: &Mark, location: SeekFrom) -> MbonResult<(Data, u64)> { - mbon_parser_impl!(self, (mark) s => s.parse_data_sync(&mark, location)) - } - - async fn parse_item(&mut self, location: SeekFrom) -> MbonResult { - mbon_parser_impl!(self, s => s.parse_item_sync(location)) - } - - async fn parse_item_n( - &mut self, - location: SeekFrom, - count: Option, - bytes: u64, - parse_data: bool, - ) -> MbonResult> { - mbon_parser_impl!(self, s => s.parse_item_n_sync(location, count, bytes, parse_data)) - } - - async fn parse_data_n( - &mut self, - mark: &Mark, - location: SeekFrom, - n: usize, - ) -> MbonResult> { - mbon_parser_impl!(self, (mark) s => 
s.parse_data_n_sync(&mark, location, n)) - } -} diff --git a/src/lib.rs b/src/lib.rs index ccbd32a..03a86d9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,13 +1,68 @@ +//! A library for the MBON file type +//! +//! mbon is a binary notation that is inspired by the NBT format. +//! +//! It is formed of a sequence of strongly typed values. Each made up of two +//! parts: a mark which defines the type and size of the data, followed by the +//! data. Marks can be different in size and so a single byte prefix is used to +//! differentiate between types. +//! +//! This format is self-describing which means that it is able to know if the +//! data is not formatted correctly or a different type was stored than what was +//! expected. Another feature of the self-describing nature of the format is +//! that you can skip values in the data without the need to parse the complete +//! item, e.g. A 1GB value can be easily skipped by only reading the mark. +//! +//! # Usage +//! +//! mbon is primarily used with the [crate::engine::Engine] which allows for +//! reading and writing data with a stream. The engine is capable of +//! reading/writing whole items or sections of items. +//! +//! # Features +//! +//! There are two primary features that mbon may be compiled with. +//! +//! * `sync` — Builds the library without any async code/dependencies +//! * `async-tokio` — Builds the library using [tokio]'s async library. +//! +//! These two features are mutually exclusive, so compiling with both `sync` and +//! `async-tokio` will cause a compiler error. +//! +//! ```toml +//! [dependencies] +//! mbon = { version = "0.3.0", features = ["async-tokio"] } +//! ``` +//! +//! These docs are written assuming that the `async-tokio` feature was set, any +//! functions that are marked as async will not be with the `sync` feature. +//! +//! # Spec +//! +//! A specification of the mbon file format can be found at +//!
[github.com/ttocsneb/mbon/blob/rewrite/spec/](https://github.com/ttocsneb/mbon/blob/rewrite/spec/index.md). +//! + #[cfg(not(any(feature = "sync", feature = "async-tokio")))] compile_error!("Feature \"sync\" or \"async-tokio\" is required"); #[cfg(all(feature = "sync", feature = "async-tokio"))] compile_error!("Only one of \"sync\" or \"async-tokio\" can be active at a time"); +#[cfg(any(feature = "sync", feature = "async-tokio"))] pub mod buffer; -pub mod channel; +#[cfg(any(feature = "sync", feature = "async-tokio"))] +pub(crate) mod channel; +#[cfg(any(feature = "sync", feature = "async-tokio"))] pub mod concurrent; +#[cfg(any(feature = "sync", feature = "async-tokio"))] pub mod data; +#[cfg(any(feature = "sync", feature = "async-tokio"))] pub mod engine; +#[cfg(any(feature = "sync", feature = "async-tokio"))] pub mod errors; +#[cfg(any(feature = "sync", feature = "async-tokio"))] pub mod items; +#[cfg(any(feature = "sync", feature = "async-tokio"))] pub mod marks; +#[cfg(any(feature = "sync", feature = "async-tokio"))] +pub mod stream; diff --git a/src/marks.rs b/src/marks.rs index 3ae6767..82cb602 100644 --- a/src/marks.rs +++ b/src/marks.rs @@ -1,17 +1,17 @@ //! [Mark] -use std::{ - io, - io::{Read, Write}, - ops::Deref, - slice, - sync::Arc, -}; +use std::{io, ops::Deref, slice, sync::Arc}; -use byteorder::ReadBytesExt; use enum_as_inner::EnumAsInner; +use maybe_async::maybe_async; + +#[cfg(feature = "async")] +use async_recursion::async_recursion; -use crate::errors::{MbonError, MbonResult}; +use crate::{ + errors::{MbonError, MbonResult}, + stream::{Reader, Writer}, +}; /// Size indicator for marks #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] @@ -37,18 +37,19 @@ impl From for u64 { } } +#[maybe_async] impl Size { /// Parse a size from a reader /// /// This expects a dynamically sized Size indicator from _insert link to /// spec_.
- pub fn parse(f: &mut R) -> MbonResult<(Self, usize)> { + pub async fn parse(f: &mut R) -> MbonResult<(Self, usize)> { let mut value = 0; let mut read = 0; let mut i = 0; loop { - let b = f.read_u8()?; + let b = f.read_u8().await?; let v = (b & 0b0111_1111) as u64; if i == 9 && b > 1 { // 9 * 7 + 1 == 64 @@ -70,7 +71,7 @@ impl Size { /// /// This will write a dynamically sized Size indicator from _insert link to /// spec_. - pub fn write(&self, f: &mut W) -> io::Result { + pub async fn write(&self, f: &mut W) -> io::Result { let mut value = self.0; let mut written = 0; while self.0 > 0 { @@ -79,7 +80,7 @@ impl Size { if value > 0 { v |= 0b1000_0000; } - f.write_all(slice::from_ref(&v))?; + f.write_all(slice::from_ref(&v)).await?; written += 1; } Ok(written) @@ -155,6 +156,7 @@ const POINTER_ID: u8 = 0x28; const RC_ID: u8 = 0x2c; const HEAP_ID: u8 = 0x10; +#[maybe_async] impl Mark { /// Get the binary id of the mark pub fn id(&self) -> u8 { @@ -179,8 +181,9 @@ impl Mark { } /// Parse a mark from a reader - pub fn parse(f: &mut R) -> MbonResult<(Self, usize)> { - let id = f.read_u8()?; + #[cfg_attr(feature = "async", async_recursion)] + pub(crate) async fn parse(f: &mut R) -> MbonResult<(Self, usize)> { + let id = f.read_u8().await?; let mut len = 1; let mark = match id & 0b1111_1100 { NULL_ID => Self::Null, @@ -189,55 +192,55 @@ impl Mark { FLOAT_ID => Self::Float(len_b(id)), CHAR_ID => Self::Char(len_b(id)), STRING_ID => { - let (size, r) = Size::parse(f)?; + let (size, r) = Size::parse(f).await?; len += r; Self::String(size) } ARRAY_ID => { - let (val, r) = Self::parse(f)?; + let (val, r) = Self::parse(f).await?; len += r; - let (size, r) = Size::parse(f)?; + let (size, r) = Size::parse(f).await?; len += r; Self::Array(Arc::new(val), size) } LIST_ID => { - let (size, r) = Size::parse(f)?; + let (size, r) = Size::parse(f).await?; len += r; Self::List(size) } STRUCT_ID => { - let (key, r) = Self::parse(f)?; + let (key, r) = Self::parse(f).await?; len += r; - let 
(val, r) = Self::parse(f)?; + let (val, r) = Self::parse(f).await?; len += r; - let (size, r) = Size::parse(f)?; + let (size, r) = Size::parse(f).await?; len += r; Self::Struct(Arc::new(key), Arc::new(val), size) } MAP_ID => { - let (size, r) = Size::parse(f)?; + let (size, r) = Size::parse(f).await?; len += r; Self::Map(size) } ENUM_ID => { - let (mark, r) = Self::parse(f)?; + let (mark, r) = Self::parse(f).await?; len += r; Self::Enum(len_b(id), Arc::new(mark)) } SPACE_ID => Self::Space, PADDING_ID => { - let (size, r) = Size::parse(f)?; + let (size, r) = Size::parse(f).await?; len += r; Self::Padding(size) } POINTER_ID => Self::Pointer(len_b(id)), RC_ID => { - let (mark, r) = Self::parse(f)?; + let (mark, r) = Self::parse(f).await?; len += r; Self::Rc(len_b(id), Arc::new(mark)) } HEAP_ID => { - let (size, r) = Size::parse(f)?; + let (size, r) = Size::parse(f).await?; len += r; Self::Heap(size) } @@ -247,39 +250,40 @@ impl Mark { } /// Write the mark to a writer - pub fn write(&self, f: &mut W) -> io::Result { - f.write_all(slice::from_ref(&self.id()))?; + #[cfg_attr(feature = "async", async_recursion)] + pub(crate) async fn write(&self, f: &mut W) -> io::Result { + f.write_all(slice::from_ref(&self.id())).await?; let mut written = 1; match self { Mark::String(l) => { - written += l.write(f)?; + written += l.write(f).await?; } Mark::Array(v, n) => { - written += v.write(f)?; - written += n.write(f)?; + written += v.write(f).await?; + written += n.write(f).await?; } Mark::List(l) => { - written += l.write(f)?; + written += l.write(f).await?; } Mark::Struct(k, v, n) => { - written += k.write(f)?; - written += v.write(f)?; - written += n.write(f)?; + written += k.write(f).await?; + written += v.write(f).await?; + written += n.write(f).await?; } Mark::Map(l) => { - written += l.write(f)?; + written += l.write(f).await?; } Mark::Enum(_, v) => { - written += v.write(f)?; + written += v.write(f).await?; } Mark::Padding(l) => { - written += l.write(f)?; + written += 
l.write(f).await?; } Mark::Rc(_, v) => { - written += v.write(f)?; + written += v.write(f).await?; } Mark::Heap(l) => { - written += l.write(f)?; + written += l.write(f).await?; } _ => {} } @@ -287,14 +291,6 @@ impl Mark { Ok(written) } - /// Write the mark to a byte buffer - #[inline] - pub fn write_to_buf(&self) -> io::Result> { - let mut buf = Vec::new(); - self.write(&mut buf)?; - Ok(buf) - } - /// Get the length of the data the mark represents pub fn data_len(&self) -> u64 { match self { @@ -340,19 +336,22 @@ impl Mark { } } +// #[cfg(feature = "sync")] #[cfg(test)] mod test { use super::*; - #[test] - fn test_simple_parse() { + #[maybe_async] + #[cfg_attr(feature = "sync", test)] + #[cfg_attr(feature = "async-tokio", tokio::test)] + async fn test_simple_parse() { let mut buf: &[u8] = &[0xc0, 0x64, 0x32]; - let (mark, read) = Mark::parse(&mut buf).unwrap(); + let (mark, read) = Mark::parse(&mut buf).await.unwrap(); assert_eq!(read, 1); assert_eq!(mark.is_null(), true); - let (mark, read) = Mark::parse(&mut buf).unwrap(); + let (mark, read) = Mark::parse(&mut buf).await.unwrap(); assert_eq!(read, 1); assert_eq!(mark.is_unsigned(), true); if let Mark::Unsigned(b) = mark { @@ -361,19 +360,23 @@ mod test { unreachable!(); } - let err = Mark::parse(&mut buf).expect_err("Expected InvalidMark error"); + let err = Mark::parse(&mut buf) + .await + .expect_err("Expected InvalidMark error"); assert_eq!(err.is_invalid_mark(), true); } - #[test] - fn test_size_parse() { + #[maybe_async] + #[cfg_attr(feature = "sync", test)] + #[cfg_attr(feature = "async-tokio", tokio::test)] + async fn test_size_parse() { let mut buf: &[u8] = &[0x32, 0x80, 0x31]; - let (size, read) = Size::parse(&mut buf).unwrap(); + let (size, read) = Size::parse(&mut buf).await.unwrap(); assert_eq!(read, 1); assert_eq!(*size, 0x32); - let (size, read) = Size::parse(&mut buf).unwrap(); + let (size, read) = Size::parse(&mut buf).await.unwrap(); assert_eq!(read, 2); assert_eq!(*size, 0x1880); } diff --git 
a/src/stream.rs b/src/stream.rs new file mode 100644 index 0000000..54786c9 --- /dev/null +++ b/src/stream.rs @@ -0,0 +1,98 @@ +#[cfg(feature = "sync")] +use std::io::{self, Read, Seek, Write}; + +#[cfg(feature = "sync")] +use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt}; + +#[cfg(feature = "async-tokio")] +use tokio::io::{AsyncRead, AsyncReadExt, AsyncSeek, AsyncSeekExt, AsyncWrite, AsyncWriteExt}; + +#[cfg(feature = "async-tokio")] +pub trait Reader: AsyncRead + AsyncReadExt + Unpin + Send {} +#[cfg(feature = "async-tokio")] +pub trait Writer: AsyncWrite + AsyncWriteExt + Unpin + Send {} +#[cfg(feature = "async-tokio")] +pub trait Seeker: AsyncSeek + AsyncSeekExt + Unpin + Send {} + +#[cfg(feature = "async-tokio")] +impl Reader for F {} +#[cfg(feature = "async-tokio")] +impl Writer for F {} +#[cfg(feature = "async-tokio")] +impl Seeker for F {} + +#[cfg(feature = "sync")] +pub trait Reader: Read { + fn read_u8(&mut self) -> io::Result { + ReadBytesExt::read_u8(self) + } + fn read_i8(&mut self) -> io::Result { + ReadBytesExt::read_i8(self) + } + fn read_u16_le(&mut self) -> io::Result { + ReadBytesExt::read_u16::(self) + } + fn read_i16_le(&mut self) -> io::Result { + ReadBytesExt::read_i16::(self) + } + fn read_u32_le(&mut self) -> io::Result { + ReadBytesExt::read_u32::(self) + } + fn read_i32_le(&mut self) -> io::Result { + ReadBytesExt::read_i32::(self) + } + fn read_u64_le(&mut self) -> io::Result { + ReadBytesExt::read_u64::(self) + } + fn read_i64_le(&mut self) -> io::Result { + ReadBytesExt::read_i64::(self) + } + fn read_f32_le(&mut self) -> io::Result { + ReadBytesExt::read_f32::(self) + } + fn read_f64_le(&mut self) -> io::Result { + ReadBytesExt::read_f64::(self) + } +} +#[cfg(feature = "sync")] +pub trait Writer: Write { + fn write_u8(&mut self, val: u8) -> io::Result<()> { + WriteBytesExt::write_u8(self, val) + } + fn write_i8(&mut self, val: i8) -> io::Result<()> { + WriteBytesExt::write_i8(self, val) + } + fn write_u16_le(&mut self, val: 
u16) -> io::Result<()> { + WriteBytesExt::write_u16::(self, val) + } + fn write_i16_le(&mut self, val: i16) -> io::Result<()> { + WriteBytesExt::write_i16::(self, val) + } + fn write_u32_le(&mut self, val: u32) -> io::Result<()> { + WriteBytesExt::write_u32::(self, val) + } + fn write_i32_le(&mut self, val: i32) -> io::Result<()> { + WriteBytesExt::write_i32::(self, val) + } + fn write_u64_le(&mut self, val: u64) -> io::Result<()> { + WriteBytesExt::write_u64::(self, val) + } + fn write_i64_le(&mut self, val: i64) -> io::Result<()> { + WriteBytesExt::write_i64::(self, val) + } + fn write_f32_le(&mut self, val: f32) -> io::Result<()> { + WriteBytesExt::write_f32::(self, val) + } + fn write_f64_le(&mut self, val: f64) -> io::Result<()> { + WriteBytesExt::write_f64::(self, val) + } +} +#[cfg(feature = "sync")] +pub trait Seeker: Seek {} + +#[cfg(feature = "sync")] +impl Reader for F {} +#[cfg(feature = "sync")] +impl Writer for F {} +#[cfg(feature = "sync")] +impl Seeker for F {} From d5b507e82aef373bbab4c0082670b5a10877133f Mon Sep 17 00:00:00 2001 From: Benjamin Jacobs Date: Tue, 26 Mar 2024 13:47:44 -0600 Subject: [PATCH 08/10] Update workflow for async/async --- .github/workflows/rust.yml | 12 ++++++++---- src/buffer.rs | 2 +- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 82aca78..3e05ee4 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -16,7 +16,11 @@ jobs: steps: - uses: actions/checkout@v3 - - name: Build - run: cargo build --verbose - - name: Run tests - run: cargo test --verbose + - name: Build Sync + run: cargo build --no-default-fefatures --features sync --verbose + - name: Run Sync tests + run: cargo test --no-default-fefatures --features sync --verbose + - name: Build Async + run: cargo build --no-default-fefatures --features async-tokio --verbose + - name: Run Async tests + run: cargo test --no-default-fefatures --features async-tokio --verbose diff 
--git a/src/buffer.rs b/src/buffer.rs index 21ccfa2..99b4f99 100644 --- a/src/buffer.rs +++ b/src/buffer.rs @@ -205,7 +205,7 @@ impl Buffer { } } -/// Set options for a [FileBuffer] or [FileBufferAsync]. +/// Set options for a [FileBuffer]. /// /// There are three options that can be set. /// From 15281c0da679768cf43b03de84485f05d06d18bb Mon Sep 17 00:00:00 2001 From: Benjamin Jacobs Date: Tue, 26 Mar 2024 13:50:02 -0600 Subject: [PATCH 09/10] =?UTF-8?q?I=20can't=20spell=20=F0=9F=99=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/rust.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 3e05ee4..c33b6fa 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -17,10 +17,10 @@ jobs: steps: - uses: actions/checkout@v3 - name: Build Sync - run: cargo build --no-default-fefatures --features sync --verbose + run: cargo build --no-default-features --features sync --verbose - name: Run Sync tests - run: cargo test --no-default-fefatures --features sync --verbose + run: cargo test --no-default-features --features sync --verbose - name: Build Async - run: cargo build --no-default-fefatures --features async-tokio --verbose + run: cargo build --no-default-features --features async-tokio --verbose - name: Run Async tests - run: cargo test --no-default-fefatures --features async-tokio --verbose + run: cargo test --no-default-features --features async-tokio --verbose From f6f4dde63176f8000368138eeb7569029bd939f1 Mon Sep 17 00:00:00 2001 From: Benjamin Jacobs Date: Mon, 1 Apr 2024 14:54:07 -0600 Subject: [PATCH 10/10] Add docs.json --- .github/docs.json | 12 + .gitignore | 1 + Cargo.lock | 716 ---------------------------------------------- 3 files changed, 13 insertions(+), 716 deletions(-) create mode 100644 .github/docs.json delete mode 100644 Cargo.lock diff --git a/.github/docs.json b/.github/docs.json 
new file mode 100644 index 0000000..0d57aee --- /dev/null +++ b/.github/docs.json @@ -0,0 +1,12 @@ +{ + "branches": { + "ttocsneb-api": { + "steps": "cargo +nightly doc --no-default-features --features async-tokio,serde" + }, + "develop": { + "steps": "cargo +nightly doc --all-features" + } + }, + "steps": "cargo +nightly doc --no-default-features --features async-tokio", + "docs": "target/doc/" +} diff --git a/.gitignore b/.gitignore index 18ff91d..96c8e9d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ /target .vscode/ .DS_Store +Cargo.lock diff --git a/Cargo.lock b/Cargo.lock deleted file mode 100644 index 7e6199c..0000000 --- a/Cargo.lock +++ /dev/null @@ -1,716 +0,0 @@ -# This file is automatically @generated by Cargo. -# It is not intended for manual editing. -version = 3 - -[[package]] -name = "addr2line" -version = "0.21.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a30b2e23b9e17a9f90641c7ab1549cd9b44f296d3ccbf309d2863cfe398a0cb" -dependencies = [ - "gimli", -] - -[[package]] -name = "adler" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" - -[[package]] -name = "anyhow" -version = "1.0.80" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ad32ce52e4161730f7098c077cd2ed6229b5804ccf99e5366be1ab72a98b4e1" - -[[package]] -name = "async-channel" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f28243a43d821d11341ab73c80bed182dc015c514b951616cf79bd4af39af0c3" -dependencies = [ - "concurrent-queue", - "event-listener", - "event-listener-strategy", - "futures-core", - "pin-project-lite", -] - -[[package]] -name = "async-generic" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5c033acf5a4f968ece4c8b18c2444b4d4b545fecebfe4b90592a4265643421d" -dependencies = [ - "proc-macro2", - 
"quote", - "syn", -] - -[[package]] -name = "async-recursion" -version = "1.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fd55a5ba1179988837d24ab4c7cc8ed6efdeff578ede0416b4225a5fca35bd0" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "async-trait" -version = "0.1.77" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c980ee35e870bd1a4d2c8294d4c04d0499e67bca1e4b5cefcc693c2fa00caea9" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "autocfg" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" - -[[package]] -name = "backtrace" -version = "0.3.69" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2089b7e3f35b9dd2d0ed921ead4f6d318c27680d4a5bd167b3ee120edb105837" -dependencies = [ - "addr2line", - "cc", - "cfg-if", - "libc", - "miniz_oxide", - "object", - "rustc-demangle", -] - -[[package]] -name = "bitflags" -version = "1.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" - -[[package]] -name = "byteorder" -version = "1.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" - -[[package]] -name = "bytes" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223" - -[[package]] -name = "cc" -version = "1.0.88" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02f341c093d19155a6e41631ce5971aac4e9a868262212153124c15fa22d1cdc" - -[[package]] -name = "cfg-if" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" - -[[package]] -name = "concurrent-queue" -version = "2.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d16048cd947b08fa32c24458a22f5dc5e835264f689f4f5653210c69fd107363" -dependencies = [ - "crossbeam-utils", -] - -[[package]] -name = "crossbeam-utils" -version = "0.8.19" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345" - -[[package]] -name = "enum-as-inner" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ffccbb6966c05b32ef8fbac435df276c4ae4d3dc55a8cd0eb9745e6c12f546a" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "event-listener" -version = "5.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7ad6fd685ce13acd6d9541a30f6db6567a7a24c9ffd4ba2955d29e3f22c8b27" -dependencies = [ - "concurrent-queue", - "parking", - "pin-project-lite", -] - -[[package]] -name = "event-listener-strategy" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "feedafcaa9b749175d5ac357452a9d41ea2911da598fde46ce1fe02c37751291" -dependencies = [ - "event-listener", - "pin-project-lite", -] - -[[package]] -name = "futures" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "645c6916888f6cb6350d2550b80fb63e734897a8498abe35cfb732b6487804b0" -dependencies = [ - "futures-channel", - "futures-core", - "futures-executor", - "futures-io", - "futures-sink", - "futures-task", - "futures-util", -] - -[[package]] -name = "futures-channel" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" -dependencies = [ - "futures-core", - "futures-sink", -] - -[[package]] -name = 
"futures-core" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" - -[[package]] -name = "futures-executor" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a576fc72ae164fca6b9db127eaa9a9dda0d61316034f33a0a0d4eda41f02b01d" -dependencies = [ - "futures-core", - "futures-task", - "futures-util", -] - -[[package]] -name = "futures-io" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" - -[[package]] -name = "futures-macro" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "futures-sink" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" - -[[package]] -name = "futures-task" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" - -[[package]] -name = "futures-util" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" -dependencies = [ - "futures-channel", - "futures-core", - "futures-io", - "futures-macro", - "futures-sink", - "futures-task", - "memchr", - "pin-project-lite", - "pin-utils", - "slab", -] - -[[package]] -name = "getrandom" -version = "0.2.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5" -dependencies = [ - "cfg-if", - "libc", - "wasi", -] - -[[package]] -name = 
"gimli" -version = "0.28.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4271d37baee1b8c7e4b708028c57d816cf9d2434acb33a549475f78c181f6253" - -[[package]] -name = "heck" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" - -[[package]] -name = "hermit-abi" -version = "0.3.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d3d0e0f38255e7fa3cf31335b3a56f05febd18025f4db5ef7a0cfb4f8da651f" - -[[package]] -name = "libc" -version = "0.2.153" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" - -[[package]] -name = "lock_api" -version = "0.4.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45" -dependencies = [ - "autocfg", - "scopeguard", -] - -[[package]] -name = "maybe-async" -version = "0.2.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cf92c10c7e361d6b99666ec1c6f9805b0bea2c3bd8c78dc6fe98ac5bd78db11" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "mbon" -version = "0.3.0+nightly" -dependencies = [ - "anyhow", - "async-channel", - "async-generic", - "async-recursion", - "async-trait", - "byteorder", - "enum-as-inner", - "futures", - "maybe-async", - "pin-project", - "rand", - "strum", - "thiserror", - "tokio", -] - -[[package]] -name = "memchr" -version = "2.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" - -[[package]] -name = "miniz_oxide" -version = "0.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7" -dependencies = [ - "adler", -] - 
-[[package]] -name = "mio" -version = "0.8.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f3d0b296e374a4e6f3c7b0a1f5a51d748a0d34c85e7dc48fc3fa9a87657fe09" -dependencies = [ - "libc", - "wasi", - "windows-sys", -] - -[[package]] -name = "num_cpus" -version = "1.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" -dependencies = [ - "hermit-abi", - "libc", -] - -[[package]] -name = "object" -version = "0.32.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6a622008b6e321afc04970976f62ee297fdbaa6f95318ca343e3eebb9648441" -dependencies = [ - "memchr", -] - -[[package]] -name = "parking" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb813b8af86854136c6922af0598d719255ecb2179515e6e7730d468f05c9cae" - -[[package]] -name = "parking_lot" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" -dependencies = [ - "lock_api", - "parking_lot_core", -] - -[[package]] -name = "parking_lot_core" -version = "0.9.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e" -dependencies = [ - "cfg-if", - "libc", - "redox_syscall", - "smallvec", - "windows-targets", -] - -[[package]] -name = "pin-project" -version = "1.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0302c4a0442c456bd56f841aee5c3bfd17967563f6fadc9ceb9f9c23cf3807e0" -dependencies = [ - "pin-project-internal", -] - -[[package]] -name = "pin-project-internal" -version = "1.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "266c042b60c9c76b8d53061e52b2e0d1116abc57cefc8c5cd671619a56ac3690" -dependencies = [ - "proc-macro2", - "quote", - "syn", 
-] - -[[package]] -name = "pin-project-lite" -version = "0.2.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58" - -[[package]] -name = "pin-utils" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" - -[[package]] -name = "ppv-lite86" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" - -[[package]] -name = "proc-macro2" -version = "1.0.78" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae" -dependencies = [ - "unicode-ident", -] - -[[package]] -name = "quote" -version = "1.0.35" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" -dependencies = [ - "proc-macro2", -] - -[[package]] -name = "rand" -version = "0.8.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" -dependencies = [ - "libc", - "rand_chacha", - "rand_core", -] - -[[package]] -name = "rand_chacha" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" -dependencies = [ - "ppv-lite86", - "rand_core", -] - -[[package]] -name = "rand_core" -version = "0.6.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" -dependencies = [ - "getrandom", -] - -[[package]] -name = "redox_syscall" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" -dependencies = [ - "bitflags", -] - -[[package]] -name = "rustc-demangle" -version = "0.1.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" - -[[package]] -name = "rustversion" -version = "1.0.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4" - -[[package]] -name = "scopeguard" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" - -[[package]] -name = "signal-hook-registry" -version = "1.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8229b473baa5980ac72ef434c4415e70c4b5e71b423043adb4ba059f89c99a1" -dependencies = [ - "libc", -] - -[[package]] -name = "slab" -version = "0.4.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" -dependencies = [ - "autocfg", -] - -[[package]] -name = "smallvec" -version = "1.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7" - -[[package]] -name = "socket2" -version = "0.5.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b5fac59a5cb5dd637972e5fca70daf0523c9067fcdc4842f053dae04a18f8e9" -dependencies = [ - "libc", - "windows-sys", -] - -[[package]] -name = "strum" -version = "0.26.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "723b93e8addf9aa965ebe2d11da6d7540fa2283fcea14b3371ff055f7ba13f5f" -dependencies = [ - "strum_macros", -] - -[[package]] -name = "strum_macros" -version = "0.26.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"7a3417fc93d76740d974a01654a09777cb500428cc874ca9f45edfe0c4d4cd18" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "rustversion", - "syn", -] - -[[package]] -name = "syn" -version = "2.0.50" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74f1bdc9872430ce9b75da68329d1c1746faf50ffac5f19e02b71e37ff881ffb" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - -[[package]] -name = "thiserror" -version = "1.0.57" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e45bcbe8ed29775f228095caf2cd67af7a4ccf756ebff23a306bf3e8b47b24b" -dependencies = [ - "thiserror-impl", -] - -[[package]] -name = "thiserror-impl" -version = "1.0.57" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a953cb265bef375dae3de6663da4d3804eee9682ea80d8e2542529b73c531c81" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "tokio" -version = "1.36.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61285f6515fa018fb2d1e46eb21223fff441ee8db5d0f1435e8ab4f5cdb80931" -dependencies = [ - "backtrace", - "bytes", - "libc", - "mio", - "num_cpus", - "parking_lot", - "pin-project-lite", - "signal-hook-registry", - "socket2", - "tokio-macros", - "windows-sys", -] - -[[package]] -name = "tokio-macros" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "unicode-ident" -version = "1.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ceab39d59e4c9499d4e5a8ee0e2735b891bb7308ac83dfb4e80cad195c9f6f3" - -[[package]] -name = "wasi" -version = "0.11.0+wasi-snapshot-preview1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" - 
-[[package]] -name = "windows-sys" -version = "0.48.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" -dependencies = [ - "windows-targets", -] - -[[package]] -name = "windows-targets" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" -dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", -] - -[[package]] -name = "windows_aarch64_gnullvm" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" - -[[package]] -name = "windows_aarch64_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" - -[[package]] -name = "windows_i686_gnu" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" - -[[package]] -name = "windows_i686_msvc" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" - -[[package]] -name = "windows_x86_64_gnullvm" -version = "0.48.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.48.5" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538"