Skip to content

Commit

Permalink
week3day1
Browse files Browse the repository at this point in the history
  • Loading branch information
Elainasama committed Oct 8, 2024
1 parent 5c4ab4f commit 477399d
Show file tree
Hide file tree
Showing 8 changed files with 123 additions and 76 deletions.
2 changes: 1 addition & 1 deletion mini-lsm-starter/src/block.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ pub struct Block {
}

const SIZE_U16: usize = std::mem::size_of::<u16>();

const SIZE_U64: usize = std::mem::size_of::<u64>();
impl Block {
/// Encode the internal data to the data layout illustrated in the tutorial
/// Note: You may want to recheck if any of the expected field is missing from your output
Expand Down
23 changes: 15 additions & 8 deletions mini-lsm-starter/src/block/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
use crate::key::{KeySlice, KeyVec};
use bytes::BufMut;

use super::{Block, SIZE_U16};
use super::{Block, SIZE_U16, SIZE_U64};

/// Builds a block.
pub struct BlockBuilder {
Expand Down Expand Up @@ -43,20 +43,25 @@ impl BlockBuilder {

fn add_to_offset(&mut self, key: KeySlice, value: &[u8], offset: u16) -> bool {
// Key or value is too long to encode (lengths are stored as u16)
if key.len() > 0xffff || value.len() > 0xffff {
if key.raw_len() > 0xffff || value.len() > 0xffff {
return false;
}
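// Unless the first key-value pair alone exceeds the target block size, the encoded block size must always stay less than or equal to target_size.
// A new entry adds: key_overlap_len, rest_key_len, key, timestamp, val_len, value, and one offset slot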

// key_overlap_len (u16) | rest_key_len (u16) | key (rest_key_len)
let key_len = key.len() as u16;
// key_overlap_len (u16) | rest_key_len (u16) | key (rest_key_len) | timestamp (u64)
let key_len = key.key_len() as u16;
let val_len = value.len() as u16;
let key_overlap_len = self.key_overlap_len(key);

let key_rest_len = key_len - key_overlap_len as u16;
if offset > 0
&& self.cal_block_size() + key_rest_len as usize + value.len() + 3 * SIZE_U16 + SIZE_U16
&& self.cal_block_size()
+ key_rest_len as usize
+ value.len()
+ 3 * SIZE_U16
+ SIZE_U16
+ SIZE_U64
> self.block_size
{
return false;
Expand All @@ -65,7 +70,9 @@ impl BlockBuilder {

self.data.put_u16(key_overlap_len as u16);
self.data.put_u16(key_len - key_overlap_len as u16);
self.data.extend(key.raw_ref()[key_overlap_len..].to_vec());
self.data.extend(key.key_ref()[key_overlap_len..].to_vec());
// Week 3 day 1 (MVCC): write the key's timestamp right after the key bytes
self.data.put_u64(key.ts());
self.data.put_u16(val_len);
self.data.extend(value.to_vec());
// First insertion
Expand All @@ -81,8 +88,8 @@ impl BlockBuilder {
}
let mut l = 0;

while l < key.len() && l < self.first_key.len() {
if key.raw_ref()[l] != self.first_key.raw_ref()[l] {
while l < key.key_len() && l < self.first_key.key_len() {
if key.key_ref()[l] != self.first_key.key_ref()[l] {
break;
}
l += 1;
Expand Down
10 changes: 6 additions & 4 deletions mini-lsm-starter/src/block/iterator.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#![allow(unused_variables)] // TODO(you): remove this lint after implementing this mod
#![allow(dead_code)] // TODO(you): remove this lint after implementing this mod

use super::Block;
use super::{Block, SIZE_U64};
use crate::key::{Key, KeySlice, KeyVec};
use bytes::Buf;
use std::sync::Arc;
Expand Down Expand Up @@ -77,15 +77,17 @@ impl BlockIterator {
// offset += 2;
// self.key = Key::from_vec(self.block.data[(offset)..(offset + key_len as usize)].to_vec());

// key_overlap_len (u16) | rest_key_len (u16) | key (rest_key_len)
// key_overlap_len (u16) | rest_key_len (u16) | key (rest_key_len) | ts(u64)
let key_overlap_len = (&self.block.data[offset..offset + SIZE_U16]).get_u16();
offset += SIZE_U16;
let key_rest_len = (&self.block.data[offset..offset + SIZE_U16]).get_u16();
offset += SIZE_U16;
let mut key = self.first_key.raw_ref()[..key_overlap_len as usize].to_vec();
let mut key = self.first_key.key_ref()[..key_overlap_len as usize].to_vec();
key.extend(self.block.data[offset..offset + key_rest_len as usize].to_vec());
self.key = Key::from_vec(key);
offset += key_rest_len as usize;
let ts = (&self.block.data[offset..offset + SIZE_U64]).get_u64();
self.key = Key::from_vec_with_ts(key, ts);
offset += SIZE_U64;
let val_len = (&self.block.data[offset..offset + SIZE_U16]).get_u16();
offset += SIZE_U16;
self.value_range = (offset, offset + val_len as usize);
Expand Down
103 changes: 67 additions & 36 deletions mini-lsm-starter/src/key.rs
Original file line number Diff line number Diff line change
@@ -1,41 +1,54 @@
use std::fmt::Debug;
use std::{cmp::Reverse, fmt::Debug};

use bytes::Bytes;

pub const TS_ENABLED: bool = false;

pub struct Key<T: AsRef<[u8]>>(T);
pub struct Key<T: AsRef<[u8]>>(T, u64);

pub type KeySlice<'a> = Key<&'a [u8]>;
pub type KeyVec = Key<Vec<u8>>;
pub type KeyBytes = Key<Bytes>;

/// For testing purposes only; do not use this anywhere in your implementation.
pub const TS_ENABLED: bool = true;

/// Temporary, should remove after implementing full week 3 day 1 + 2.
pub const TS_DEFAULT: u64 = 0;

pub const TS_MAX: u64 = u64::MAX;
pub const TS_MIN: u64 = u64::MIN;
pub const TS_RANGE_BEGIN: u64 = u64::MAX;
pub const TS_RANGE_END: u64 = u64::MIN;

impl<T: AsRef<[u8]>> Key<T> {
pub fn into_inner(self) -> T {
self.0
}

pub fn len(&self) -> usize {
pub fn key_len(&self) -> usize {
self.0.as_ref().len()
}

pub fn raw_len(&self) -> usize {
self.0.as_ref().len() + std::mem::size_of::<u64>()
}

pub fn is_empty(&self) -> bool {
self.0.as_ref().is_empty()
}

pub fn for_testing_ts(self) -> u64 {
0
self.1
}
}

impl Key<Vec<u8>> {
pub fn new() -> Self {
Self(Vec::new())
Self(Vec::new(), TS_DEFAULT)
}

/// Create a `KeyVec` from a `Vec<u8>`. Will be removed in week 3.
pub fn from_vec(key: Vec<u8>) -> Self {
Self(key)
/// Create a `KeyVec` from a `Vec<u8>` and a timestamp.
pub fn from_vec_with_ts(key: Vec<u8>, ts: u64) -> Self {
Self(key, ts)
}

/// Clears the key and sets ts to 0.
Expand All @@ -48,51 +61,66 @@ impl Key<Vec<u8>> {
self.0.extend(data)
}

/// Set the key from a slice without re-allocating. The signature will change in week 3.
pub fn set_ts(&mut self, ts: u64) {
self.1 = ts;
}

/// Set the key from a slice without re-allocating.
pub fn set_from_slice(&mut self, key_slice: KeySlice) {
self.0.clear();
self.0.extend(key_slice.0);
self.1 = key_slice.1;
}

pub fn as_key_slice(&self) -> KeySlice {
Key(self.0.as_slice())
Key(self.0.as_slice(), self.1)
}

pub fn into_key_bytes(self) -> KeyBytes {
Key(self.0.into())
Key(self.0.into(), self.1)
}

/// Always use `raw_ref` to access the key in week 1 + 2. This function will be removed in week 3.
pub fn raw_ref(&self) -> &[u8] {
pub fn key_ref(&self) -> &[u8] {
self.0.as_ref()
}

pub fn ts(&self) -> u64 {
self.1
}

pub fn for_testing_key_ref(&self) -> &[u8] {
self.0.as_ref()
}

pub fn for_testing_from_vec_no_ts(key: Vec<u8>) -> Self {
Self(key)
Self(key, TS_DEFAULT)
}
}

impl Key<Bytes> {
pub fn new() -> Self {
Self(Bytes::new(), TS_DEFAULT)
}

pub fn as_key_slice(&self) -> KeySlice {
Key(&self.0)
Key(&self.0, self.1)
}

/// Create a `KeyBytes` from a `Bytes`. Will be removed in week 3.
pub fn from_bytes(bytes: Bytes) -> KeyBytes {
Key(bytes)
/// Create a `KeyBytes` from a `Bytes` and a ts.
pub fn from_bytes_with_ts(bytes: Bytes, ts: u64) -> KeyBytes {
Key(bytes, ts)
}

/// Always use `raw_ref` to access the key in week 1 + 2. This function will be removed in week 3.
pub fn raw_ref(&self) -> &[u8] {
pub fn key_ref(&self) -> &[u8] {
self.0.as_ref()
}

pub fn ts(&self) -> u64 {
self.1
}

pub fn for_testing_from_bytes_no_ts(bytes: Bytes) -> KeyBytes {
Key(bytes)
Key(bytes, TS_DEFAULT)
}

pub fn for_testing_key_ref(&self) -> &[u8] {
Expand All @@ -102,29 +130,32 @@ impl Key<Bytes> {

impl<'a> Key<&'a [u8]> {
pub fn to_key_vec(self) -> KeyVec {
Key(self.0.to_vec())
Key(self.0.to_vec(), self.1)
}

/// Create a key slice from a slice. Will be removed in week 3.
pub fn from_slice(slice: &'a [u8]) -> Self {
Self(slice)
pub fn from_slice(slice: &'a [u8], ts: u64) -> Self {
Self(slice, ts)
}

/// Always use `raw_ref` to access the key in week 1 + 2. This function will be removed in week 3.
pub fn raw_ref(self) -> &'a [u8] {
pub fn key_ref(self) -> &'a [u8] {
self.0
}

pub fn ts(&self) -> u64 {
self.1
}

pub fn for_testing_key_ref(self) -> &'a [u8] {
self.0
}

pub fn for_testing_from_slice_no_ts(slice: &'a [u8]) -> Self {
Self(slice)
Self(slice, TS_DEFAULT)
}

pub fn for_testing_from_slice_with_ts(slice: &'a [u8], _ts: u64) -> Self {
Self(slice)
pub fn for_testing_from_slice_with_ts(slice: &'a [u8], ts: u64) -> Self {
Self(slice, ts)
}
}

Expand All @@ -136,34 +167,34 @@ impl<T: AsRef<[u8]> + Debug> Debug for Key<T> {

impl<T: AsRef<[u8]> + Default> Default for Key<T> {
fn default() -> Self {
Self(T::default())
Self(T::default(), TS_DEFAULT)
}
}

impl<T: AsRef<[u8]> + PartialEq> PartialEq for Key<T> {
fn eq(&self, other: &Self) -> bool {
self.0.eq(&other.0)
(self.0.as_ref(), self.1).eq(&(other.0.as_ref(), other.1))
}
}

impl<T: AsRef<[u8]> + Eq> Eq for Key<T> {}

impl<T: AsRef<[u8]> + Clone> Clone for Key<T> {
fn clone(&self) -> Self {
Self(self.0.clone())
Self(self.0.clone(), self.1)
}
}

impl<T: AsRef<[u8]> + Copy> Copy for Key<T> {}

impl<T: AsRef<[u8]> + PartialOrd> PartialOrd for Key<T> {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
self.0.partial_cmp(&other.0)
(self.0.as_ref(), Reverse(self.1)).partial_cmp(&(other.0.as_ref(), Reverse(other.1)))
}
}

impl<T: AsRef<[u8]> + Ord> Ord for Key<T> {
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
self.0.cmp(&other.0)
(self.0.as_ref(), Reverse(self.1)).cmp(&(other.0.as_ref(), Reverse(other.1)))
}
}
Loading

0 comments on commit 477399d

Please sign in to comment.