From 6308a9c1a9002a737c95d679b2b2c74b3d7d43ec Mon Sep 17 00:00:00 2001 From: sjcsjc123 <1401189096@qq.com> Date: Fri, 18 Aug 2023 23:03:15 +0800 Subject: [PATCH 01/35] wal + memory read/write --- db/memory/db.go | 132 +++++++++++++++++++++++++++++++++++++++ db/memory/db_test.go | 44 +++++++++++++ db/memory/memory.go | 53 ++++++++++++++++ db/memory/memory_test.go | 32 ++++++++++ db/memory/wal.go | 103 ++++++++++++++++++++++++++++++ db/memory/wal_test.go | 29 +++++++++ 6 files changed, 393 insertions(+) create mode 100644 db/memory/db.go create mode 100644 db/memory/db_test.go create mode 100644 db/memory/memory.go create mode 100644 db/memory/memory_test.go create mode 100644 db/memory/wal.go create mode 100644 db/memory/wal_test.go diff --git a/db/memory/db.go b/db/memory/db.go new file mode 100644 index 0000000..345a78c --- /dev/null +++ b/db/memory/db.go @@ -0,0 +1,132 @@ +package memory + +import ( + "github.com/ByteStorage/FlyDB/config" + "github.com/ByteStorage/FlyDB/db/engine" + "strings" +) + +const ( + walFileName = "/db.wal" +) + +type Options struct { + Option config.Options + LogNum uint32 + FileSize int64 + SaveTime int64 + MemSize int64 + TotalMemSize int64 +} + +type Db struct { + option Options + db *engine.DB + mem *MemTable + oldList []*MemTable + wal *Wal + oldListChan chan *MemTable + size int64 +} + +func NewDB(option Options) (*Db, error) { + mem := NewMemTable() + db, err := engine.NewDB(option.Option) + if err != nil { + return nil, err + } + // Create or open the WAL file. + option.Option.DirPath = strings.TrimSuffix(option.Option.DirPath, "/") + wal, err := NewWal(option) + if err != nil { + return nil, err + } + go wal.AsyncSave() + d := &Db{ + mem: mem, + db: db, + wal: wal, + option: option, + oldList: make([]*MemTable, 0), + oldListChan: make(chan *MemTable, 1000000), + } + go d.async() + return d, nil +} + +func (d *Db) Put(key []byte, value []byte) error { + // Write to WAL + err := d.wal.Put(key, value) + if err != nil { + return err + } + + // if sync write, save wal + if d.option.Option.SyncWrite { + err := d.wal.Save() + if err != nil { + return err + } + } + + // if all memTable size > total memTable size, write to db + if d.size > d.option.TotalMemSize { + return d.db.Put(key, value) + } + + // if active memTable size > define size, change to immutable memTable + if d.mem.Size()+int64(len(key)+len(value)) > d.option.MemSize { + // add to immutable memTable list + d.AddOldMemTable(d.mem) + // add to size + d.size += d.mem.Size() + // create new active memTable + d.mem = NewMemTable() + } + + // write to active memTable + d.mem.Put(string(key), value) + return nil +} + +func (d *Db) Get(key []byte) ([]byte, error) { + // first get from memTable + value, err := d.mem.Get(string(key)) + if err == nil { + return value, nil + } + + // if active memTable not found, get from immutable memTable + for _, list := range d.oldList { + value, err = list.Get(string(key)) + if err == nil { + return value, nil + } + } + + // if immutable memTable not found, get from db + return d.db.Get(key) +} + +func (d *Db) Close() error { + err := d.wal.Save() + if err != nil { + return err + } + return d.db.Close() +} + +func (d *Db) AddOldMemTable(oldList *MemTable) { + d.oldListChan <- oldList +} + +func (d *Db) async() { + for oldList := range d.oldListChan { + for key, value := range oldList.table { + err := d.db.Put([]byte(key), value) + if err != nil { + // TODO handle error: either log it, retry, or whatever makes sense for your application + } + } + } +} diff --git a/db/memory/db_test.go b/db/memory/db_test.go new file mode 100644 index 0000000..2105f78 --- /dev/null +++ b/db/memory/db_test.go @@ -0,0 +1,44 @@ +package memory + +import ( + "fmt" + "github.com/ByteStorage/FlyDB/config" + "github.com/ByteStorage/FlyDB/lib/randkv" + "github.com/stretchr/testify/assert" + "os" + "testing" + "time" +) + +func TestPutAndGet(t *testing.T) { + opts := config.DefaultOptions + dir, _ := os.MkdirTemp("", "flydb-benchmark") + opts.DirPath = dir + opts.DataFileSize = 64 * 1024 * 1024 + options := Options{ + Option: opts, + LogNum: 100, + SaveTime: 100 * 1000, + FileSize: 100 * 1024 * 1024, + MemSize: 2 * 1024 * 1024 * 1024, + TotalMemSize: 10 * 1024 * 1024 * 1024, + } + db, err := NewDB(options) + assert.Nil(t, err) + assert.NotNil(t, db) + + start := time.Now() + for n := 0; n < 500000; n++ { + err = db.Put(randkv.GetTestKey(n), randkv.RandomValue(24)) + assert.Nil(t, err) + } + end := time.Now() + fmt.Println("put time: ", end.Sub(start).String()) + start = time.Now() + for n := 0; n < 500000; n++ { + _, err = db.Get(randkv.GetTestKey(n)) + assert.Nil(t, err) + } + end = time.Now() + fmt.Println("get time: ", end.Sub(start).String()) +} diff --git a/db/memory/memory.go b/db/memory/memory.go new file mode 100644 index 0000000..4b55fd1 --- /dev/null +++ b/db/memory/memory.go @@ -0,0 +1,53 @@ +package memory + +import ( + "errors" + "sync" +) + +type MemTable struct { + table map[string][]byte + size int64 + mutex sync.RWMutex + hasFlush map[string]bool +} + +func NewMemTable() *MemTable { + return &MemTable{ + table: make(map[string][]byte), + hasFlush: make(map[string]bool), + } +} + +func (m *MemTable) Put(key string, value []byte) { + m.mutex.Lock() + defer m.mutex.Unlock() + m.table[key] = value + m.size += int64(len(key) + len(value)) +} + +func (m *MemTable) Get(key string) ([]byte, error) { + m.mutex.RLock() + defer m.mutex.RUnlock() + + value, ok := m.table[key] + if !ok { + return nil, errors.New("key not found") + } + + return value, nil +} + +func (m *MemTable) Flush(key string) { + m.mutex.Lock() + defer m.mutex.Unlock() + + m.hasFlush[key] = true +} + +func (m *MemTable) Size() int64 { + m.mutex.RLock() + defer m.mutex.RUnlock() + + return m.size +} diff --git a/db/memory/memory_test.go b/db/memory/memory_test.go new file mode 100644 index 0000000..f6ef5d4 --- /dev/null +++ b/db/memory/memory_test.go @@ -0,0 +1,32 @@ +package memory + +import ( + "github.com/stretchr/testify/assert" + "testing" +) + +func TestNewMemTable(t *testing.T) { + table := NewMemTable() + assert.NotNil(t, table) +} + +func TestMemTable_Get(t *testing.T) { + table := NewMemTable() + assert.NotNil(t, table) + + value, err := table.Get("test") + assert.Nil(t, value) + assert.NotNil(t, err) + assert.Equal(t, "key not found", err.Error()) +} + +func TestMemTable_Put(t *testing.T) { + table := NewMemTable() + assert.NotNil(t, table) + + table.Put("test", []byte("test")) + value, err := table.Get("test") + assert.Nil(t, err) + assert.NotNil(t, value) + assert.Equal(t, "test", string(value)) +} diff --git a/db/memory/wal.go b/db/memory/wal.go new file mode 100644 index 0000000..e7f8bba --- /dev/null +++ b/db/memory/wal.go @@ -0,0 +1,103 @@ +package memory + +import ( + "encoding/binary" + "errors" + "github.com/ByteStorage/FlyDB/db/fileio" + "hash/crc32" + "time" +) + +const ( + // Record types + putType = byte(1) + deleteType = byte(2) +) + +type Wal struct { + m *fileio.MMapIO + logNum uint32 + saveTime int64 +} + +func NewWal(options Options) (*Wal, error) { + mapIO, err := fileio.NewMMapIOManager(options.Option.DirPath+walFileName, options.FileSize) + if err != nil { + return nil, err + } + return &Wal{ + m: mapIO, + logNum: options.LogNum, + saveTime: options.SaveTime, + }, nil +} + +// Put writes a record to the WAL. +//+---------+-----------+-----------+----------------+--- ... ---+ +//|CRC (4B) | Size (2B) | Type (1B) | Log number (4B)| Payload | +//+---------+-----------+-----------+----------------+--- ... ---+ +//Same as above, with the addition of +//Log number = 32bit log file number, so that we can distinguish between +//records written by the most recent log writer vs a previous one. +func (w *Wal) writeRecord(recordType byte, key, value []byte) error { + // Prepare the payload based on record type + var payload []byte + switch recordType { + case putType: + payload = append(key, value...) + case deleteType: + payload = key + default: + return errors.New("unknown record type") + } + + size := uint16(4 + len(payload)) // 4 bytes for log number + buffer := make([]byte, 4+2+1+4+len(payload)) + + // Compute CRC + crc := crc32.ChecksumIEEE(buffer[4:]) + binary.LittleEndian.PutUint32(buffer, crc) + + // Write size + binary.LittleEndian.PutUint16(buffer[4:], size) + + // Write type + buffer[4+2] = recordType + + // Write log number + binary.LittleEndian.PutUint32(buffer[4+2+1:], w.logNum) + + // Write payload + copy(buffer[4+2+1+4:], payload) + + _, err := w.m.Write(buffer) + return err +} + +// Put writes a record to the WAL. +func (w *Wal) Put(key []byte, value []byte) error { + return w.writeRecord(putType, key, value) +} + +// Delete writes a delete record to the WAL. +func (w *Wal) Delete(key []byte) error { + return w.writeRecord(deleteType, key, nil) +} + +func (w *Wal) Save() error { + return w.m.Sync() +} + +func (w *Wal) Close() error { + return w.m.Close() +} + +func (w *Wal) AsyncSave() { + for range time.Tick(time.Duration(w.saveTime)) { + err := w.Save() + if err != nil { + // TODO how to fix this error? + continue + } + } +} diff --git a/db/memory/wal_test.go b/db/memory/wal_test.go new file mode 100644 index 0000000..c63740b --- /dev/null +++ b/db/memory/wal_test.go @@ -0,0 +1,29 @@ +package memory + +import ( + "github.com/ByteStorage/FlyDB/config" + "github.com/ByteStorage/FlyDB/lib/randkv" + "github.com/stretchr/testify/assert" + "testing" + "time" +) + +func TestWal_Put(t *testing.T) { + opt := Options{ + Option: config.DefaultOptions, + LogNum: 100, + FileSize: 100 * 1024 * 1024, + SaveTime: 100 * 1000, + } + wal, err := NewWal(opt) + defer wal.Close() + assert.Nil(t, err) + assert.NotNil(t, wal) + start := time.Now() + for n := 0; n < 500000; n++ { + err = wal.Put(randkv.GetTestKey(n), randkv.RandomValue(24)) + assert.Nil(t, err) + } + end := time.Now() + t.Log("put time: ", end.Sub(start).String()) +} From 0d80efb7a4ab8ba03fb976109ff41ad1fd7fd021 Mon Sep 17 00:00:00 2001 From: sjcsjc123 <1401189096@qq.com> Date: Fri, 18 Aug 2023 23:21:54 +0800 Subject: [PATCH 02/35] wal + memory read/write --- db/memory/db.go | 25 ++++++++++++++++++++----- db/memory/db_test.go | 1 + db/memory/memory.go | 8 -------- 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/db/memory/db.go b/db/memory/db.go index 345a78c..9148a97 100644 --- a/db/memory/db.go +++ b/db/memory/db.go @@ -26,7 +26,8 @@ type Db struct { oldList []*MemTable wal *Wal oldListChan chan *MemTable - size int64 + totalSize int64 + activeSize int64 } func NewDB(option Options) (*Db, error) { @@ -49,12 +50,18 @@ func NewDB(option Options) (*Db, error) { option: option, oldList: make([]*MemTable, 0), oldListChan: make(chan *MemTable, 1000000), + activeSize: 0, + totalSize: 0, } go d.async() return d, nil } func (d *Db) Put(key []byte, value []byte) error { + // calculate key and value size + keyLen := int64(len(key)) + valueLen := int64(len(value)) + // Write to WAL err := d.wal.Put(key, value) if err != nil { @@ -70,22 +77,25 @@ func (d *Db) Put(key []byte, value []byte) error { } // if all memTable size > total memTable size, write to db - if d.size > d.option.TotalMemSize { + if d.totalSize > d.option.TotalMemSize { return d.db.Put(key, value) } // if active memTable size > define size, change to immutable memTable - if d.mem.Size()+int64(len(key)+len(value)) > d.option.MemSize { + if d.activeSize+keyLen+valueLen > d.option.MemSize { // add to immutable memTable list d.AddOldMemTable(d.mem) - // add to size - d.size += d.mem.Size() // create new active memTable d.mem = NewMemTable() + d.activeSize = 0 } // write to active memTable d.mem.Put(string(key), value) + + // add size + d.activeSize += keyLen + valueLen + d.totalSize += keyLen + valueLen return nil } @@ -127,6 +137,11 @@ func (d *Db) async() { if err != nil { // TODO handle error: either log it, retry, or whatever makes sense for your application } + d.totalSize -= int64(len(key) + len(value)) } } } + +func (d *Db) Clean() { + d.db.Clean() +} diff --git a/db/memory/db_test.go b/db/memory/db_test.go index 2105f78..f06ee35 100644 --- a/db/memory/db_test.go +++ b/db/memory/db_test.go @@ -24,6 +24,7 @@ func TestPutAndGet(t *testing.T) { TotalMemSize: 10 * 1024 * 1024 * 1024, } db, err := NewDB(options) + defer db.Clean() assert.Nil(t, err) assert.NotNil(t, db) diff --git a/db/memory/memory.go b/db/memory/memory.go index 4b55fd1..1c275a2 100644 --- a/db/memory/memory.go +++ b/db/memory/memory.go @@ -23,7 +23,6 @@ func (m *MemTable) Put(key string, value []byte) { m.mutex.Lock() defer m.mutex.Unlock() m.table[key] = value - m.size += int64(len(key) + len(value)) } func (m *MemTable) Get(key string) ([]byte, error) { @@ -44,10 +43,3 @@ func (m *MemTable) Flush(key string) { m.hasFlush[key] = true } - -func (m *MemTable) Size() int64 { - m.mutex.RLock() - defer m.mutex.RUnlock() - - return m.size -} From 97e798b644044da7f919a9b9596647cf516c0549 Mon Sep 17 00:00:00 2001 From: sjcsjc123 <1401189096@qq.com> Date: Sat, 19 Aug 2023 00:30:59 +0800 Subject: [PATCH 03/35] wal + memory read/write --- db/memory/db.go | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/db/memory/db.go b/db/memory/db.go index 9148a97..f532a00 100644 --- a/db/memory/db.go +++ b/db/memory/db.go @@ -4,6 +4,7 @@ import ( "github.com/ByteStorage/FlyDB/config" "github.com/ByteStorage/FlyDB/db/engine" "strings" + "sync" ) const ( @@ -28,6 +29,8 @@ type Db struct { oldListChan chan *MemTable totalSize int64 activeSize int64 + pool *sync.Pool + errMsgCh chan []byte } func NewDB(option Options) (*Db, error) { @@ -52,21 +55,34 @@ func NewDB(option Options) (*Db, error) { oldListChan: make(chan *MemTable, 1000000), activeSize: 0, totalSize: 0, + pool: &sync.Pool{New: func() interface{} { return make([]byte, 0, 1024) }}, } go d.async() return d, nil } +func (d *Db) handlerErrMsg() { + for msg := range d.errMsgCh { + // TODO handle error: either log it, retry, or whatever makes sense for your application + _ = msg + } +} + func (d *Db) Put(key []byte, value []byte) error { // calculate key and value size keyLen := int64(len(key)) valueLen := int64(len(value)) - // Write to WAL - err := d.wal.Put(key, value) - if err != nil { - return err - } + d.pool.Put(func() { + // Write to WAL + err := d.wal.Put(key, value) + if err != nil { + err := d.wal.Delete(key) + if err != nil { + d.errMsgCh <- []byte(err.Error()) + } + } + }) // if sync write, save wal if d.option.Option.SyncWrite { From 914668dcf44f7ead0fc98dc8d87062063077e78e Mon Sep 17 00:00:00 2001 From: sjcsjc123 <1401189096@qq.com> Date: Sat, 19 Aug 2023 01:29:53 +0800 Subject: [PATCH 04/35] add bloom --- db/memory/bloom.go | 138 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 138 insertions(+) create mode 100644 db/memory/bloom.go diff --git a/db/memory/bloom.go b/db/memory/bloom.go new file mode 100644 index 0000000..3902f25 --- /dev/null +++ b/db/memory/bloom.go @@ -0,0 +1,138 @@ +package memory + +import "math" + +// Filter is an encoded set of []byte keys. +type Filter []byte + +// MayContainKey returns whether the filter may contain given key. False positives +func (f Filter) MayContainKey(k []byte) bool { + return f.MayContain(Hash(k)) +} + +// MayContain returns whether the filter may contain given key. False positives +// are possible, where it returns true for keys not in the original set. +func (f Filter) MayContain(h uint32) bool { + if len(f) < 2 { + return false + } + k := f[len(f)-1] + if k > 30 { + // This is reserved for potentially new encodings for short Bloom filters. + // Consider it a match. + return true + } + nBits := uint32(8 * (len(f) - 1)) + delta := h>>17 | h<<15 + for j := uint8(0); j < k; j++ { + bitPos := h % nBits + if f[bitPos/8]&(1<<(bitPos%8)) == 0 { + return false + } + h += delta + } + return true +} + +// NewFilter returns a new Bloom filter that encodes a set of []byte keys with +// the given number of bits per key, approximately. +// +// A good bitsPerKey value is 10, which yields a filter with ~ 1% false +// positive rate. +func NewFilter(keys []uint32, bitsPerKey int) Filter { + return Filter(appendFilter(nil, keys, bitsPerKey)) +} + +// BloomBitsPerKey returns the bits per key required by bloomfilter based on +// the false positive rate. +func BloomBitsPerKey(numEntries int, fp float64) int { + size := -1 * float64(numEntries) * math.Log(fp) / math.Pow(float64(0.69314718056), 2) + locs := math.Ceil(float64(0.69314718056) * size / float64(numEntries)) + return int(locs) +} + +func appendFilter(buf []byte, keys []uint32, bitsPerKey int) []byte { + if bitsPerKey < 0 { + bitsPerKey = 0 + } + // 0.69 is approximately ln(2). + k := uint32(float64(bitsPerKey) * 0.69) + if k < 1 { + k = 1 + } + if k > 30 { + k = 30 + } + + nBits := len(keys) * bitsPerKey + // For small len(keys), we can see a very high false positive rate. Fix it + // by enforcing a minimum bloom filter length. + if nBits < 64 { + nBits = 64 + } + nBytes := (nBits + 7) / 8 + nBits = nBytes * 8 + buf, filter := extend(buf, nBytes+1) + + for _, h := range keys { + delta := h>>17 | h<<15 + for j := uint32(0); j < k; j++ { + bitPos := h % uint32(nBits) + filter[bitPos/8] |= 1 << (bitPos % 8) + h += delta + } + } + filter[nBytes] = uint8(k) + + return buf +} + +// extend appends n zero bytes to b. It returns the overall slice (of length +// n+len(originalB)) and the slice of n trailing zeroes. +func extend(b []byte, n int) (overall, trailer []byte) { + want := n + len(b) + if want <= cap(b) { + overall = b[:want] + trailer = overall[len(b):] + for i := range trailer { + trailer[i] = 0 + } + } else { + // Grow the capacity exponentially, with a 1KiB minimum. + c := 1024 + for c < want { + c += c / 4 + } + overall = make([]byte, want, c) + trailer = overall[len(b):] + copy(overall, b) + } + return overall, trailer +} + +// Hash implements a hashing algorithm similar to the Murmur hash. +func Hash(b []byte) uint32 { + const ( + seed = 0xbc9f1d34 + m = 0xc6a4a793 + ) + h := uint32(seed) ^ uint32(len(b))*m + for ; len(b) >= 4; b = b[4:] { + h += uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 + h *= m + h ^= h >> 16 + } + switch len(b) { + case 3: + h += uint32(b[2]) << 16 + fallthrough + case 2: + h += uint32(b[1]) << 8 + fallthrough + case 1: + h += uint32(b[0]) + h *= m + h ^= h >> 24 + } + return h +} From 3b4a5b56a5197cba3dd01335cd4b450eef36680d Mon Sep 17 00:00:00 2001 From: sjcsjc123 <1401189096@qq.com> Date: Sat, 19 Aug 2023 01:30:30 +0800 Subject: [PATCH 05/35] add bloom --- db/memory/bloom_test.go | 143 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 143 insertions(+) create mode 100644 db/memory/bloom_test.go diff --git a/db/memory/bloom_test.go b/db/memory/bloom_test.go new file mode 100644 index 0000000..52da531 --- /dev/null +++ b/db/memory/bloom_test.go @@ -0,0 +1,143 @@ +package memory + +import ( + "testing" +) + +func (f Filter) String() string { + s := make([]byte, 8*len(f)) + for i, x := range f { + for j := 0; j < 8; j++ { + if x&(1<> 0) + b[1] = uint8(uint32(i) >> 8) + b[2] = uint8(uint32(i) >> 16) + b[3] = uint8(uint32(i) >> 24) + return b + } + + nMediocreFilters, nGoodFilters := 0, 0 +loop: + for length := 1; length <= 10000; length = nextLength(length) { + keys := make([][]byte, 0, length) + for i := 0; i < length; i++ { + keys = append(keys, le32(i)) + } + var hashes []uint32 + for _, key := range keys { + hashes = append(hashes, Hash(key)) + } + f := NewFilter(hashes, 10) + + if len(f) > (length*10/8)+40 { + t.Errorf("length=%d: len(f)=%d is too large", length, len(f)) + continue + } + + // All added keys must match. + for _, key := range keys { + if !f.MayContainKey(key) { + t.Errorf("length=%d: did not contain key %q", length, key) + continue loop + } + } + + // Check false positive rate. + nFalsePositive := 0 + for i := 0; i < 10000; i++ { + if f.MayContainKey(le32(1e9 + i)) { + nFalsePositive++ + } + } + if nFalsePositive > 0.02*10000 { + t.Errorf("length=%d: %d false positives in 10000", length, nFalsePositive) + continue + } + if nFalsePositive > 0.0125*10000 { + nMediocreFilters++ + } else { + nGoodFilters++ + } + } + + if nMediocreFilters > nGoodFilters/5 { + t.Errorf("%d mediocre filters but only %d good filters", nMediocreFilters, nGoodFilters) + } +} + +func TestHash(t *testing.T) { + // The magic want numbers come from running the C++ leveldb code in hash.cc. + testCases := []struct { + s string + want uint32 + }{ + {"", 0xbc9f1d34}, + {"g", 0xd04a8bda}, + {"go", 0x3e0b0745}, + {"gop", 0x0c326610}, + {"goph", 0x8c9d6390}, + {"gophe", 0x9bfd4b0a}, + {"gopher", 0xa78edc7c}, + {"I had a dream it would end this way.", 0xe14a9db9}, + } + for _, tc := range testCases { + if got := Hash([]byte(tc.s)); got != tc.want { + t.Errorf("s=%q: got 0x%08x, want 0x%08x", tc.s, got, tc.want) + } + } +} From cd0b12a2a8e8e24f6182cd739741809eb3deba2e Mon Sep 17 00:00:00 2001 From: sjcsjc123 <1401189096@qq.com> Date: Sat, 19 Aug 2023 01:33:07 +0800 Subject: [PATCH 06/35] fix ci bug --- .github/workflows/go.yml | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index fd3b818..fdca9b5 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -27,17 +27,6 @@ jobs: - name: Install Dependency run: sudo apt-get update && sudo apt-get -y install librocksdb-dev - - name: Clear cache - run: go clean -modcache - - - name: Cache Go modules - uses: actions/cache@v2 - with: - path: ~/go/pkg/mod - key: ${{ runner.os }}-go-${{ hashFiles('**/go.sum') }} - restore-keys: | - ${{ runner.os }}-go- - - name: Check Format run: if [ "$(gofmt -s -l . | wc -l)" -gt 0 ]; then exit 1; fi From a6e62d62e93e5d2d04a6a08033a3f4559c72f3f8 Mon Sep 17 00:00:00 2001 From: sjcsjc123 <1401189096@qq.com> Date: Sat, 19 Aug 2023 01:55:31 +0800 Subject: [PATCH 07/35] fix ci bug --- .github/workflows/go.yml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index fdca9b5..4614f7f 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -28,7 +28,14 @@ jobs: run: sudo apt-get update && sudo apt-get -y install librocksdb-dev - name: Check Format - run: if [ "$(gofmt -s -l . | wc -l)" -gt 0 ]; then exit 1; fi + run: | + unformatted=$(gofmt -s -l .) + if [ "$unformatted" != "" ]; then + echo "the following files are not formatted:" + echo "$unformatted" + exit 1 + fi + - name: Lint uses: golangci/golangci-lint-action@v3.6.0 From 154cfd2bf62e1af3677962f52d20720b1dd8f72c Mon Sep 17 00:00:00 2001 From: sjcsjc123 <1401189096@qq.com> Date: Sat, 19 Aug 2023 01:59:45 +0800 Subject: [PATCH 08/35] fix ci bug --- db/memory/wal.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/db/memory/wal.go b/db/memory/wal.go index e7f8bba..0d51f18 100644 --- a/db/memory/wal.go +++ b/db/memory/wal.go @@ -3,9 +3,10 @@ package memory import ( "encoding/binary" "errors" - "github.com/ByteStorage/FlyDB/db/fileio" "hash/crc32" "time" + + "github.com/ByteStorage/FlyDB/db/fileio" ) const ( From dbf55bcad99b7aa51588baba7a02183dd0176887 Mon Sep 17 00:00:00 2001 From: sjcsjc123 <1401189096@qq.com> Date: Sat, 19 Aug 2023 02:05:15 +0800 Subject: [PATCH 09/35] fix ci bug --- .github/workflows/go.yml | 7 ++++++- db/memory/db.go | 4 ---- db/memory/wal.go | 3 +++ 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 4614f7f..85e80b3 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -32,9 +32,14 @@ jobs: unformatted=$(gofmt -s -l .) if [ "$unformatted" != "" ]; then echo "the following files are not formatted:" - echo "$unformatted" + for file in $unformatted; do + echo "$file:" + gofmt -s -d "$file" + echo "------" + done exit 1 fi + - name: Lint diff --git a/db/memory/db.go b/db/memory/db.go index f532a00..4aab0e5 100644 --- a/db/memory/db.go +++ b/db/memory/db.go @@ -7,10 +7,6 @@ import ( "sync" ) -const ( - walFileName = "/db.wal" -) - type Options struct { Option config.Options LogNum uint32 diff --git a/db/memory/wal.go b/db/memory/wal.go index 0d51f18..33143dd 100644 --- a/db/memory/wal.go +++ b/db/memory/wal.go @@ -13,6 +13,9 @@ const ( // Record types putType = byte(1) deleteType = byte(2) + + // File names for WAL + walFileName = "/db.wal" ) type Wal struct { From 392174a72fb86d5030a7e212a1a731ba611fef0a Mon Sep 17 00:00:00 2001 From: sjcsjc123 <1401189096@qq.com> Date: Sat, 19 Aug 2023 02:07:56 +0800 Subject: [PATCH 10/35] fix ci bug --- db/memory/wal.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/db/memory/wal.go b/db/memory/wal.go index 33143dd..f41f9f3 100644 --- a/db/memory/wal.go +++ b/db/memory/wal.go @@ -37,12 +37,12 @@ func NewWal(options Options) (*Wal, error) { } // Put writes a record to the WAL. -//+---------+-----------+-----------+----------------+--- ... ---+ -//|CRC (4B) | Size (2B) | Type (1B) | Log number (4B)| Payload | -//+---------+-----------+-----------+----------------+--- ... ---+ -//Same as above, with the addition of -//Log number = 32bit log file number, so that we can distinguish between -//records written by the most recent log writer vs a previous one. +// +---------+-----------+-----------+----------------+--- ... ---+ +// |CRC (4B) | Size (2B) | Type (1B) | Log number (4B)| Payload | +// +---------+-----------+-----------+----------------+--- ... ---+ +// Same as above, with the addition of +// Log number = 32bit log file number, so that we can distinguish between +// records written by the most recent log writer vs a previous one. func (w *Wal) writeRecord(recordType byte, key, value []byte) error { // Prepare the payload based on record type var payload []byte From ee04ef8ec8b9ab5d0c21f5940f5cda922bf67a82 Mon Sep 17 00:00:00 2001 From: sjcsjc123 <1401189096@qq.com> Date: Sat, 19 Aug 2023 02:11:08 +0800 Subject: [PATCH 11/35] fix ci bug --- db/memory/wal.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/db/memory/wal.go b/db/memory/wal.go index f41f9f3..c8013ed 100644 --- a/db/memory/wal.go +++ b/db/memory/wal.go @@ -37,12 +37,12 @@ func NewWal(options Options) (*Wal, error) { } // Put writes a record to the WAL. -// +---------+-----------+-----------+----------------+--- ... ---+ -// |CRC (4B) | Size (2B) | Type (1B) | Log number (4B)| Payload | -// +---------+-----------+-----------+----------------+--- ... ---+ -// Same as above, with the addition of -// Log number = 32bit log file number, so that we can distinguish between -// records written by the most recent log writer vs a previous one. +// +---------+-----------+-----------+----------------+--- ... ---+ +// |CRC (4B) | Size (2B) | Type (1B) | Log number (4B)| Payload | +// +---------+-----------+-----------+----------------+--- ... ---+ +// Same as above, with the addition of +// Log number = 32bit log file number, so that we can distinguish between +// records written by the most recent log writer vs a previous one. func (w *Wal) writeRecord(recordType byte, key, value []byte) error { // Prepare the payload based on record type var payload []byte From 6bc58369fac1714d058e0724bcec74825bc6d24f Mon Sep 17 00:00:00 2001 From: sjcsjc123 <1401189096@qq.com> Date: Sat, 19 Aug 2023 02:52:46 +0800 Subject: [PATCH 12/35] add column feat --- db/memory/api.go | 20 +++++ db/memory/column.go | 173 +++++++++++++++++++++++++++++++++++++++++ db/memory/db.go | 30 +++---- db/memory/error_log.go | 5 ++ 4 files changed, 209 insertions(+), 19 deletions(-) create mode 100644 db/memory/api.go create mode 100644 db/memory/column.go create mode 100644 db/memory/error_log.go diff --git a/db/memory/api.go b/db/memory/api.go new file mode 100644 index 0000000..6165865 --- /dev/null +++ b/db/memory/api.go @@ -0,0 +1,20 @@ +package memory + +type Api interface { + CreateColumnFamily(name string) error + DropColumnFamily(name string) error + Put(key []byte, value []byte) error + Get(key []byte) ([]byte, error) + Delete(key []byte) error + Keys() ([][]byte, error) + Close() error + ListColumnFamilies() ([]string, error) + PutCF(cf string, key []byte, value []byte) error + GetCF(cf string, key []byte) ([]byte, error) + DeleteCF(cf string, key []byte) error + KeysCF(cf string) ([][]byte, error) +} + +// Put: We can divide one column to many column families. Each column family has its own memTable and SSTable. +// Like MySQL divide one table to many partitions. Each partition has its own value and index. +// 简单来说就是通过列族来模拟的MySQL的分库分表的功能,可以实现并发写入,提高写入性能,例如 diff --git a/db/memory/column.go b/db/memory/column.go new file mode 100644 index 0000000..67e799e --- /dev/null +++ b/db/memory/column.go @@ -0,0 +1,173 @@ +package memory + +import ( + "errors" + "fmt" + "github.com/ByteStorage/FlyDB/config" + "io/ioutil" + "os" + "strings" + "sync" +) + +type Options struct { + Option config.Options + LogNum uint32 + FileSize int64 + SaveTime int64 + MemSize int64 + TotalMemSize int64 + ColumnName string + wal *Wal +} + +// Column is a column family +type Column interface { + // CreateColumnFamily create column family + CreateColumnFamily(name string) error + // DropColumnFamily drop column family + DropColumnFamily(name string) error + // ListColumnFamilies list column families + ListColumnFamilies() ([]string, error) + // Put a key/value pair into the column family + Put(cf string, key []byte, value []byte) error + // Get a value from the column family + Get(cf string, key []byte) ([]byte, error) + // Delete a key from the column family + Delete(cf string, key []byte) error + // Keys returns all keys in the column family + Keys(cf string) ([][]byte, error) +} + +// NewColumn create a column family +func NewColumn(option Options) (Column, error) { + // create wal, all column family share a wal + wal, err := NewWal(option) + if err != nil { + return nil, err + } + + // load column family + col, err := loadColumn(option) + if err != nil { + return nil, err + } + + // if column family exists, return it + if len(col) > 0 { + columnFamily := make(map[string]*Db) + for k, v := range col { + columnFamily[k] = v + } + return &column{ + option: option, + mux: sync.RWMutex{}, + columnFamily: columnFamily, + wal: wal, + }, nil + } + + // if column family not exists, create a new column family + if option.ColumnName == "" { + option.ColumnName = "default" + } + + // create a new db + db, err := NewDB(option) + if err != nil { + return nil, err + } + return &column{ + option: option, + mux: sync.RWMutex{}, + columnFamily: map[string]*Db{ + option.ColumnName: db, + }, + wal: wal, + }, nil +} + +type column struct { + mux sync.RWMutex + wal *Wal + columnFamily map[string]*Db + option Options +} + +func (c *column) CreateColumnFamily(name string) error { + c.mux.Lock() + defer c.mux.Unlock() + if _, ok := c.columnFamily[name]; ok { + return errors.New("column family already exists") + } + db, err := NewDB(c.option) + if err != nil { + return err + } + c.columnFamily[name] = db + return nil +} + +func (c *column) DropColumnFamily(name string) error { + c.mux.Lock() + defer c.mux.Unlock() + if _, ok := c.columnFamily[name]; !ok { + return errors.New("column family not exists") + } + delete(c.columnFamily, name) + return nil +} + +func (c *column) ListColumnFamilies() ([]string, error) { + c.mux.RLock() + defer c.mux.RUnlock() + var list []string + for k := range c.columnFamily { + list = append(list, k) + } + return list, nil +} + +func (c *column) Put(cf string, key []byte, value []byte) error { + return c.columnFamily[cf].Put(key, value) +} + +func (c *column) Get(cf string, key []byte) ([]byte, error) { + return c.columnFamily[cf].Get(key) +} + +func (c *column) Delete(cf string, key []byte) error { + return c.columnFamily[cf].Delete(key) +} + +func (c *column) Keys(cf string) ([][]byte, error) { + return c.columnFamily[cf].Keys() +} + +func loadColumn(option Options) (map[string]*Db, error) { + base := option.Option.DirPath + base = strings.Trim(base, "/") + // Check if the base path exists + if _, err := os.Stat(base); os.IsNotExist(err) { + return nil, fmt.Errorf("directory does not exist: %s", base) + } + // List all directories under the base path + dirs, err := ioutil.ReadDir(base) + if err != nil { + return nil, err + } + columns := make(map[string]*Db) + for _, dir := range dirs { + if dir.IsDir() { + colName := dir.Name() + dirPath := base + "/" + colName + option.Option.DirPath = dirPath + db, err := NewDB(option) + if err != nil { + return nil, err + } + columns[colName] = db + } + } + return columns, nil +} diff --git a/db/memory/db.go b/db/memory/db.go index 4aab0e5..e6d26fb 100644 --- a/db/memory/db.go +++ b/db/memory/db.go @@ -1,21 +1,10 @@ package memory import ( - "github.com/ByteStorage/FlyDB/config" "github.com/ByteStorage/FlyDB/db/engine" - "strings" "sync" ) -type Options struct { - Option config.Options - LogNum uint32 - FileSize int64 - SaveTime int64 - MemSize int64 - TotalMemSize int64 -} - type Db struct { option Options db *engine.DB @@ -31,21 +20,15 @@ type Db struct { func NewDB(option Options) (*Db, error) { mem := NewMemTable() + option.Option.DirPath = option.Option.DirPath + "/" + option.ColumnName db, err := engine.NewDB(option.Option) if err != nil { return nil, err } - // Create or open the WAL file. - option.Option.DirPath = strings.TrimSuffix(option.Option.DirPath, "/") - wal, err := NewWal(option) - if err != nil { - return nil, err - } - go wal.AsyncSave() d := &Db{ mem: mem, db: db, - wal: wal, + wal: option.wal, option: option, oldList: make([]*MemTable, 0), oldListChan: make(chan *MemTable, 1000000), @@ -54,6 +37,7 @@ func NewDB(option Options) (*Db, error) { pool: &sync.Pool{New: func() interface{} { return make([]byte, 0, 1024) }}, } go d.async() + go d.wal.AsyncSave() return d, nil } @@ -130,6 +114,14 @@ func (d *Db) Get(key []byte) ([]byte, error) { return d.db.Get(key) } +func (d *Db) Delete(key []byte) error { + panic("implement me") +} + +func (d *Db) Keys() ([][]byte, error) { + panic("implement me") +} + func (d *Db) Close() error { err := d.wal.Save() if err != nil { diff --git a/db/memory/error_log.go b/db/memory/error_log.go new file mode 100644 index 0000000..e839343 --- /dev/null +++ b/db/memory/error_log.go @@ -0,0 +1,5 @@ +package memory + +func Errorf(format string, args ...interface{}) { + +} From 1a98586b65720d2ad7618874f25ccdb9d804d3de Mon Sep 17 00:00:00 2001 From: sjcsjc123 <1401189096@qq.com> Date: Sat, 19 Aug 2023 02:57:57 +0800 Subject: [PATCH 13/35] fix bug with test --- db/memory/db_test.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/db/memory/db_test.go b/db/memory/db_test.go index f06ee35..fda1eab 100644 --- a/db/memory/db_test.go +++ b/db/memory/db_test.go @@ -15,6 +15,7 @@ func TestPutAndGet(t *testing.T) { dir, _ := os.MkdirTemp("", "flydb-benchmark") opts.DirPath = dir opts.DataFileSize = 64 * 1024 * 1024 + options := Options{ Option: opts, LogNum: 100, @@ -23,6 +24,9 @@ func TestPutAndGet(t *testing.T) { MemSize: 2 * 1024 * 1024 * 1024, TotalMemSize: 10 * 1024 * 1024 * 1024, } + wal, err := NewWal(options) + assert.Nil(t, err) + options.wal = wal db, err := NewDB(options) defer db.Clean() assert.Nil(t, err) From 4ee195ab09e6f94eeda3f0aebb328b643cee9ce5 Mon Sep 17 00:00:00 2001 From: sjcsjc123 <1401189096@qq.com> Date: Sat, 19 Aug 2023 18:36:33 +0800 Subject: [PATCH 14/35] add comment --- db/memory/bloom.go | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/db/memory/bloom.go b/db/memory/bloom.go index 3902f25..a4f6acb 100644 --- a/db/memory/bloom.go +++ b/db/memory/bloom.go @@ -2,30 +2,43 @@ package memory import "math" +const ( + seed = 0xbc9f1d34 + m = 0xc6a4a793 +) + // Filter is an encoded set of []byte keys. type Filter []byte // MayContainKey returns whether the filter may contain given key. False positives func (f Filter) MayContainKey(k []byte) bool { - return f.MayContain(Hash(k)) + return f.mayContain(Hash(k)) } // MayContain returns whether the filter may contain given key. False positives // are possible, where it returns true for keys not in the original set. -func (f Filter) MayContain(h uint32) bool { +func (f Filter) mayContain(h uint32) bool { + // check if the filter is empty if len(f) < 2 { return false } + // obtain the number of hash functions k := f[len(f)-1] + // if k > 30, this is reserved for potentially new encodings for short Bloom filters. if k > 30 { // This is reserved for potentially new encodings for short Bloom filters. // Consider it a match. return true } + // calculate the total number of bits in the filter. nBits := uint32(8 * (len(f) - 1)) + // change the hash value by right shift and left shift to generate different bit positions for subsequent iterations. delta := h>>17 | h<<15 for j := uint8(0); j < k; j++ { + // For each hash function, calculate the bit position bitPos bitPos := h % nBits + // Check if the corresponding bit has been set. + // If the bit has not been set, the key is definitely not in the set, and false is returned. if f[bitPos/8]&(1<<(bitPos%8)) == 0 { return false } @@ -112,16 +125,16 @@ func extend(b []byte, n int) (overall, trailer []byte) { // Hash implements a hashing algorithm similar to the Murmur hash. func Hash(b []byte) uint32 { - const ( - seed = 0xbc9f1d34 - m = 0xc6a4a793 - ) + // The original algorithm uses a seed of 0x9747b28c. h := uint32(seed) ^ uint32(len(b))*m + // Pick up four bytes at a time. for ; len(b) >= 4; b = b[4:] { + // The original algorithm uses the following commented out code to load h += uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 h *= m h ^= h >> 16 } + // Pick up remaining bytes. switch len(b) { case 3: h += uint32(b[2]) << 16 From d825835ac08aec932ac0c47919209839606411a9 Mon Sep 17 00:00:00 2001 From: sjcsjc123 <1401189096@qq.com> Date: Sat, 19 Aug 2023 21:39:29 +0800 Subject: [PATCH 15/35] add comment --- db/{memory => column}/column.go | 25 +++++++++++++------------ db/memory/api.go | 1 - db/memory/db.go | 7 ++++--- db/memory/db_test.go | 5 +++-- db/memory/memory.go | 19 ++++++++++--------- db/memory/wal.go | 14 ++++++++++---- db/memory/wal_test.go | 3 ++- 7 files changed, 42 insertions(+), 32 deletions(-) rename db/{memory => column}/column.go (88%) diff --git a/db/memory/column.go b/db/column/column.go similarity index 88% rename from db/memory/column.go rename to db/column/column.go index 67e799e..f909f76 100644 --- a/db/memory/column.go +++ b/db/column/column.go @@ -1,9 +1,10 @@ -package memory +package column import ( "errors" "fmt" "github.com/ByteStorage/FlyDB/config" + "github.com/ByteStorage/FlyDB/db/memory" "io/ioutil" "os" "strings" @@ -18,7 +19,7 @@ type Options struct { MemSize int64 TotalMemSize int64 ColumnName string - wal *Wal + Wal *memory.Wal } // Column is a column family @@ -42,7 +43,7 @@ type Column interface { // NewColumn create a column family func NewColumn(option Options) (Column, error) { // create wal, all column family share a wal - wal, err := NewWal(option) + wal, err := memory.NewWal(option) if err != nil { return nil, err } @@ -55,7 +56,7 @@ func NewColumn(option Options) (Column, error) { // if column family exists, return it if len(col) > 0 { - columnFamily := make(map[string]*Db) + columnFamily := make(map[string]*memory.Db) for k, v := range col { columnFamily[k] = v } @@ -73,14 +74,14 @@ func NewColumn(option Options) (Column, error) { } // create a new db - db, err := NewDB(option) + db, err := memory.NewDB(option) if err != nil { return nil, err } return &column{ option: option, mux: sync.RWMutex{}, - columnFamily: map[string]*Db{ + columnFamily: map[string]*memory.Db{ option.ColumnName: db, }, wal: wal, @@ -89,8 +90,8 @@ func NewColumn(option Options) (Column, error) { type column struct { mux sync.RWMutex - wal *Wal - columnFamily map[string]*Db + wal *memory.Wal + columnFamily map[string]*memory.Db option Options } @@ -100,7 +101,7 @@ func (c *column) CreateColumnFamily(name string) error { if _, ok := c.columnFamily[name]; ok { return errors.New("column family already exists") } - db, err := NewDB(c.option) + db, err := memory.NewDB(c.option) if err != nil { return err } @@ -144,7 +145,7 @@ func (c *column) Keys(cf string) ([][]byte, error) { return c.columnFamily[cf].Keys() } -func loadColumn(option Options) (map[string]*Db, error) { +func loadColumn(option Options) (map[string]*memory.Db, error) { base := option.Option.DirPath base = strings.Trim(base, "/") // Check if the base path exists @@ -156,13 +157,13 @@ func loadColumn(option Options) (map[string]*Db, error) { if err != nil { return nil, err } - columns := make(map[string]*Db) + columns := make(map[string]*memory.Db) for _, dir := range dirs { if dir.IsDir() { colName := dir.Name() dirPath := base + "/" + colName option.Option.DirPath = dirPath - db, err := NewDB(option) + db, err := memory.NewDB(option) if err != nil { return nil, err } diff --git a/db/memory/api.go b/db/memory/api.go index 6165865..fecb673 100644 --- a/db/memory/api.go +++ b/db/memory/api.go @@ -17,4 +17,3 @@ type Api interface { // Put: We can divide one column to many column families. Each column family has its own memTable and SSTable. // Like MySQL divide one table to many partitions. Each partition has its own value and index. -// 简单来说就是通过列族来模拟的MySQL的分库分表的功能,可以实现并发写入,提高写入性能,例如 diff --git a/db/memory/db.go b/db/memory/db.go index e6d26fb..7737e25 100644 --- a/db/memory/db.go +++ b/db/memory/db.go @@ -1,12 +1,13 @@ package memory import ( + "github.com/ByteStorage/FlyDB/db/column" "github.com/ByteStorage/FlyDB/db/engine" "sync" ) type Db struct { - option Options + option column.Options db *engine.DB mem *MemTable oldList []*MemTable @@ -18,7 +19,7 @@ type Db struct { errMsgCh chan []byte } -func NewDB(option Options) (*Db, error) { +func NewDB(option column.Options) (*Db, error) { mem := NewMemTable() option.Option.DirPath = option.Option.DirPath + "/" + option.ColumnName db, err := engine.NewDB(option.Option) @@ -28,7 +29,7 @@ func NewDB(option Options) (*Db, error) { d := &Db{ mem: mem, db: db, - wal: option.wal, + wal: option.Wal, option: option, oldList: make([]*MemTable, 0), oldListChan: make(chan *MemTable, 1000000), diff --git a/db/memory/db_test.go b/db/memory/db_test.go index fda1eab..bdcc368 100644 --- a/db/memory/db_test.go +++ b/db/memory/db_test.go @@ -3,6 +3,7 @@ package memory import ( "fmt" "github.com/ByteStorage/FlyDB/config" + "github.com/ByteStorage/FlyDB/db/column" "github.com/ByteStorage/FlyDB/lib/randkv" "github.com/stretchr/testify/assert" "os" @@ -16,7 +17,7 @@ func TestPutAndGet(t *testing.T) { opts.DirPath = dir opts.DataFileSize = 64 * 1024 * 1024 - options := Options{ + options := column.Options{ Option: opts, LogNum: 100, SaveTime: 100 * 1000, @@ -26,7 +27,7 @@ func TestPutAndGet(t *testing.T) { } wal, err := NewWal(options) assert.Nil(t, err) - options.wal = wal + options.Wal = wal db, err := NewDB(options) defer db.Clean() assert.Nil(t, err) diff --git a/db/memory/memory.go b/db/memory/memory.go index 1c275a2..990b0d0 100644 --- a/db/memory/memory.go +++ b/db/memory/memory.go @@ -5,26 +5,27 @@ import ( "sync" ) +// MemTable is a in-memory table type MemTable struct { - table map[string][]byte - size int64 - mutex sync.RWMutex - hasFlush map[string]bool + table map[string][]byte // key -> value + mutex sync.RWMutex // protect table } +// NewMemTable create a new MemTable func NewMemTable() *MemTable { return &MemTable{ - table: make(map[string][]byte), - hasFlush: make(map[string]bool), + table: make(map[string][]byte), } } +// Put a key-value pair into the table func (m *MemTable) Put(key string, value []byte) { m.mutex.Lock() defer m.mutex.Unlock() m.table[key] = value } +// Get a value from the table func (m *MemTable) Get(key string) ([]byte, error) { m.mutex.RLock() defer m.mutex.RUnlock() @@ -37,9 +38,9 @@ func (m *MemTable) Get(key string) ([]byte, error) { return value, nil } -func (m *MemTable) Flush(key string) { +// Delete a key from the table +func (m *MemTable) Delete(key string) { m.mutex.Lock() defer m.mutex.Unlock() - - m.hasFlush[key] = true + delete(m.table, key) } diff --git a/db/memory/wal.go b/db/memory/wal.go index c8013ed..7e3c9d7 100644 --- a/db/memory/wal.go +++ b/db/memory/wal.go @@ -3,6 +3,7 @@ package memory import ( "encoding/binary" "errors" + "github.com/ByteStorage/FlyDB/db/column" "hash/crc32" "time" @@ -18,13 +19,15 @@ const ( walFileName = "/db.wal" ) +// Wal is a write-ahead log. type Wal struct { - m *fileio.MMapIO - logNum uint32 - saveTime int64 + m *fileio.MMapIO // MMapIOManager + logNum uint32 // Log number + saveTime int64 // Save time } -func NewWal(options Options) (*Wal, error) { +// NewWal creates a new WAL. +func NewWal(options column.Options) (*Wal, error) { mapIO, err := fileio.NewMMapIOManager(options.Option.DirPath+walFileName, options.FileSize) if err != nil { return nil, err @@ -88,14 +91,17 @@ func (w *Wal) Delete(key []byte) error { return w.writeRecord(deleteType, key, nil) } +// Save flushes the WAL to disk. func (w *Wal) Save() error { return w.m.Sync() } +// Close closes the WAL. func (w *Wal) Close() error { return w.m.Close() } +// AsyncSave periodically flushes the WAL to disk. func (w *Wal) AsyncSave() { for range time.Tick(time.Duration(w.saveTime)) { err := w.Save() diff --git a/db/memory/wal_test.go b/db/memory/wal_test.go index c63740b..9f551ad 100644 --- a/db/memory/wal_test.go +++ b/db/memory/wal_test.go @@ -2,6 +2,7 @@ package memory import ( "github.com/ByteStorage/FlyDB/config" + "github.com/ByteStorage/FlyDB/db/column" "github.com/ByteStorage/FlyDB/lib/randkv" "github.com/stretchr/testify/assert" "testing" @@ -9,7 +10,7 @@ import ( ) func TestWal_Put(t *testing.T) { - opt := Options{ + opt := column.Options{ Option: config.DefaultOptions, LogNum: 100, FileSize: 100 * 1024 * 1024, From 04a53ffb726a7fa95341e922317b2634740afc9d Mon Sep 17 00:00:00 2001 From: sjcsjc123 <1401189096@qq.com> Date: Sat, 19 Aug 2023 23:28:17 +0800 Subject: [PATCH 16/35] refactor dir --- config/column_options.go | 11 +++ config/db_memory_options.go | 11 +++ config/wal_config.go | 8 ++ db/column/column.go | 66 +++++++++----- db/memory/db.go | 21 +++-- db/memory/db_test.go | 19 ++-- db/memory/wal.go | 113 ----------------------- db/memory/wal_test.go | 30 ------- {db/memory => lib/bloom}/bloom.go | 2 +- {db/memory => lib/bloom}/bloom_test.go | 2 +- lib/wal/wal.go | 120 +++++++++++++++++++------ lib/wal/wal_db.go | 57 ------------ lib/wal/wal_test.go | 70 +++++---------- 13 files changed, 211 insertions(+), 319 deletions(-) create mode 100644 config/column_options.go create mode 100644 config/db_memory_options.go create mode 100644 config/wal_config.go delete mode 100644 db/memory/wal.go delete mode 100644 db/memory/wal_test.go rename {db/memory => lib/bloom}/bloom.go (99%) rename {db/memory => lib/bloom}/bloom_test.go (99%) delete mode 100644 lib/wal/wal_db.go diff --git a/config/column_options.go b/config/column_options.go new file mode 100644 index 0000000..058d449 --- /dev/null +++ b/config/column_options.go @@ -0,0 +1,11 @@ +package config + +type ColumnOptions struct { + Option Options + LogNum uint32 + FileSize int64 + SaveTime int64 + MemSize int64 + TotalMemSize int64 + ColumnName string +} diff --git a/config/db_memory_options.go b/config/db_memory_options.go new file mode 100644 index 0000000..ec40bba --- /dev/null +++ b/config/db_memory_options.go @@ -0,0 +1,11 @@ +package config + +type DbMemoryOptions struct { + Option Options + LogNum uint32 + FileSize int64 + SaveTime int64 + MemSize int64 + TotalMemSize int64 + ColumnName string +} diff --git a/config/wal_config.go b/config/wal_config.go new file mode 100644 index 0000000..0be7e92 --- /dev/null +++ b/config/wal_config.go @@ -0,0 +1,8 @@ +package config + +type WalConfig struct { + DirPath string + FileSize int64 + SaveTime int64 + LogNum uint32 +} diff --git a/db/column/column.go b/db/column/column.go index f909f76..d712907 100644 --- a/db/column/column.go +++ b/db/column/column.go @@ -5,23 +5,13 @@ import ( "fmt" "github.com/ByteStorage/FlyDB/config" "github.com/ByteStorage/FlyDB/db/memory" + "github.com/ByteStorage/FlyDB/lib/wal" "io/ioutil" "os" "strings" "sync" ) -type Options struct { - Option config.Options - LogNum uint32 - FileSize int64 - SaveTime int64 - MemSize int64 - TotalMemSize int64 - ColumnName string - Wal *memory.Wal -} - // Column is a column family type Column interface { // CreateColumnFamily create column family @@ -41,9 +31,15 @@ type Column interface { } // NewColumn create a column family -func NewColumn(option Options) (Column, error) { +func NewColumn(option config.ColumnOptions) (Column, error) { // create wal, all column family share a wal - wal, err := memory.NewWal(option) + walConf := config.WalConfig{ + DirPath: option.Option.DirPath, + LogNum: option.LogNum, + SaveTime: option.SaveTime, + FileSize: option.FileSize, + } + w, err := wal.NewWal(walConf) if err != nil { return nil, err } @@ -64,7 +60,7 @@ func NewColumn(option Options) (Column, error) { option: option, mux: sync.RWMutex{}, columnFamily: columnFamily, - wal: wal, + wal: w, }, nil } @@ -73,8 +69,18 @@ func NewColumn(option Options) (Column, error) { option.ColumnName = "default" } + options := config.DbMemoryOptions{ + Option: option.Option, + LogNum: option.LogNum, + FileSize: option.FileSize, + SaveTime: option.SaveTime, + MemSize: option.MemSize, + TotalMemSize: option.TotalMemSize, + ColumnName: option.ColumnName, + } + // create a new db - db, err := memory.NewDB(option) + db, err := memory.NewDB(options) if err != nil { return nil, err } @@ -84,15 +90,15 @@ func NewColumn(option Options) (Column, error) { columnFamily: map[string]*memory.Db{ option.ColumnName: db, }, - wal: wal, + wal: w, }, nil } type column struct { mux sync.RWMutex - wal *memory.Wal + wal *wal.Wal columnFamily map[string]*memory.Db - option Options + option config.ColumnOptions } func (c *column) CreateColumnFamily(name string) error { @@ -101,7 +107,16 @@ func (c *column) CreateColumnFamily(name string) error { if _, ok := c.columnFamily[name]; ok { return errors.New("column family already exists") } - db, err := memory.NewDB(c.option) + options := config.DbMemoryOptions{ + Option: c.option.Option, + LogNum: c.option.LogNum, + FileSize: c.option.FileSize, + SaveTime: c.option.SaveTime, + MemSize: c.option.MemSize, + TotalMemSize: c.option.TotalMemSize, + ColumnName: c.option.ColumnName, + } + db, err := memory.NewDB(options) if err != nil { return err } @@ -145,7 +160,7 @@ func (c *column) Keys(cf string) ([][]byte, error) { return c.columnFamily[cf].Keys() } -func loadColumn(option Options) (map[string]*memory.Db, error) { +func loadColumn(option config.ColumnOptions) (map[string]*memory.Db, error) { base := option.Option.DirPath base = strings.Trim(base, "/") // Check if the base path exists @@ -163,7 +178,16 @@ func loadColumn(option Options) (map[string]*memory.Db, error) { colName := dir.Name() dirPath := base + "/" + colName option.Option.DirPath = dirPath - db, err := memory.NewDB(option) + options := config.DbMemoryOptions{ + Option: option.Option, + LogNum: option.LogNum, + FileSize: option.FileSize, + SaveTime: option.SaveTime, + MemSize: option.MemSize, + TotalMemSize: option.TotalMemSize, + ColumnName: option.ColumnName, + } + db, err := memory.NewDB(options) if err != nil { return nil, err } diff --git a/db/memory/db.go b/db/memory/db.go index 7737e25..96457d3 100644 --- a/db/memory/db.go +++ b/db/memory/db.go @@ -1,17 +1,18 @@ package memory import ( - "github.com/ByteStorage/FlyDB/db/column" + "github.com/ByteStorage/FlyDB/config" "github.com/ByteStorage/FlyDB/db/engine" + "github.com/ByteStorage/FlyDB/lib/wal" "sync" ) type Db struct { - option column.Options + option config.DbMemoryOptions db *engine.DB mem *MemTable oldList []*MemTable - wal *Wal + wal *wal.Wal oldListChan chan *MemTable totalSize int64 activeSize int64 @@ -19,22 +20,32 @@ type Db struct { errMsgCh chan []byte } -func NewDB(option column.Options) (*Db, error) { +func NewDB(option config.DbMemoryOptions) (*Db, error) { mem := NewMemTable() option.Option.DirPath = option.Option.DirPath + "/" + option.ColumnName db, err := engine.NewDB(option.Option) if err != nil { return nil, err } + walConfig := config.WalConfig{ + DirPath: option.Option.DirPath, + LogNum: option.LogNum, + FileSize: option.FileSize, + SaveTime: option.SaveTime, + } + newWal, err := wal.NewWal(walConfig) + if err != nil { + return nil, err + } d := &Db{ mem: mem, db: db, - wal: option.Wal, option: option, oldList: make([]*MemTable, 0), oldListChan: make(chan *MemTable, 1000000), activeSize: 0, totalSize: 0, + wal: newWal, pool: &sync.Pool{New: func() interface{} { return make([]byte, 0, 1024) }}, } go d.async() diff --git a/db/memory/db_test.go b/db/memory/db_test.go index bdcc368..eddb167 100644 --- a/db/memory/db_test.go +++ b/db/memory/db_test.go @@ -3,7 +3,6 @@ package memory import ( "fmt" "github.com/ByteStorage/FlyDB/config" - "github.com/ByteStorage/FlyDB/db/column" "github.com/ByteStorage/FlyDB/lib/randkv" "github.com/stretchr/testify/assert" "os" @@ -16,19 +15,13 @@ func TestPutAndGet(t *testing.T) { dir, _ := os.MkdirTemp("", "flydb-benchmark") opts.DirPath = dir opts.DataFileSize = 64 * 1024 * 1024 - - options := column.Options{ - Option: opts, - LogNum: 100, - SaveTime: 100 * 1000, - FileSize: 100 * 1024 * 1024, - MemSize: 2 * 1024 * 1024 * 1024, - TotalMemSize: 10 * 1024 * 1024 * 1024, + memoryOptions := config.DbMemoryOptions{ + Option: opts, + LogNum: 100, + FileSize: 100 * 1024 * 1024, + SaveTime: 100 * 1000, } - wal, err := NewWal(options) - assert.Nil(t, err) - options.Wal = wal - db, err := NewDB(options) + db, err := NewDB(memoryOptions) defer db.Clean() assert.Nil(t, err) assert.NotNil(t, db) diff --git a/db/memory/wal.go b/db/memory/wal.go deleted file mode 100644 index 7e3c9d7..0000000 --- a/db/memory/wal.go +++ /dev/null @@ -1,113 +0,0 @@ -package memory - -import ( - "encoding/binary" - "errors" - "github.com/ByteStorage/FlyDB/db/column" - "hash/crc32" - "time" - - "github.com/ByteStorage/FlyDB/db/fileio" -) - -const ( - // Record types - putType = byte(1) - deleteType = byte(2) - - // File names for WAL - walFileName = "/db.wal" -) - -// Wal is a write-ahead log. -type Wal struct { - m *fileio.MMapIO // MMapIOManager - logNum uint32 // Log number - saveTime int64 // Save time -} - -// NewWal creates a new WAL. -func NewWal(options column.Options) (*Wal, error) { - mapIO, err := fileio.NewMMapIOManager(options.Option.DirPath+walFileName, options.FileSize) - if err != nil { - return nil, err - } - return &Wal{ - m: mapIO, - logNum: options.LogNum, - saveTime: options.SaveTime, - }, nil -} - -// Put writes a record to the WAL. -// +---------+-----------+-----------+----------------+--- ... ---+ -// |CRC (4B) | Size (2B) | Type (1B) | Log number (4B)| Payload | -// +---------+-----------+-----------+----------------+--- ... ---+ -// Same as above, with the addition of -// Log number = 32bit log file number, so that we can distinguish between -// records written by the most recent log writer vs a previous one. -func (w *Wal) writeRecord(recordType byte, key, value []byte) error { - // Prepare the payload based on record type - var payload []byte - switch recordType { - case putType: - payload = append(key, value...) - case deleteType: - payload = key - default: - return errors.New("unknown record type") - } - - size := uint16(4 + len(payload)) // 4 bytes for log number - buffer := make([]byte, 4+2+1+4+len(payload)) - - // Compute CRC - crc := crc32.ChecksumIEEE(buffer[4:]) - binary.LittleEndian.PutUint32(buffer, crc) - - // Write size - binary.LittleEndian.PutUint16(buffer[4:], size) - - // Write type - buffer[4+2] = recordType - - // Write log number - binary.LittleEndian.PutUint32(buffer[4+2+1:], w.logNum) - - // Write payload - copy(buffer[4+2+1+4:], payload) - - _, err := w.m.Write(buffer) - return err -} - -// Put writes a record to the WAL. -func (w *Wal) Put(key []byte, value []byte) error { - return w.writeRecord(putType, key, value) -} - -// Delete writes a delete record to the WAL. -func (w *Wal) Delete(key []byte) error { - return w.writeRecord(deleteType, key, nil) -} - -// Save flushes the WAL to disk. -func (w *Wal) Save() error { - return w.m.Sync() -} - -// Close closes the WAL. -func (w *Wal) Close() error { - return w.m.Close() -} - -// AsyncSave periodically flushes the WAL to disk. -func (w *Wal) AsyncSave() { - for range time.Tick(time.Duration(w.saveTime)) { - err := w.Save() - if err != nil { - // TODO how to fix this error? - continue - } - } -} diff --git a/db/memory/wal_test.go b/db/memory/wal_test.go deleted file mode 100644 index 9f551ad..0000000 --- a/db/memory/wal_test.go +++ /dev/null @@ -1,30 +0,0 @@ -package memory - -import ( - "github.com/ByteStorage/FlyDB/config" - "github.com/ByteStorage/FlyDB/db/column" - "github.com/ByteStorage/FlyDB/lib/randkv" - "github.com/stretchr/testify/assert" - "testing" - "time" -) - -func TestWal_Put(t *testing.T) { - opt := column.Options{ - Option: config.DefaultOptions, - LogNum: 100, - FileSize: 100 * 1024 * 1024, - SaveTime: 100 * 1000, - } - wal, err := NewWal(opt) - defer wal.Close() - assert.Nil(t, err) - assert.NotNil(t, wal) - start := time.Now() - for n := 0; n < 500000; n++ { - err = wal.Put(randkv.GetTestKey(n), randkv.RandomValue(24)) - assert.Nil(t, err) - } - end := time.Now() - t.Log("put time: ", end.Sub(start).String()) -} diff --git a/db/memory/bloom.go b/lib/bloom/bloom.go similarity index 99% rename from db/memory/bloom.go rename to lib/bloom/bloom.go index a4f6acb..563e15c 100644 --- a/db/memory/bloom.go +++ b/lib/bloom/bloom.go @@ -1,4 +1,4 @@ -package memory +package bloom import "math" diff --git a/db/memory/bloom_test.go b/lib/bloom/bloom_test.go similarity index 99% rename from db/memory/bloom_test.go rename to lib/bloom/bloom_test.go index 52da531..e7f9d4b 100644 --- a/db/memory/bloom_test.go +++ b/lib/bloom/bloom_test.go @@ -1,4 +1,4 @@ -package memory +package bloom import ( "testing" diff --git a/lib/wal/wal.go b/lib/wal/wal.go index cd76791..540b279 100644 --- a/lib/wal/wal.go +++ b/lib/wal/wal.go @@ -1,49 +1,113 @@ package wal import ( - "github.com/tidwall/wal" + "encoding/binary" + "errors" + "github.com/ByteStorage/FlyDB/config" + "hash/crc32" + "time" + + "github.com/ByteStorage/FlyDB/db/fileio" ) -const FileName = "/tmp/flydb/wal" +const ( + // Record types + putType = byte(1) + deleteType = byte(2) + + // File names for WAL + walFileName = "/db.wal" +) +// Wal is a write-ahead log. type Wal struct { - log *wal.Log + m *fileio.MMapIO // MMapIOManager + logNum uint32 // Log number + saveTime int64 // Save time } -func (w *Wal) Write(data []byte) error { - index, err := w.log.LastIndex() +// NewWal creates a new WAL. +func NewWal(options config.WalConfig) (*Wal, error) { + mapIO, err := fileio.NewMMapIOManager(options.DirPath+walFileName, options.FileSize) if err != nil { - return err + return nil, err } - return w.log.Write(index+1, data) + return &Wal{ + m: mapIO, + logNum: options.LogNum, + saveTime: options.SaveTime, + }, nil } -func (w *Wal) Read(index uint64) ([]byte, error) { - return w.log.Read(index) +// Put writes a record to the WAL. +// +---------+-----------+-----------+----------------+--- ... ---+ +// |CRC (4B) | Size (2B) | Type (1B) | Log number (4B)| Payload | +// +---------+-----------+-----------+----------------+--- ... ---+ +// Same as above, with the addition of +// Log number = 32bit log file number, so that we can distinguish between +// records written by the most recent log writer vs a previous one. +func (w *Wal) writeRecord(recordType byte, key, value []byte) error { + // Prepare the payload based on record type + var payload []byte + switch recordType { + case putType: + payload = append(key, value...) + case deleteType: + payload = key + default: + return errors.New("unknown record type") + } + + size := uint16(4 + len(payload)) // 4 bytes for log number + buffer := make([]byte, 4+2+1+4+len(payload)) + + // Compute CRC + crc := crc32.ChecksumIEEE(buffer[4:]) + binary.LittleEndian.PutUint32(buffer, crc) + + // Write size + binary.LittleEndian.PutUint16(buffer[4:], size) + + // Write type + buffer[4+2] = recordType + + // Write log number + binary.LittleEndian.PutUint32(buffer[4+2+1:], w.logNum) + + // Write payload + copy(buffer[4+2+1+4:], payload) + + _, err := w.m.Write(buffer) + return err } -func (w *Wal) ReadLast() ([]byte, error) { - index, err := w.log.LastIndex() - if err != nil { - return nil, err - } - return w.log.Read(index) +// Put writes a record to the WAL. +func (w *Wal) Put(key []byte, value []byte) error { + return w.writeRecord(putType, key, value) } -func New() (*Wal, error) { - log, err := wal.Open(FileName, nil) - if err != nil { - return &Wal{}, err - } - index, err := log.LastIndex() - if err != nil { - return &Wal{}, err - } - if index == 0 { - err := log.Write(1, []byte("--------------------")) +// Delete writes a delete record to the WAL. +func (w *Wal) Delete(key []byte) error { + return w.writeRecord(deleteType, key, nil) +} + +// Save flushes the WAL to disk. +func (w *Wal) Save() error { + return w.m.Sync() +} + +// Close closes the WAL. +func (w *Wal) Close() error { + return w.m.Close() +} + +// AsyncSave periodically flushes the WAL to disk. +func (w *Wal) AsyncSave() { + for range time.Tick(time.Duration(w.saveTime)) { + err := w.Save() if err != nil { - return &Wal{}, err + // TODO how to fix this error? + continue } } - return &Wal{log: log}, nil } diff --git a/lib/wal/wal_db.go b/lib/wal/wal_db.go deleted file mode 100644 index 7184ee5..0000000 --- a/lib/wal/wal_db.go +++ /dev/null @@ -1,57 +0,0 @@ -package wal - -import "encoding/json" - -type WriteMessage struct { - Key []byte - Value []byte - Type int //0: put, 1: get , 2: delete , 3: update -} - -func (w *Wal) Put(key []byte, value []byte) error { - marshal, err := json.Marshal(&WriteMessage{ - Key: key, - Value: value, - Type: 0, - }) - if err != nil { - return err - } - return w.Write(marshal) -} - -func (w *Wal) Get(key []byte) error { - marshal, err := json.Marshal(&WriteMessage{ - Key: key, - Type: 1, - Value: nil, - }) - if err != nil { - return err - } - return w.Write(marshal) -} - -func (w *Wal) Delete(key []byte) error { - marshal, err := json.Marshal(&WriteMessage{ - Key: key, - Type: 2, - Value: nil, - }) - if err != nil { - return err - } - return w.Write(marshal) -} - -func (w *Wal) Update(key []byte, value []byte) error { - marshal, err := json.Marshal(&WriteMessage{ - Key: key, - Value: value, - Type: 0, - }) - if err != nil { - return err - } - return w.Write(marshal) -} diff --git a/lib/wal/wal_test.go b/lib/wal/wal_test.go index f856f65..116f418 100644 --- a/lib/wal/wal_test.go +++ b/lib/wal/wal_test.go @@ -1,60 +1,30 @@ package wal import ( + "github.com/ByteStorage/FlyDB/config" + "github.com/ByteStorage/FlyDB/db/column" + "github.com/ByteStorage/FlyDB/lib/randkv" "github.com/stretchr/testify/assert" "testing" + "time" ) -func TestNew(t *testing.T) { - wal, err := New() +func TestWal_Put(t *testing.T) { + opt := column.Options{ + Option: config.DefaultOptions, + LogNum: 100, + FileSize: 100 * 1024 * 1024, + SaveTime: 100 * 1000, + } + wal, err := NewWal(opt) + defer wal.Close() assert.Nil(t, err) assert.NotNil(t, wal) - index, err := wal.log.LastIndex() - assert.Nil(t, err) - assert.Equal(t, uint64(1), index) -} - -func TestWal_Write(t *testing.T) { - wal, err := New() - assert.Nil(t, err) - index, err := wal.log.LastIndex() - - assert.Nil(t, err) - assert.Equal(t, uint64(1), index) - - data := []byte("test data") - err = wal.Write(data) - assert.Nil(t, err) - - index, err = wal.log.LastIndex() - assert.Nil(t, err) - assert.Equal(t, uint64(2), index) -} - -func TestWal_Read(t *testing.T) { - wal, err := New() - assert.Nil(t, err) - - data := []byte("test data") - err = wal.Write(data) - assert.Nil(t, err) - - index, err := wal.log.LastIndex() - - readData, err := wal.Read(index) - assert.Nil(t, err) - assert.Equal(t, data, readData) -} - -func TestWal_ReadLast(t *testing.T) { - wal, err := New() - assert.Nil(t, err) - - data := []byte("test data") - err = wal.Write(data) - assert.Nil(t, err) - - readData, err := wal.ReadLast() - assert.Nil(t, err) - assert.Equal(t, data, readData) + start := time.Now() + for n := 0; n < 500000; n++ { + err = wal.Put(randkv.GetTestKey(n), randkv.RandomValue(24)) + assert.Nil(t, err) + } + end := time.Now() + t.Log("put time: ", end.Sub(start).String()) } From ee2a46b766b07565823b68efa0b017b49c7752cf Mon Sep 17 00:00:00 2001 From: sjcsjc123 <1401189096@qq.com> Date: Sun, 20 Aug 2023 12:36:11 +0800 Subject: [PATCH 17/35] fix import cycle --- config/column_options.go | 11 ++-- config/db_memory_options.go | 3 ++ db/column/column.go | 52 ++++--------------- db/memory/db.go | 24 +++++---- db/memory/memory.go | 2 +- lib/wal/wal.go | 32 ++++++++++-- .../wal_config.go => lib/wal/wal_options.go | 4 +- lib/wal/wal_test.go | 8 ++- 8 files changed, 66 insertions(+), 70 deletions(-) rename config/wal_config.go => lib/wal/wal_options.go (63%) diff --git a/config/column_options.go b/config/column_options.go index 058d449..86f2d6e 100644 --- a/config/column_options.go +++ b/config/column_options.go @@ -1,11 +1,8 @@ package config +import "github.com/ByteStorage/FlyDB/lib/wal" + type ColumnOptions struct { - Option Options - LogNum uint32 - FileSize int64 - SaveTime int64 - MemSize int64 - TotalMemSize int64 - ColumnName string + DbMemoryOptions DbMemoryOptions + WalOptions wal.Options } diff --git a/config/db_memory_options.go b/config/db_memory_options.go index ec40bba..ed3bc01 100644 --- a/config/db_memory_options.go +++ b/config/db_memory_options.go @@ -1,5 +1,7 @@ package config +import "github.com/ByteStorage/FlyDB/lib/wal" + type DbMemoryOptions struct { Option Options LogNum uint32 @@ -8,4 +10,5 @@ type DbMemoryOptions struct { MemSize int64 TotalMemSize int64 ColumnName string + Wal *wal.Wal } diff --git a/db/column/column.go b/db/column/column.go index d712907..81c934d 100644 --- a/db/column/column.go +++ b/db/column/column.go @@ -33,13 +33,7 @@ type Column interface { // NewColumn create a column family func NewColumn(option config.ColumnOptions) (Column, error) { // create wal, all column family share a wal - walConf := config.WalConfig{ - DirPath: option.Option.DirPath, - LogNum: option.LogNum, - SaveTime: option.SaveTime, - FileSize: option.FileSize, - } - w, err := wal.NewWal(walConf) + w, err := wal.NewWal(option.WalOptions) if err != nil { return nil, err } @@ -65,22 +59,14 @@ func NewColumn(option config.ColumnOptions) (Column, error) { } // if column family not exists, create a new column family - if option.ColumnName == "" { - option.ColumnName = "default" + if option.DbMemoryOptions.ColumnName == "" { + option.DbMemoryOptions.ColumnName = "default" } - options := config.DbMemoryOptions{ - Option: option.Option, - LogNum: option.LogNum, - FileSize: option.FileSize, - SaveTime: option.SaveTime, - MemSize: option.MemSize, - TotalMemSize: option.TotalMemSize, - ColumnName: option.ColumnName, - } + option.DbMemoryOptions.Wal = w // create a new db - db, err := memory.NewDB(options) + db, err := memory.NewDB(option.DbMemoryOptions) if err != nil { return nil, err } @@ -88,7 +74,7 @@ func NewColumn(option config.ColumnOptions) (Column, error) { option: option, mux: sync.RWMutex{}, columnFamily: map[string]*memory.Db{ - option.ColumnName: db, + option.DbMemoryOptions.ColumnName: db, }, wal: w, }, nil @@ -107,16 +93,7 @@ func (c *column) CreateColumnFamily(name string) error { if _, ok := c.columnFamily[name]; ok { return errors.New("column family already exists") } - options := config.DbMemoryOptions{ - Option: c.option.Option, - LogNum: c.option.LogNum, - FileSize: c.option.FileSize, - SaveTime: c.option.SaveTime, - MemSize: c.option.MemSize, - TotalMemSize: c.option.TotalMemSize, - ColumnName: c.option.ColumnName, - } - db, err := memory.NewDB(options) + db, err := memory.NewDB(c.option.DbMemoryOptions) if err != nil { return err } @@ -161,7 +138,7 @@ func (c *column) Keys(cf string) ([][]byte, error) { } func loadColumn(option config.ColumnOptions) (map[string]*memory.Db, error) { - base := option.Option.DirPath + base := option.DbMemoryOptions.Option.DirPath base = strings.Trim(base, "/") // Check if the base path exists if _, err := os.Stat(base); os.IsNotExist(err) { @@ -177,17 +154,8 @@ func loadColumn(option config.ColumnOptions) (map[string]*memory.Db, error) { if dir.IsDir() { colName := dir.Name() dirPath := base + "/" + colName - option.Option.DirPath = dirPath - options := config.DbMemoryOptions{ - Option: option.Option, - LogNum: option.LogNum, - FileSize: option.FileSize, - SaveTime: option.SaveTime, - MemSize: option.MemSize, - TotalMemSize: option.TotalMemSize, - ColumnName: option.ColumnName, - } - db, err := memory.NewDB(options) + option.DbMemoryOptions.Option.DirPath = dirPath + db, err := memory.NewDB(option.DbMemoryOptions) if err != nil { return nil, err } diff --git a/db/memory/db.go b/db/memory/db.go index 96457d3..a9c9c29 100644 --- a/db/memory/db.go +++ b/db/memory/db.go @@ -27,16 +27,20 @@ func NewDB(option config.DbMemoryOptions) (*Db, error) { if err != nil { return nil, err } - walConfig := config.WalConfig{ - DirPath: option.Option.DirPath, - LogNum: option.LogNum, - FileSize: option.FileSize, - SaveTime: option.SaveTime, - } - newWal, err := wal.NewWal(walConfig) - if err != nil { - return nil, err + w := option.Wal + if option.Wal == nil { + walOptions := wal.Options{ + DirPath: option.Option.DirPath, + FileSize: option.FileSize, + SaveTime: option.SaveTime, + LogNum: option.LogNum, + } + w, err = wal.NewWal(walOptions) + if err != nil { + return nil, err + } } + d := &Db{ mem: mem, db: db, @@ -45,7 +49,7 @@ func NewDB(option config.DbMemoryOptions) (*Db, error) { oldListChan: make(chan *MemTable, 1000000), activeSize: 0, totalSize: 0, - wal: newWal, + wal: w, pool: &sync.Pool{New: func() interface{} { return make([]byte, 0, 1024) }}, } go d.async() diff --git a/db/memory/memory.go b/db/memory/memory.go index 990b0d0..99194fb 100644 --- a/db/memory/memory.go +++ b/db/memory/memory.go @@ -5,7 +5,7 @@ import ( "sync" ) -// MemTable is a in-memory table +// MemTable is an in-memory table type MemTable struct { table map[string][]byte // key -> value mutex sync.RWMutex // protect table diff --git a/lib/wal/wal.go b/lib/wal/wal.go index 540b279..aa1f373 100644 --- a/lib/wal/wal.go +++ b/lib/wal/wal.go @@ -3,8 +3,8 @@ package wal import ( "encoding/binary" "errors" - "github.com/ByteStorage/FlyDB/config" "hash/crc32" + "os" "time" "github.com/ByteStorage/FlyDB/db/fileio" @@ -24,11 +24,28 @@ type Wal struct { m *fileio.MMapIO // MMapIOManager logNum uint32 // Log number saveTime int64 // Save time + dirPath string } // NewWal creates a new WAL. -func NewWal(options config.WalConfig) (*Wal, error) { - mapIO, err := fileio.NewMMapIOManager(options.DirPath+walFileName, options.FileSize) +func NewWal(options Options) (*Wal, error) { + fileName := options.DirPath + walFileName + stat, err := os.Stat(fileName) + if err != nil { + if os.IsNotExist(err) { + err := os.MkdirAll(options.DirPath, os.ModePerm) + if err != nil { + return nil, err + } + _, err = os.Create(fileName) + if err != nil { + return nil, err + } + } + } else { + options.LogNum = uint32(stat.Size() / options.FileSize) + } + mapIO, err := fileio.NewMMapIOManager(fileName, options.FileSize) if err != nil { return nil, err } @@ -36,6 +53,7 @@ func NewWal(options config.WalConfig) (*Wal, error) { m: mapIO, logNum: options.LogNum, saveTime: options.SaveTime, + dirPath: options.DirPath, }, nil } @@ -101,6 +119,14 @@ func (w *Wal) Close() error { return w.m.Close() } +func (w *Wal) Clean() error { + err := w.m.Close() + if err != nil { + return err + } + return os.RemoveAll(w.dirPath) +} + // AsyncSave periodically flushes the WAL to disk. func (w *Wal) AsyncSave() { for range time.Tick(time.Duration(w.saveTime)) { diff --git a/config/wal_config.go b/lib/wal/wal_options.go similarity index 63% rename from config/wal_config.go rename to lib/wal/wal_options.go index 0be7e92..9caf5a3 100644 --- a/config/wal_config.go +++ b/lib/wal/wal_options.go @@ -1,6 +1,6 @@ -package config +package wal -type WalConfig struct { +type Options struct { DirPath string FileSize int64 SaveTime int64 diff --git a/lib/wal/wal_test.go b/lib/wal/wal_test.go index 116f418..8057bb4 100644 --- a/lib/wal/wal_test.go +++ b/lib/wal/wal_test.go @@ -1,8 +1,6 @@ package wal import ( - "github.com/ByteStorage/FlyDB/config" - "github.com/ByteStorage/FlyDB/db/column" "github.com/ByteStorage/FlyDB/lib/randkv" "github.com/stretchr/testify/assert" "testing" @@ -10,14 +8,14 @@ import ( ) func TestWal_Put(t *testing.T) { - opt := column.Options{ - Option: config.DefaultOptions, + opt := Options{ + DirPath: "./wal_test", LogNum: 100, FileSize: 100 * 1024 * 1024, SaveTime: 100 * 1000, } wal, err := NewWal(opt) - defer wal.Close() + defer wal.Clean() assert.Nil(t, err) assert.NotNil(t, wal) start := time.Now() From 2b8a0955a7b42207c463457eba126284cf761458 Mon Sep 17 00:00:00 2001 From: sjcsjc123 <1401189096@qq.com> Date: Sun, 20 Aug 2023 12:47:02 +0800 Subject: [PATCH 18/35] fix import cycle --- db/column/column.go | 6 ++++++ db/memory/db.go | 5 +++++ 2 files changed, 11 insertions(+) diff --git a/db/column/column.go b/db/column/column.go index 81c934d..d6c2101 100644 --- a/db/column/column.go +++ b/db/column/column.go @@ -63,6 +63,7 @@ func NewColumn(option config.ColumnOptions) (Column, error) { option.DbMemoryOptions.ColumnName = "default" } + // set wal, the wal is a global wal of all column family option.DbMemoryOptions.Wal = w // create a new db @@ -93,6 +94,7 @@ func (c *column) CreateColumnFamily(name string) error { if _, ok := c.columnFamily[name]; ok { return errors.New("column family already exists") } + c.option.DbMemoryOptions.ColumnName = name db, err := memory.NewDB(c.option.DbMemoryOptions) if err != nil { return err @@ -107,6 +109,10 @@ func (c *column) DropColumnFamily(name string) error { if _, ok := c.columnFamily[name]; !ok { return errors.New("column family not exists") } + err := os.RemoveAll(c.option.DbMemoryOptions.Option.DirPath + "/" + name) + if err != nil { + return err + } delete(c.columnFamily, name) return nil } diff --git a/db/memory/db.go b/db/memory/db.go index a9c9c29..ec8c3ce 100644 --- a/db/memory/db.go +++ b/db/memory/db.go @@ -20,14 +20,19 @@ type Db struct { errMsgCh chan []byte } +// NewDB create a new db of wal and memTable func NewDB(option config.DbMemoryOptions) (*Db, error) { + // create a new memTable mem := NewMemTable() + // dir path has been changed to dir path + column name option.Option.DirPath = option.Option.DirPath + "/" + option.ColumnName db, err := engine.NewDB(option.Option) if err != nil { return nil, err } w := option.Wal + // if wal is nil, create a new wal + // if wal is not nil, the wal was created by column family if option.Wal == nil { walOptions := wal.Options{ DirPath: option.Option.DirPath, From 64fdea5f28c5f278f1de072f2c33b4a6098ac505 Mon Sep 17 00:00:00 2001 From: sjcsjc123 <1401189096@qq.com> Date: Sun, 20 Aug 2023 13:17:31 +0800 Subject: [PATCH 19/35] feat: wal + memory read/write --- db/engine/db.go | 3 ++- db/memory/db.go | 24 +++++++++++++++++------- db/memory/db_test.go | 10 ++++++---- db/memory/error_log.go | 5 ----- 4 files changed, 25 insertions(+), 17 deletions(-) delete mode 100644 db/memory/error_log.go diff --git a/db/engine/db.go b/db/engine/db.go index a27cd88..80d9189 100644 --- a/db/engine/db.go +++ b/db/engine/db.go @@ -4,6 +4,7 @@ package engine import ( + "fmt" "github.com/ByteStorage/FlyDB/config" data2 "github.com/ByteStorage/FlyDB/db/data" "github.com/ByteStorage/FlyDB/db/index" @@ -549,7 +550,7 @@ func (db *DB) Clean() { _ = db.Close() err := os.RemoveAll(db.options.DirPath) if err != nil { - panic(err) + _ = fmt.Errorf("clean db error: %v", err) } } } diff --git a/db/memory/db.go b/db/memory/db.go index ec8c3ce..8869238 100644 --- a/db/memory/db.go +++ b/db/memory/db.go @@ -4,6 +4,7 @@ import ( "github.com/ByteStorage/FlyDB/config" "github.com/ByteStorage/FlyDB/db/engine" "github.com/ByteStorage/FlyDB/lib/wal" + "os" "sync" ) @@ -17,7 +18,7 @@ type Db struct { totalSize int64 activeSize int64 pool *sync.Pool - errMsgCh chan []byte + errMsgCh chan string } // NewDB create a new db of wal and memTable @@ -59,13 +60,15 @@ func NewDB(option config.DbMemoryOptions) (*Db, error) { } go d.async() go d.wal.AsyncSave() + go d.handlerErrMsg() return d, nil } func (d *Db) handlerErrMsg() { + log := d.option.Option.DirPath + "/error.log" for msg := range d.errMsgCh { - // TODO handle error: either log it, retry, or whatever makes sense for your application - _ = msg + // write to log + _ = os.WriteFile(log, []byte(msg), 0666) } } @@ -75,12 +78,19 @@ func (d *Db) Put(key []byte, value []byte) error { valueLen := int64(len(value)) d.pool.Put(func() { - // Write to WAL - err := d.wal.Put(key, value) - if err != nil { + // Write to wal, try 3 times + ok := false + for i := 0; i < 3; i++ { + err := d.wal.Put(key, value) + if err == nil { + ok = true + break + } + } + if !ok { err := d.wal.Delete(key) if err != nil { - d.errMsgCh <- []byte(err.Error()) + d.errMsgCh <- "write to wal error when delete the key: " + string(key) + " error: " + err.Error() } } }) diff --git a/db/memory/db_test.go b/db/memory/db_test.go index eddb167..1cae43a 100644 --- a/db/memory/db_test.go +++ b/db/memory/db_test.go @@ -16,10 +16,12 @@ func TestPutAndGet(t *testing.T) { opts.DirPath = dir opts.DataFileSize = 64 * 1024 * 1024 memoryOptions := config.DbMemoryOptions{ - Option: opts, - LogNum: 100, - FileSize: 100 * 1024 * 1024, - SaveTime: 100 * 1000, + Option: opts, + LogNum: 100, + FileSize: 100 * 1024 * 1024, + SaveTime: 100 * 1000, + MemSize: 256 * 1024 * 1024, + TotalMemSize: 2 * 1024 * 1024 * 1024, } db, err := NewDB(memoryOptions) defer db.Clean() diff --git a/db/memory/error_log.go b/db/memory/error_log.go deleted file mode 100644 index e839343..0000000 --- a/db/memory/error_log.go +++ /dev/null @@ -1,5 +0,0 @@ -package memory - -func Errorf(format string, args ...interface{}) { - -} From a74bd64ac0ba7736c6f387e64e2e35b0f41aa433 Mon Sep 17 00:00:00 2001 From: sjcsjc123 <1401189096@qq.com> Date: Sun, 20 Aug 2023 15:23:14 +0800 Subject: [PATCH 20/35] feat: add bloom for memory read/write --- db/memory/memory.go | 13 +++ go.mod | 6 +- go.sum | 15 +--- lib/bloom/bloom.go | 180 +++++++++++----------------------------- lib/bloom/bloom_test.go | 153 +++++----------------------------- 5 files changed, 84 insertions(+), 283 deletions(-) diff --git a/db/memory/memory.go b/db/memory/memory.go index 99194fb..45df832 100644 --- a/db/memory/memory.go +++ b/db/memory/memory.go @@ -2,6 +2,7 @@ package memory import ( "errors" + "github.com/ByteStorage/FlyDB/lib/bloom" "sync" ) @@ -9,12 +10,15 @@ import ( type MemTable struct { table map[string][]byte // key -> value mutex sync.RWMutex // protect table + bloom *bloom.Filter // bloom filter } // NewMemTable create a new MemTable func NewMemTable() *MemTable { return &MemTable{ table: make(map[string][]byte), + // Initialize with no keys and 10 bits per key + bloom: bloom.NewBloomFilter(1000, 0.01), } } @@ -23,6 +27,8 @@ func (m *MemTable) Put(key string, value []byte) { m.mutex.Lock() defer m.mutex.Unlock() m.table[key] = value + // Add the key to the bloom filter + m.bloom.Add([]byte(key)) } // Get a value from the table @@ -30,6 +36,11 @@ func (m *MemTable) Get(key string) ([]byte, error) { m.mutex.RLock() defer m.mutex.RUnlock() + // Immediate return if the key is not in the bloom filter + if !m.bloom.MayContainItem([]byte(key)) { + return nil, errors.New("key not found") + } + value, ok := m.table[key] if !ok { return nil, errors.New("key not found") @@ -39,6 +50,8 @@ func (m *MemTable) Get(key string) ([]byte, error) { } // Delete a key from the table +// Note: Bloom filters don't support deletion without affecting accuracy +// so we don't remove the key from the bloom filter. func (m *MemTable) Delete(key string) { m.mutex.Lock() defer m.mutex.Unlock() diff --git a/go.mod b/go.mod index 231c2d5..d90ae5e 100644 --- a/go.mod +++ b/go.mod @@ -18,9 +18,9 @@ require ( github.com/klauspost/reedsolomon v1.11.7 github.com/pkg/errors v0.9.1 github.com/plar/go-adaptive-radix-tree v1.0.5 + github.com/spaolacci/murmur3 v1.1.0 github.com/stretchr/testify v1.8.2 github.com/tecbot/gorocksdb v0.0.0-20191217155057-f0fad39f321c - github.com/tidwall/wal v1.1.7 go.etcd.io/bbolt v1.3.7 go.uber.org/zap v1.24.0 golang.org/x/crypto v0.0.0-20190701094942-4def268fd1a4 @@ -49,10 +49,6 @@ require ( github.com/mattn/go-isatty v0.0.16 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/rogpeppe/go-internal v1.9.0 // indirect - github.com/tidwall/gjson v1.14.4 // indirect - github.com/tidwall/match v1.1.1 // indirect - github.com/tidwall/pretty v1.2.1 // indirect - github.com/tidwall/tinylru v1.2.1 // indirect go.uber.org/atomic v1.7.0 // indirect go.uber.org/multierr v1.6.0 // indirect golang.org/x/net v0.8.0 // indirect diff --git a/go.sum b/go.sum index 028de76..ac66ffa 100644 --- a/go.sum +++ b/go.sum @@ -169,6 +169,8 @@ github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZV github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= +github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI= +github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= @@ -186,19 +188,6 @@ github.com/stretchr/testify v1.8.2 h1:+h33VjcLVPDHtOdpUCuF+7gSuG3yGIftsP1YvFihtJ github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/tecbot/gorocksdb v0.0.0-20191217155057-f0fad39f321c h1:g+WoO5jjkqGAzHWCjJB1zZfXPIAaDpzXIEJ0eS6B5Ok= github.com/tecbot/gorocksdb v0.0.0-20191217155057-f0fad39f321c/go.mod h1:ahpPrc7HpcfEWDQRZEmnXMzHY03mLDYMCxeDzy46i+8= -github.com/tidwall/gjson v1.10.2/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= -github.com/tidwall/gjson v1.14.4 h1:uo0p8EbA09J7RQaflQ1aBRffTR7xedD2bcIVSYxLnkM= -github.com/tidwall/gjson v1.14.4/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= -github.com/tidwall/match v1.1.1 h1:+Ho715JplO36QYgwN9PGYNhgZvoUSc9X2c80KVTi+GA= -github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM= -github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= -github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4= -github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= -github.com/tidwall/tinylru v1.1.0/go.mod h1:3+bX+TJ2baOLMWTnlyNWHh4QMnFyARg2TLTQ6OFbzw8= -github.com/tidwall/tinylru v1.2.1 h1:VgBr72c2IEr+V+pCdkPZUwiQ0KJknnWIYbhxAVkYfQk= -github.com/tidwall/tinylru v1.2.1/go.mod h1:9bQnEduwB6inr2Y7AkBP7JPgCkyrhTV/ZpX0oOOpBI4= -github.com/tidwall/wal v1.1.7 h1:emc1TRjIVsdKKSnpwGBAcsAGg0767SvUk8+ygx7Bb+4= -github.com/tidwall/wal v1.1.7/go.mod h1:r6lR1j27W9EPalgHiB7zLJDYu3mzW5BQP5KrzBpYY/E= github.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926/go.mod h1:9ESjWnEqriFuLhtthL60Sar/7RFoluCcXsuvEwTV5KM= go.etcd.io/bbolt v1.3.7 h1:j+zJOnnEjF/kyHlDDgGnVL/AIqIJPq8UoB2GSNfkUfQ= go.etcd.io/bbolt v1.3.7/go.mod h1:N9Mkw9X8x5fupy0IKsmuqVtoGDyxsaDlbk4Rd05IAQw= diff --git a/lib/bloom/bloom.go b/lib/bloom/bloom.go index 563e15c..684a83d 100644 --- a/lib/bloom/bloom.go +++ b/lib/bloom/bloom.go @@ -1,151 +1,65 @@ package bloom -import "math" - -const ( - seed = 0xbc9f1d34 - m = 0xc6a4a793 +import ( + "github.com/spaolacci/murmur3" + "math" ) -// Filter is an encoded set of []byte keys. -type Filter []byte - -// MayContainKey returns whether the filter may contain given key. False positives -func (f Filter) MayContainKey(k []byte) bool { - return f.mayContain(Hash(k)) +// Filter represents a structure for the filter itself. +type Filter struct { + bitSet []bool // Bit array to hold the state of the data + size uint32 // Size of the bit array + numHashes uint8 // Number of hash functions to use } -// MayContain returns whether the filter may contain given key. False positives -// are possible, where it returns true for keys not in the original set. -func (f Filter) mayContain(h uint32) bool { - // check if the filter is empty - if len(f) < 2 { - return false - } - // obtain the number of hash functions - k := f[len(f)-1] - // if k > 30, this is reserved for potentially new encodings for short Bloom filters. - if k > 30 { - // This is reserved for potentially new encodings for short Bloom filters. - // Consider it a match. - return true - } - // calculate the total number of bits in the filter. - nBits := uint32(8 * (len(f) - 1)) - // change the hash value by right shift and left shift to generate different bit positions for subsequent iterations. - delta := h>>17 | h<<15 - for j := uint8(0); j < k; j++ { - // For each hash function, calculate the bit position bitPos - bitPos := h % nBits - // Check if the corresponding bit has been set. - // If the bit has not been set, the key is definitely not in the set, and false is returned. - if f[bitPos/8]&(1<<(bitPos%8)) == 0 { - return false - } - h += delta - } - return true -} - -// NewFilter returns a new Bloom filter that encodes a set of []byte keys with -// the given number of bits per key, approximately. -// -// A good bitsPerKey value is 10, which yields a filter with ~ 1% false -// positive rate. -func NewFilter(keys []uint32, bitsPerKey int) Filter { - return Filter(appendFilter(nil, keys, bitsPerKey)) -} - -// BloomBitsPerKey returns the bits per key required by bloomfilter based on -// the false positive rate. -func BloomBitsPerKey(numEntries int, fp float64) int { - size := -1 * float64(numEntries) * math.Log(fp) / math.Pow(float64(0.69314718056), 2) - locs := math.Ceil(float64(0.69314718056) * size / float64(numEntries)) - return int(locs) -} +// NewBloomFilter initializes a new Bloom filter based on the expected number of items and desired false positive rate. +func NewBloomFilter(expectedItems uint32, fpRate float64) *Filter { + // Calculate the size of bit array using the expected number of items and desired false positive rate + size := uint32(-float64(expectedItems) * math.Log(fpRate) / (math.Ln2 * math.Ln2)) + // Calculate the optimal number of hash functions based on the size of bit array and expected number of items + numHashes := uint8(float64(size) / float64(expectedItems) * math.Ln2) -func appendFilter(buf []byte, keys []uint32, bitsPerKey int) []byte { - if bitsPerKey < 0 { - bitsPerKey = 0 + return &Filter{ + bitSet: make([]bool, size), + size: size, + numHashes: numHashes, } - // 0.69 is approximately ln(2). - k := uint32(float64(bitsPerKey) * 0.69) - if k < 1 { - k = 1 - } - if k > 30 { - k = 30 - } - - nBits := len(keys) * bitsPerKey - // For small len(keys), we can see a very high false positive rate. Fix it - // by enforcing a minimum bloom filter length. - if nBits < 64 { - nBits = 64 - } - nBytes := (nBits + 7) / 8 - nBits = nBytes * 8 - buf, filter := extend(buf, nBytes+1) +} - for _, h := range keys { - delta := h>>17 | h<<15 - for j := uint32(0); j < k; j++ { - bitPos := h % uint32(nBits) - filter[bitPos/8] |= 1 << (bitPos % 8) - h += delta - } +// Add inserts an item into the Bloom filter. +func (f *Filter) Add(item []byte) { + hashes := f.hash(item) + // For each hash value, find the position and set the bit to true + for i := uint8(0); i < f.numHashes; i++ { + position := hashes[i] % f.size + f.bitSet[position] = true } - filter[nBytes] = uint8(k) - - return buf } -// extend appends n zero bytes to b. It returns the overall slice (of length -// n+len(originalB)) and the slice of n trailing zeroes. -func extend(b []byte, n int) (overall, trailer []byte) { - want := n + len(b) - if want <= cap(b) { - overall = b[:want] - trailer = overall[len(b):] - for i := range trailer { - trailer[i] = 0 - } - } else { - // Grow the capacity exponentially, with a 1KiB minimum. - c := 1024 - for c < want { - c += c / 4 +// MayContainItem checks if an item is possibly in the set. +// If it returns false, the item is definitely not in the set. +// If it returns true, the item might be in the set, but it can also be a false positive. +func (f *Filter) MayContainItem(item []byte) bool { + hashes := f.hash(item) + for i := uint8(0); i < f.numHashes; i++ { + position := hashes[i] % f.size + if !f.bitSet[position] { + return false } - overall = make([]byte, want, c) - trailer = overall[len(b):] - copy(overall, b) } - return overall, trailer + return true } -// Hash implements a hashing algorithm similar to the Murmur hash. -func Hash(b []byte) uint32 { - // The original algorithm uses a seed of 0x9747b28c. - h := uint32(seed) ^ uint32(len(b))*m - // Pick up four bytes at a time. - for ; len(b) >= 4; b = b[4:] { - // The original algorithm uses the following commented out code to load - h += uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 - h *= m - h ^= h >> 16 - } - // Pick up remaining bytes. - switch len(b) { - case 3: - h += uint32(b[2]) << 16 - fallthrough - case 2: - h += uint32(b[1]) << 8 - fallthrough - case 1: - h += uint32(b[0]) - h *= m - h ^= h >> 24 +// hash produces multiple hash values for an item. +// It leverages two hash values from murmur3 and generates as many as needed through a linear combination. +func (f *Filter) hash(item []byte) []uint32 { + h1, h2 := murmur3.Sum128(item) // Get two 64-bit hash values + var result []uint32 + + // Use the two hash values to generate the required number of hash functions. + for i := uint8(0); i < f.numHashes; i++ { + h := h1 + uint64(i)*h2 + result = append(result, uint32(h)) } - return h + return result } diff --git a/lib/bloom/bloom_test.go b/lib/bloom/bloom_test.go index e7f9d4b..6c5f0e4 100644 --- a/lib/bloom/bloom_test.go +++ b/lib/bloom/bloom_test.go @@ -1,143 +1,32 @@ package bloom import ( + "github.com/stretchr/testify/assert" "testing" ) -func (f Filter) String() string { - s := make([]byte, 8*len(f)) - for i, x := range f { - for j := 0; j < 8; j++ { - if x&(1<> 0) - b[1] = uint8(uint32(i) >> 8) - b[2] = uint8(uint32(i) >> 16) - b[3] = uint8(uint32(i) >> 24) - return b - } - - nMediocreFilters, nGoodFilters := 0, 0 -loop: - for length := 1; length <= 10000; length = nextLength(length) { - keys := make([][]byte, 0, length) - for i := 0; i < length; i++ { - keys = append(keys, le32(i)) - } - var hashes []uint32 - for _, key := range keys { - hashes = append(hashes, Hash(key)) - } - f := NewFilter(hashes, 10) - - if len(f) > (length*10/8)+40 { - t.Errorf("length=%d: len(f)=%d is too large", length, len(f)) - continue - } - - // All added keys must match. - for _, key := range keys { - if !f.MayContainKey(key) { - t.Errorf("length=%d: did not contain key %q", length, key) - continue loop - } - } - - // Check false positive rate. - nFalsePositive := 0 - for i := 0; i < 10000; i++ { - if f.MayContainKey(le32(1e9 + i)) { - nFalsePositive++ - } - } - if nFalsePositive > 0.02*10000 { - t.Errorf("length=%d: %d false positives in 10000", length, nFalsePositive) - continue - } - if nFalsePositive > 0.0125*10000 { - nMediocreFilters++ - } else { - nGoodFilters++ - } - } - - if nMediocreFilters > nGoodFilters/5 { - t.Errorf("%d mediocre filters but only %d good filters", nMediocreFilters, nGoodFilters) - } +func TestFilter_Add(t *testing.T) { + filter := NewBloomFilter(1000, 0.01) + filter.Add([]byte("hello")) + assert.True(t, filter.MayContainItem([]byte("hello"))) + assert.False(t, filter.MayContainItem([]byte("world"))) } -func TestHash(t *testing.T) { - // The magic want numbers come from running the C++ leveldb code in hash.cc. - testCases := []struct { - s string - want uint32 - }{ - {"", 0xbc9f1d34}, - {"g", 0xd04a8bda}, - {"go", 0x3e0b0745}, - {"gop", 0x0c326610}, - {"goph", 0x8c9d6390}, - {"gophe", 0x9bfd4b0a}, - {"gopher", 0xa78edc7c}, - {"I had a dream it would end this way.", 0xe14a9db9}, - } - for _, tc := range testCases { - if got := Hash([]byte(tc.s)); got != tc.want { - t.Errorf("s=%q: got 0x%08x, want 0x%08x", tc.s, got, tc.want) - } - } +func TestFilter_MayContainItem(t *testing.T) { + filter := NewBloomFilter(1000, 0.01) + filter.Add([]byte("hello")) + filter.Add([]byte("world")) + filter.Add([]byte("flydb")) + filter.Add([]byte("bloom")) + assert.True(t, filter.MayContainItem([]byte("hello"))) + assert.True(t, filter.MayContainItem([]byte("world"))) + assert.True(t, filter.MayContainItem([]byte("flydb"))) + assert.True(t, filter.MayContainItem([]byte("bloom"))) + assert.False(t, filter.MayContainItem([]byte("fly"))) + assert.False(t, filter.MayContainItem([]byte("db"))) } From a72b7f9ed86ddb50bdccf9dcd92ec30e90fa9bb0 Mon Sep 17 00:00:00 2001 From: sjcsjc123 <1401189096@qq.com> Date: Sun, 20 Aug 2023 15:31:15 +0800 Subject: [PATCH 21/35] feat: remove bloom for memory read/write --- db/memory/memory.go | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/db/memory/memory.go b/db/memory/memory.go index 45df832..99194fb 100644 --- a/db/memory/memory.go +++ b/db/memory/memory.go @@ -2,7 +2,6 @@ package memory import ( "errors" - "github.com/ByteStorage/FlyDB/lib/bloom" "sync" ) @@ -10,15 +9,12 @@ import ( type MemTable struct { table map[string][]byte // key -> value mutex sync.RWMutex // protect table - bloom *bloom.Filter // bloom filter } // NewMemTable create a new MemTable func NewMemTable() *MemTable { return &MemTable{ table: make(map[string][]byte), - // Initialize with no keys and 10 bits per key - bloom: bloom.NewBloomFilter(1000, 0.01), } } @@ -27,8 +23,6 @@ func (m *MemTable) Put(key string, value []byte) { m.mutex.Lock() defer m.mutex.Unlock() m.table[key] = value - // Add the key to the bloom filter - m.bloom.Add([]byte(key)) } // Get a value from the table @@ -36,11 +30,6 @@ func (m *MemTable) Get(key string) ([]byte, error) { m.mutex.RLock() defer m.mutex.RUnlock() - // Immediate return if the key is not in the bloom filter - if !m.bloom.MayContainItem([]byte(key)) { - return nil, errors.New("key not found") - } - value, ok := m.table[key] if !ok { return nil, errors.New("key not found") @@ -50,8 +39,6 @@ func (m *MemTable) Get(key string) ([]byte, error) { } // Delete a key from the table -// Note: Bloom filters don't support deletion without affecting accuracy -// so we don't remove the key from the bloom filter. func (m *MemTable) Delete(key string) { m.mutex.Lock() defer m.mutex.Unlock() From 8ed202926b541d052ae2adec8505280e865a7846 Mon Sep 17 00:00:00 2001 From: sjcsjc123 <1401189096@qq.com> Date: Sun, 20 Aug 2023 16:07:45 +0800 Subject: [PATCH 22/35] feat: add load wal method --- db/column/column.go | 11 ++++--- db/memory/db.go | 80 +++++++++++++++++++++++++++++++++++++++++---- lib/wal/wal.go | 67 ++++++++++++++++++++++++++++++++++--- 3 files changed, 143 insertions(+), 15 deletions(-) diff --git a/db/column/column.go b/db/column/column.go index d6c2101..40dc696 100644 --- a/db/column/column.go +++ b/db/column/column.go @@ -81,11 +81,14 @@ func NewColumn(option config.ColumnOptions) (Column, error) { }, nil } +// column is a column family, it contains a wal and a map of column family +// the map of column family is a map of column family name and column family +// the wal is a global wal of all column family type column struct { - mux sync.RWMutex - wal *wal.Wal - columnFamily map[string]*memory.Db - option config.ColumnOptions + mux sync.RWMutex // protect column family + wal *wal.Wal // wal of all column family + columnFamily map[string]*memory.Db // column family map + option config.ColumnOptions // column family options } func (c *column) CreateColumnFamily(name string) error { diff --git a/db/memory/db.go b/db/memory/db.go index 8869238..6d8881a 100644 --- a/db/memory/db.go +++ b/db/memory/db.go @@ -4,10 +4,18 @@ import ( "github.com/ByteStorage/FlyDB/config" "github.com/ByteStorage/FlyDB/db/engine" "github.com/ByteStorage/FlyDB/lib/wal" + "io" + "log" "os" "sync" ) +const ( + // Record types + putType = byte(1) + deleteType = byte(2) +) + type Db struct { option config.DbMemoryOptions db *engine.DB @@ -25,13 +33,16 @@ type Db struct { func NewDB(option config.DbMemoryOptions) (*Db, error) { // create a new memTable mem := NewMemTable() + // dir path has been changed to dir path + column name option.Option.DirPath = option.Option.DirPath + "/" + option.ColumnName db, err := engine.NewDB(option.Option) if err != nil { return nil, err } + w := option.Wal + // if wal is nil, create a new wal // if wal is not nil, the wal was created by column family if option.Wal == nil { @@ -47,6 +58,7 @@ func NewDB(option config.DbMemoryOptions) (*Db, error) { } } + // initialize db d := &Db{ mem: mem, db: db, @@ -58,17 +70,23 @@ func NewDB(option config.DbMemoryOptions) (*Db, error) { wal: w, pool: &sync.Pool{New: func() interface{} { return make([]byte, 0, 1024) }}, } + + // when loading, the system will execute the every record in wal + d.load() + // async write to db go d.async() + // async save wal go d.wal.AsyncSave() + // async handler error message go d.handlerErrMsg() return d, nil } func (d *Db) handlerErrMsg() { - log := d.option.Option.DirPath + "/error.log" + msgLog := d.option.Option.DirPath + "/error.log" for msg := range d.errMsgCh { // write to log - _ = os.WriteFile(log, []byte(msg), 0666) + _ = os.WriteFile(msgLog, []byte(msg), 0666) } } @@ -111,7 +129,7 @@ func (d *Db) Put(key []byte, value []byte) error { // if active memTable size > define size, change to immutable memTable if d.activeSize+keyLen+valueLen > d.option.MemSize { // add to immutable memTable list - d.AddOldMemTable(d.mem) + d.addOldMemTable(d.mem) // create new active memTable d.mem = NewMemTable() d.activeSize = 0 @@ -161,16 +179,27 @@ func (d *Db) Close() error { return d.db.Close() } -func (d *Db) AddOldMemTable(oldList *MemTable) { +func (d *Db) addOldMemTable(oldList *MemTable) { d.oldListChan <- oldList } func (d *Db) async() { for oldList := range d.oldListChan { for key, value := range oldList.table { - err := d.db.Put([]byte(key), value) - if err != nil { - // TODO handle error: either log it, retry, or whatever makes sense for your application + // Write to db, try 3 times + ok := false + for i := 0; i < 3; i++ { + err := d.db.Put([]byte(key), value) + if err == nil { + ok = true + break + } + } + if !ok { + err := d.wal.Delete([]byte(key)) + if err != nil { + d.errMsgCh <- "write to wal error when delete the key: " + string(key) + " error: " + err.Error() + } } d.totalSize -= int64(len(key) + len(value)) } @@ -180,3 +209,40 @@ func (d *Db) async() { func (d *Db) Clean() { d.db.Clean() } + +func (d *Db) load() { + // Initialize reading from the start of the WAL. + d.wal.InitReading() + + for { + record, err := d.wal.ReadNext() + if err == io.EOF { + break + } + if err != nil { + // Handle the error: log it, panic, return, etc. + log.Printf("Error reading from WAL: %v", err) + return + } + + switch record.Type { + case putType: + // Assuming Db has a Put method + err := d.Put(record.Key, record.Value) + if err != nil { + // Handle the error: log it, panic, return, etc. + log.Printf("Error applying PUT from WAL: %v", err) + } + case deleteType: + // Assuming Db has a Delete method + err := d.Delete(record.Key) + if err != nil { + // Handle the error: log it, panic, return, etc. + log.Printf("Error applying DELETE from WAL: %v", err) + } + default: + // Handle unknown type. + log.Printf("Unknown record type in WAL: %v", record.Type) + } + } +} diff --git a/lib/wal/wal.go b/lib/wal/wal.go index aa1f373..e2bbbd7 100644 --- a/lib/wal/wal.go +++ b/lib/wal/wal.go @@ -4,6 +4,7 @@ import ( "encoding/binary" "errors" "hash/crc32" + "io" "os" "time" @@ -21,10 +22,11 @@ const ( // Wal is a write-ahead log. type Wal struct { - m *fileio.MMapIO // MMapIOManager - logNum uint32 // Log number - saveTime int64 // Save time - dirPath string + m *fileio.MMapIO // MMapIOManager + logNum uint32 // Log number + saveTime int64 // Save time + dirPath string // Dir path + readOffset int64 // Read offset } // NewWal creates a new WAL. @@ -109,6 +111,63 @@ func (w *Wal) Delete(key []byte) error { return w.writeRecord(deleteType, key, nil) } +// Record is a structure that holds information about a record from the WAL. +type Record struct { + Type byte + Key []byte + Value []byte +} + +// InitReading Initializes the WAL reading position to the start of the file. +func (w *Wal) InitReading() { + w.readOffset = 0 +} + +// ReadNext reads the next operation from the WAL. +func (w *Wal) ReadNext() (*Record, error) { + buffer := make([]byte, 4+2+1+4) // Buffer size to read headers + _, err := w.m.Read(buffer, w.readOffset) + if err == io.EOF { + return nil, io.EOF + } + if err != nil { + return nil, err + } + + // Move readOffset + w.readOffset += int64(len(buffer)) + + // Verify CRC + expectedCRC := binary.LittleEndian.Uint32(buffer) + if crc32.ChecksumIEEE(buffer[4:]) != expectedCRC { + return nil, errors.New("corrupted record found") + } + + // Get record size and type + size := binary.LittleEndian.Uint16(buffer[4:]) + recordType := buffer[4+2] + + // Read the payload + payload := make([]byte, size-4) // Subtract 4 for log number + _, err = w.m.Read(payload, w.readOffset) + if err != nil { + return nil, err + } + + // Move readOffset again + w.readOffset += int64(len(payload)) + + // Parse based on record type + switch recordType { + case putType: + return &Record{Type: putType, Key: payload[:len(payload)-len(buffer)], Value: payload[len(payload)-len(buffer):]}, nil + case deleteType: + return &Record{Type: deleteType, Key: payload, Value: nil}, nil + default: + return nil, errors.New("unknown record type") + } +} + // Save flushes the WAL to disk. func (w *Wal) Save() error { return w.m.Sync() From a242d49823b4c7f129a5ed71537c7b7f7f3c12b8 Mon Sep 17 00:00:00 2001 From: sjcsjc123 <1401189096@qq.com> Date: Sun, 20 Aug 2023 16:35:07 +0800 Subject: [PATCH 23/35] feat: add bloom at art index --- config/options.go | 3 ++ db/index/art_with_bloom.go | 71 ++++++++++++++++++++++++++++++++++++++ db/index/index.go | 5 +++ db/memory/db.go | 3 +- 4 files changed, 81 insertions(+), 1 deletion(-) create mode 100644 db/index/art_with_bloom.go diff --git a/config/options.go b/config/options.go index f9d6a15..77903a1 100644 --- a/config/options.go +++ b/config/options.go @@ -46,6 +46,9 @@ const ( // SkipList SkipList + + // ARTWithBloom index With Bloom Filter + ARTWithBloom ) const ( diff --git a/db/index/art_with_bloom.go b/db/index/art_with_bloom.go new file mode 100644 index 0000000..4e20db4 --- /dev/null +++ b/db/index/art_with_bloom.go @@ -0,0 +1,71 @@ +package index + +import ( + "github.com/ByteStorage/FlyDB/db/data" + "github.com/ByteStorage/FlyDB/lib/bloom" + art "github.com/plar/go-adaptive-radix-tree" + "sync" +) + +// AdaptiveRadixTreeWithBloom Adaptive Radix Tree Index +// The following link is the ART library written by go. +// If you need to know more about it, please go to the corresponding warehouse. +// https://github.com/plar/go-adaptive-radix-tree +type AdaptiveRadixTreeWithBloom struct { + tree art.Tree + lock *sync.RWMutex + filter *bloom.Filter +} + +// NewARTWithBloom Initializes the adaptive radix tree index +func NewARTWithBloom() *AdaptiveRadixTreeWithBloom { + return &AdaptiveRadixTreeWithBloom{ + tree: art.New(), + lock: new(sync.RWMutex), + filter: bloom.NewBloomFilter(1000, 0.01), + } +} + +func (artree *AdaptiveRadixTreeWithBloom) Put(key []byte, pst *data.LogRecordPst) bool { + artree.lock.Lock() + defer artree.lock.Unlock() + artree.tree.Insert(key, pst) + artree.filter.Add(key) + return true +} + +func (artree *AdaptiveRadixTreeWithBloom) Get(key []byte) *data.LogRecordPst { + if !artree.filter.MayContainItem(key) { + return nil + } + artree.lock.RLock() + defer artree.lock.RUnlock() + value, found := artree.tree.Search(key) + if !found { + return nil + } + return value.(*data.LogRecordPst) +} + +func (artree *AdaptiveRadixTreeWithBloom) Delete(key []byte) bool { + if !artree.filter.MayContainItem(key) { + return false + } + artree.lock.Lock() + defer artree.lock.Unlock() + _, deleted := artree.tree.Delete(key) + return deleted +} + +func (artree *AdaptiveRadixTreeWithBloom) Size() int { + artree.lock.RLock() + defer artree.lock.RUnlock() + size := artree.tree.Size() + return size +} + +func (artree *AdaptiveRadixTreeWithBloom) Iterator(reverse bool) Iterator { + artree.lock.RLock() + defer artree.lock.RUnlock() + return NewARTreeIterator(artree.tree, reverse) +} diff --git a/db/index/index.go b/db/index/index.go index 9a06acc..698cab4 100644 --- a/db/index/index.go +++ b/db/index/index.go @@ -39,6 +39,9 @@ const ( // SkipList Index SkipListIndex + + // ARTWithBloom index With Bloom Filter + ARTWithBloom ) func NewIndexer(typeIndex IndexType, dirPath string) Indexer { @@ -49,6 +52,8 @@ func NewIndexer(typeIndex IndexType, dirPath string) Indexer { return NewART() case SkipListIndex: return NewSkipList() + case ARTWithBloom: + return NewARTWithBloom() default: panic("unsupported index type") } diff --git a/db/memory/db.go b/db/memory/db.go index 6d8881a..7dbca74 100644 --- a/db/memory/db.go +++ b/db/memory/db.go @@ -36,6 +36,7 @@ func NewDB(option config.DbMemoryOptions) (*Db, error) { // dir path has been changed to dir path + column name option.Option.DirPath = option.Option.DirPath + "/" + option.ColumnName + option.Option.IndexType = config.ARTWithBloom db, err := engine.NewDB(option.Option) if err != nil { return nil, err @@ -72,7 +73,7 @@ func NewDB(option config.DbMemoryOptions) (*Db, error) { } // when loading, the system will execute the every record in wal - d.load() + //d.load() // async write to db go d.async() // async save wal From f461bb216403c4616a103bcc40f43cc00d82f3d1 Mon Sep 17 00:00:00 2001 From: sjcsjc123 <1401189096@qq.com> Date: Sun, 20 Aug 2023 16:59:11 +0800 Subject: [PATCH 24/35] feat: add delete method for db --- db/memory/db.go | 46 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/db/memory/db.go b/db/memory/db.go index 7dbca74..efcd0a9 100644 --- a/db/memory/db.go +++ b/db/memory/db.go @@ -27,6 +27,7 @@ type Db struct { activeSize int64 pool *sync.Pool errMsgCh chan string + mux sync.RWMutex } // NewDB create a new db of wal and memTable @@ -70,6 +71,7 @@ func NewDB(option config.DbMemoryOptions) (*Db, error) { totalSize: 0, wal: w, pool: &sync.Pool{New: func() interface{} { return make([]byte, 0, 1024) }}, + mux: sync.RWMutex{}, } // when loading, the system will execute the every record in wal @@ -92,6 +94,8 @@ func (d *Db) handlerErrMsg() { } func (d *Db) Put(key []byte, value []byte) error { + d.mux.Lock() + defer d.mux.Unlock() // calculate key and value size keyLen := int64(len(key)) valueLen := int64(len(value)) @@ -146,6 +150,8 @@ func (d *Db) Put(key []byte, value []byte) error { } func (d *Db) Get(key []byte) ([]byte, error) { + d.mux.RLock() + defer d.mux.RUnlock() // first get from memTable value, err := d.mem.Get(string(key)) if err == nil { @@ -165,7 +171,45 @@ func (d *Db) Get(key []byte) ([]byte, error) { } func (d *Db) Delete(key []byte) error { - panic("implement me") + d.mux.Lock() + defer d.mux.Unlock() + + d.pool.Put(func() { + // Write to wal, try 3 times + ok := false + for i := 0; i < 3; i++ { + err := d.wal.Delete(key) + if err == nil { + ok = true + break + } + } + if !ok { + err := d.wal.Delete(key) + if err != nil { + d.errMsgCh <- "write to wal error when delete the key: " + string(key) + " error: " + err.Error() + } + } + }) + // get from active memTable + get, err := d.mem.Get(string(key)) + if err == nil { + d.activeSize -= int64(len(key) + len(get)) + d.totalSize -= int64(len(key) + len(get)) + d.mem.Delete(string(key)) + return nil + } + // get from immutable memTable + for _, list := range d.oldList { + get, err = list.Get(string(key)) + if err == nil { + d.totalSize -= int64(len(key) + len(get)) + list.Delete(string(key)) + return nil + } + } + // get from db + return d.db.Delete(key) } func (d *Db) Keys() ([][]byte, error) { From 9520b374ce509ebf1a73ec2dd0332a2c4bd61e5d Mon Sep 17 00:00:00 2001 From: sjcsjc123 <1401189096@qq.com> Date: Sun, 20 Aug 2023 17:09:43 +0800 Subject: [PATCH 25/35] feat: add wal compact --- lib/wal/wal.go | 97 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) diff --git a/lib/wal/wal.go b/lib/wal/wal.go index e2bbbd7..5e4bf6f 100644 --- a/lib/wal/wal.go +++ b/lib/wal/wal.go @@ -27,6 +27,7 @@ type Wal struct { saveTime int64 // Save time dirPath string // Dir path readOffset int64 // Read offset + filesize int64 // File size } // NewWal creates a new WAL. @@ -56,6 +57,7 @@ func NewWal(options Options) (*Wal, error) { logNum: options.LogNum, saveTime: options.SaveTime, dirPath: options.DirPath, + filesize: options.FileSize, }, nil } @@ -168,6 +170,101 @@ func (w *Wal) ReadNext() (*Record, error) { } } +func (w *Wal) Compact() error { + // Create a map to track the latest put operations and a set to track deleted keys + latestPuts := make(map[string][]byte) + deletedKeys := make(map[string]bool) + + w.InitReading() + for { + record, err := w.ReadNext() + if err == io.EOF { + break + } + if err != nil { + return err + } + if record.Type == putType { + latestPuts[string(record.Key)] = record.Value + // If the key was previously deleted, ensure it's removed from the deletedKeys set + delete(deletedKeys, string(record.Key)) + } else if record.Type == deleteType { + delete(latestPuts, string(record.Key)) + deletedKeys[string(record.Key)] = true + } + } + + // Step 2: Create a temporary WAL for writing compressed records + tmpWALPath := w.dirPath + "/db.tmp.wal" + tmpWAL, err := fileio.NewMMapIOManager(tmpWALPath, w.filesize) // assuming w.m provides FileSize method + if err != nil { + return err + } + defer tmpWAL.Close() + + // Step 3: Write records to temporary WAL + for key, value := range latestPuts { + // Skip the key if it was deleted + if _, deleted := deletedKeys[key]; deleted { + continue + } + // TODO: Consider adding the log number, if necessary. + err = w.writeToSpecificWAL(tmpWAL, putType, []byte(key), value) + if err != nil { + return err + } + } + + // Rename files to replace the old WAL with the compacted one + err = os.Rename(tmpWALPath, w.dirPath+walFileName) + if err != nil { + return err + } + + // Reinitialize mmap with the compacted file + w.m, err = fileio.NewMMapIOManager(w.dirPath+walFileName, w.filesize) + if err != nil { + return err + } + + return nil +} + +func (w *Wal) writeToSpecificWAL(targetWAL *fileio.MMapIO, recordType byte, key, value []byte) error { + // Prepare the payload based on record type + var payload []byte + switch recordType { + case putType: + payload = append(key, value...) + case deleteType: + payload = key + default: + return errors.New("unknown record type") + } + + size := uint16(4 + len(payload)) // 4 bytes for log number + buffer := make([]byte, 4+2+1+4+len(payload)) + + // Compute CRC + crc := crc32.ChecksumIEEE(buffer[4:]) + binary.LittleEndian.PutUint32(buffer, crc) + + // Write size + binary.LittleEndian.PutUint16(buffer[4:], size) + + // Write type + buffer[4+2] = recordType + + // Write log number + binary.LittleEndian.PutUint32(buffer[4+2+1:], w.logNum) + + // Write payload + copy(buffer[4+2+1+4:], payload) + + _, err := targetWAL.Write(buffer) + return err +} + // Save flushes the WAL to disk. func (w *Wal) Save() error { return w.m.Sync() From e88ead37524b63de435dd84e7d4b7b6c291f688a Mon Sep 17 00:00:00 2001 From: qishenonly <1050026498@qq.com> Date: Sun, 20 Aug 2023 19:30:31 +0800 Subject: [PATCH 26/35] Adjust the configuration structure --- config/cluster_options.go | 85 ++++++++++++++++++++++++++++---- config/column_options.go | 8 --- config/db_memory_options.go | 14 ------ config/options.go | 69 +++++++++++++++++++++++--- config/region_config.go | 9 ---- config/store_config.go | 8 --- config/tcpConfig.go | 21 -------- lib/wal/wal_options.go | 14 +++++- protocol/tcp/reply.go | 71 --------------------------- protocol/tcp/replyClient.go | 21 -------- protocol/tcp/server.go | 86 --------------------------------- protocol/tcp/tcp_server/main.go | 18 ------- protocol/tcpIF/handle.go | 14 ------ 13 files changed, 150 insertions(+), 288 deletions(-) delete mode 100644 config/column_options.go delete mode 100644 config/db_memory_options.go delete mode 100644 config/region_config.go delete mode 100644 config/store_config.go delete mode 100644 config/tcpConfig.go delete mode 100644 protocol/tcp/reply.go delete mode 100644 protocol/tcp/replyClient.go delete mode 100644 protocol/tcp/server.go delete mode 100644 protocol/tcp/tcp_server/main.go delete mode 100644 protocol/tcpIF/handle.go diff --git a/config/cluster_options.go b/config/cluster_options.go index d9ac2cb..3710af4 100644 --- a/config/cluster_options.go +++ b/config/cluster_options.go @@ -2,16 +2,81 @@ package config import "time" +// Config holds configuration options for a distributed database. type Config struct { - ReplicationFactor int - ShardingStrategy string - SchedulingStrategy string - LogDataStorage string - LogDataStoragePath string - SnapshotStorage string + // ReplicationFactor specifies the number of replicas for each piece of data, + // impacting redundancy and availability. + ReplicationFactor int + + // ShardingStrategy defines the strategy for data sharding, determining how data + // is partitioned and distributed across nodes in the cluster. + ShardingStrategy string + + // SchedulingStrategy specifies the task scheduling strategy, affecting data balancing + // and load distribution. + SchedulingStrategy string + + // LogDataStorage specifies the storage type for log data, which could be disk, + // network storage, etc. + LogDataStorage string + + // LogDataStoragePath is the path for storing log data. + LogDataStoragePath string + + // SnapshotStorage specifies the storage type for snapshot data, used for backup + // and restoration. + SnapshotStorage string + + // SnapshotStoragePath is the path for storing snapshot data. SnapshotStoragePath string - LogDataStorageSize int64 - HeartbeatInterval time.Duration - MetaNodes []string - StoreNodes []string + + // LogDataStorageSize specifies the maximum capacity for log data storage. + LogDataStorageSize int64 + + // HeartbeatInterval defines the interval for heartbeats, used to maintain communication + // and state synchronization among nodes in the cluster. + HeartbeatInterval time.Duration + + // MetaNodes contains the addresses of metadata nodes, used for managing the cluster's + // metadata information. + MetaNodes []string + + // StoreNodes contains the addresses of storage nodes, used for storing and + // accessing actual data. + StoreNodes []string +} + +// RegionConfig encapsulates configuration and boundary information for a specific region +// within a distributed Raft-based database. +type RegionConfig struct { + // Options contains a set of configuration options specific to the behavior of the Raft region. + Options Options + + // Config holds additional configuration settings related to the operation of the Raft region. + Config Config + + // Id represents the unique identifier for the Raft region. + Id int64 + + // Start specifies the starting boundary key of the Raft region. + Start []byte + + // End specifies the ending boundary key of the Raft region. + End []byte +} + +// StoreConfig encapsulates configuration and identification information for a store +// within a distributed system. +type StoreConfig struct { + // Options contains a set of configuration options specific to the behavior of the store. + Options Options + + // Config holds additional configuration settings related to the operation of the store. + Config Config + + // Id represents the unique identifier for the store. + Id int64 + + // Addr specifies the network address at which the store can be accessed. + Addr string } diff --git a/config/column_options.go b/config/column_options.go deleted file mode 100644 index 86f2d6e..0000000 --- a/config/column_options.go +++ /dev/null @@ -1,8 +0,0 @@ -package config - -import "github.com/ByteStorage/FlyDB/lib/wal" - -type ColumnOptions struct { - DbMemoryOptions DbMemoryOptions - WalOptions wal.Options -} diff --git a/config/db_memory_options.go b/config/db_memory_options.go deleted file mode 100644 index ed3bc01..0000000 --- a/config/db_memory_options.go +++ /dev/null @@ -1,14 +0,0 @@ -package config - -import "github.com/ByteStorage/FlyDB/lib/wal" - -type DbMemoryOptions struct { - Option Options - LogNum uint32 - FileSize int64 - SaveTime int64 - MemSize int64 - TotalMemSize int64 - ColumnName string - Wal *wal.Wal -} diff --git a/config/options.go b/config/options.go index 77903a1..9380f8d 100644 --- a/config/options.go +++ b/config/options.go @@ -1,19 +1,75 @@ package config -import "os" +import ( + "github.com/ByteStorage/FlyDB/lib/wal" + "os" +) +// Options is a comprehensive configuration struct that +// encapsulates various settings for configuring the behavior of a database. type Options struct { - DirPath string // Database data directory - DataFileSize int64 // Size of data files - SyncWrite bool // Whether to persist data on every write - IndexType IndexerType - FIOType FIOType + // DirPath specifies the path to the directory where the database will store its data files. + DirPath string + + // DataFileSize defines the maximum size of each data file in the database. + DataFileSize int64 + + // SyncWrite determines whether the database should ensure data persistence with + // every write operation. + SyncWrite bool + + // IndexType selects the type of indexing mechanism to be used for efficient data retrieval. + IndexType IndexerType + + // FIOType indicates the type of file I/O optimization to be applied by the database. + FIOType FIOType +} + +// ColumnOptions are configurations for database column families +type ColumnOptions struct { + // DbMemoryOptions contains configuration settings + // for managing database memory usage and caching. + DbMemoryOptions DbMemoryOptions + + // WalOptions contains configuration settings for the Write-Ahead Logging (WAL) mechanism. + WalOptions wal.Options +} + +// DbMemoryOptions is related to configuration of database memory tables +type DbMemoryOptions struct { + // Option contains a set of database configuration options + // to influence memory management behavior. + Option Options + + // LogNum specifies the number of logs to keep in memory + // for efficient access and performance. + LogNum uint32 + + // FileSize defines the maximum size of data files to be kept in memory. + FileSize int64 + + // SaveTime determines the interval at which data should be + // saved from memory to disk to ensure durability. + SaveTime int64 + + // MemSize sets the limit on the amount of memory to be used for caching purposes. + MemSize int64 + + // TotalMemSize defines the overall memory capacity allocated for database operations. + TotalMemSize int64 + + // ColumnName identifies the specific database column to which these memory options apply. + ColumnName string + + // Wal is a reference to the Write-Ahead Logging (WAL) mechanism that ensures data durability. + Wal *wal.Wal } // IteratorOptions is the configuration for index iteration. type IteratorOptions struct { // Prefix specifies the prefix value for keys to iterate over. Default is empty. Prefix []byte + // Reverse indicates whether to iterate in reverse order. // Default is false for forward iteration. Reverse bool @@ -23,6 +79,7 @@ type IteratorOptions struct { type WriteBatchOptions struct { // MaxBatchNum is the maximum number of data entries in a batch. MaxBatchNum uint + // SyncWrites indicates whether to sync (persist) the data on batch commit. SyncWrites bool } diff --git a/config/region_config.go b/config/region_config.go deleted file mode 100644 index 35e5822..0000000 --- a/config/region_config.go +++ /dev/null @@ -1,9 +0,0 @@ -package config - -type RegionConfig struct { - Options Options - Config Config - Id int64 - Start []byte - End []byte -} diff --git a/config/store_config.go b/config/store_config.go deleted file mode 100644 index d75b714..0000000 --- a/config/store_config.go +++ /dev/null @@ -1,8 +0,0 @@ -package config - -type StoreConfig struct { - Options Options - Config Config - Id int64 - Addr string -} diff --git a/config/tcpConfig.go b/config/tcpConfig.go deleted file mode 100644 index c8c7a49..0000000 --- a/config/tcpConfig.go +++ /dev/null @@ -1,21 +0,0 @@ -package config - -// TcpServerConfiguration define global tcp server config -type TcpServerConfiguration struct { - Host string - Port int - MaxClients int -} - -// Configuration is global tcp server config -var Configuration *TcpServerConfiguration - -// Init global tcp server config -func Init() *TcpServerConfiguration { - Configuration = &TcpServerConfiguration{ - Host: "127.0.0.1", - Port: 6379, - MaxClients: 10000, - } - return Configuration -} diff --git a/lib/wal/wal_options.go b/lib/wal/wal_options.go index 9caf5a3..1d64c53 100644 --- a/lib/wal/wal_options.go +++ b/lib/wal/wal_options.go @@ -1,8 +1,18 @@ package wal +// Options encapsulates configuration settings for the Write-Ahead Logging (WAL) +// mechanism in a database. type Options struct { - DirPath string + // DirPath specifies the directory path where Write-Ahead Logging (WAL) files will be stored. + DirPath string + + // FileSize determines the maximum size of individual WAL files. FileSize int64 + + // SaveTime defines the interval at which WAL data should be persisted from memory to disk. SaveTime int64 - LogNum uint32 + + // LogNum specifies the number of WAL logs to retain, influencing performance and + // recovery behavior. + LogNum uint32 } diff --git a/protocol/tcp/reply.go b/protocol/tcp/reply.go deleted file mode 100644 index ae20fc4..0000000 --- a/protocol/tcp/reply.go +++ /dev/null @@ -1,71 +0,0 @@ -package tcp - -import ( - "bufio" - "context" - "fmt" - "github.com/ByteStorage/FlyDB/lib/sync/boolAm" - "github.com/ByteStorage/FlyDB/protocol/tcpIF" - "io" - "net" - "sync" -) - -var _ tcpIF.Handler = (*TcpReplyHandler)(nil) - -type TcpReplyHandler struct { - activeConn sync.Map - isClosed boolAm.Boolean -} - -// NewHandler create a new handler -func NewHandler() *TcpReplyHandler { - return &TcpReplyHandler{} -} - -// Handle client connection -func (t *TcpReplyHandler) Handle(ctx context.Context, conn net.Conn) { - if t.isClosed.GetBoolAtomic() { - _ = conn.Close() - return - } - - client := &ReplyClient{ - Conn: conn, - } - t.activeConn.Store(client, struct{}{}) - - reader := bufio.NewReader(conn) - for { - msg, err := reader.ReadString('\n') - if err != nil { - if err == io.EOF { - fmt.Println("client close connection") - t.activeConn.Delete(client) - } else { - fmt.Println("read message error: ", err) - } - return - } - - if msg[:2] == "\\n" { - client.Waiting.Add(1) - buf := []byte(msg) - _, _ = conn.Write(buf) - client.Waiting.Done() - } - } - -} - -// Close handler -func (t *TcpReplyHandler) Close() error { - fmt.Println("tcp server close") - t.isClosed.SetBoolAtomic(true) - t.activeConn.Range(func(key, value interface{}) bool { - client := key.(*ReplyClient) - _ = client.Close() - return true - }) - return nil -} diff --git a/protocol/tcp/replyClient.go b/protocol/tcp/replyClient.go deleted file mode 100644 index 7fd8c7c..0000000 --- a/protocol/tcp/replyClient.go +++ /dev/null @@ -1,21 +0,0 @@ -package tcp - -import ( - "github.com/ByteStorage/FlyDB/lib/sync/wait" - "net" - "time" -) - -//var _ tcpIF.Handler = (*ReplyClient)(nil) - -type ReplyClient struct { - Conn net.Conn - Waiting wait.Wait -} - -// Close client connection -func (r *ReplyClient) Close() error { - r.Waiting.WaitTimeout(10 * time.Second) - _ = r.Conn.Close() - return nil -} diff --git a/protocol/tcp/server.go b/protocol/tcp/server.go deleted file mode 100644 index 699953e..0000000 --- a/protocol/tcp/server.go +++ /dev/null @@ -1,86 +0,0 @@ -package tcp - -import ( - "context" - "fmt" - "github.com/ByteStorage/FlyDB/protocol/tcpIF" - "net" - "os" - "os/signal" - "sync" - "syscall" -) - -type Config struct { - Address string -} - -// ListenAndServeBySignal start tcp server by signal -func ListenAndServeBySignal(cfg *Config, handler tcpIF.Handler) error { - closeChan := make(chan struct{}) - // listen system-level signal - signalChan := make(chan os.Signal, 1) - // syscall.SIGHUP: terminal closed - // syscall.SIGINT: ctrl + c - // syscall.SIGTERM: kill - // syscall.SIGQUIT: ctrl + \ - signal.Notify(signalChan, syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT) - - // receive signal - // when receive a signal, send a signal to closeChan - // closeChan will close tcp server - go func() { - s := <-signalChan - switch s { - case syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT: - closeChan <- struct{}{} - } - }() - - listener, err := net.Listen("tcp", cfg.Address) - if err != nil { - return err - } - fmt.Println("tcp server start listen on: ", cfg.Address) - - // start tcp server - ListenAndServe(listener, handler, closeChan) - - return nil -} - -// ListenAndServe start tcp server -// closeChan is a channel to close tcp server -// when closeChan receive a signal, tcp server will close -func ListenAndServe(listener net.Listener, handler tcpIF.Handler, closeChan <-chan struct{}) { - defer func() { - _ = listener.Close() - _ = handler.Close() - }() - - go func() { - // wait for close signal - <-closeChan - fmt.Println("tcp server close but is shutting down...") - _ = listener.Close() - _ = handler.Close() - }() - - ctx := context.Background() - var wg sync.WaitGroup - for { - conn, err := listener.Accept() - if err != nil { - break - } - fmt.Println("accept new connection: ", conn.RemoteAddr().String()) - wg.Add(1) - go func() { - defer func() { - wg.Done() - }() - handler.Handle(ctx, conn) - }() - } - wg.Wait() -} diff --git a/protocol/tcp/tcp_server/main.go b/protocol/tcp/tcp_server/main.go deleted file mode 100644 index 3cdf2ad..0000000 --- a/protocol/tcp/tcp_server/main.go +++ /dev/null @@ -1,18 +0,0 @@ -package main - -import ( - "fmt" - "github.com/ByteStorage/FlyDB/config" - tcp2 "github.com/ByteStorage/FlyDB/protocol/tcp" - "strconv" -) - -func main() { - tpcDefaultConfig := config.Init() - err := tcp2.ListenAndServeBySignal(&tcp2.Config{ - Address: tpcDefaultConfig.Host + ":" + strconv.Itoa(tpcDefaultConfig.Port), - }, tcp2.NewHandler()) - if err != nil { - fmt.Println(err) - } -} diff --git a/protocol/tcpIF/handle.go b/protocol/tcpIF/handle.go deleted file mode 100644 index 1c8df59..0000000 --- a/protocol/tcpIF/handle.go +++ /dev/null @@ -1,14 +0,0 @@ -package tcpIF - -import ( - "context" - "net" -) - -type Handler interface { - // Handle client connection - Handle(ct context.Context, conn net.Conn) - - // Close handler - Close() error -} From 8312d73ee96f845ed41c5bb71f64f1997e51a857 Mon Sep 17 00:00:00 2001 From: qishenonly <1050026498@qq.com> Date: Mon, 21 Aug 2023 20:42:31 +0800 Subject: [PATCH 27/35] load for wal not to flush --- db/memory/db.go | 69 +++++++++++++++++++++++++++++++------------------ lib/wal/wal.go | 11 ++++++++ 2 files changed, 55 insertions(+), 25 deletions(-) diff --git a/db/memory/db.go b/db/memory/db.go index efcd0a9..d67a478 100644 --- a/db/memory/db.go +++ b/db/memory/db.go @@ -17,17 +17,19 @@ const ( ) type Db struct { - option config.DbMemoryOptions - db *engine.DB - mem *MemTable - oldList []*MemTable - wal *wal.Wal - oldListChan chan *MemTable - totalSize int64 - activeSize int64 - pool *sync.Pool - errMsgCh chan string - mux sync.RWMutex + option config.DbMemoryOptions + db *engine.DB + mem *MemTable + oldList []*MemTable + wal *wal.Wal + oldListChan chan *MemTable + totalSize int64 + activeSize int64 + pool *sync.Pool + errMsgCh chan string + mux sync.RWMutex + walDataMtList []*MemTable + walDataMtListChan chan *MemTable } // NewDB create a new db of wal and memTable @@ -62,20 +64,22 @@ func NewDB(option config.DbMemoryOptions) (*Db, error) { // initialize db d := &Db{ - mem: mem, - db: db, - option: option, - oldList: make([]*MemTable, 0), - oldListChan: make(chan *MemTable, 1000000), - activeSize: 0, - totalSize: 0, - wal: w, - pool: &sync.Pool{New: func() interface{} { return make([]byte, 0, 1024) }}, - mux: sync.RWMutex{}, + mem: mem, + db: db, + option: option, + oldList: make([]*MemTable, 0), + oldListChan: make(chan *MemTable, 1000000), + activeSize: 0, + totalSize: 0, + wal: w, + pool: &sync.Pool{New: func() interface{} { return make([]byte, 0, 1024) }}, + mux: sync.RWMutex{}, + walDataMtList: make([]*MemTable, 0), + walDataMtListChan: make(chan *MemTable, 1000000), } // when loading, the system will execute the every record in wal - //d.load() + d.load() // async write to db go d.async() // async save wal @@ -93,6 +97,8 @@ func (d *Db) handlerErrMsg() { } } +var putTypeInt = int64(1) + func (d *Db) Put(key []byte, value []byte) error { d.mux.Lock() defer d.mux.Unlock() @@ -134,7 +140,12 @@ func (d *Db) Put(key []byte, value []byte) error { // if active memTable size > define size, change to immutable memTable if d.activeSize+keyLen+valueLen > d.option.MemSize { // add to immutable memTable list - d.addOldMemTable(d.mem) + if putTypeInt == 1 { + d.addOldMemTable(d.mem) + } else { + d.addWalDataToMemTable(d.mem) + putTypeInt = 1 + } // create new active memTable d.mem = NewMemTable() d.activeSize = 0 @@ -158,8 +169,10 @@ func (d *Db) Get(key []byte) ([]byte, error) { return value, nil } + mtList := append(append([]*MemTable(nil), d.walDataMtList...), d.oldList...) + // if active memTable not found, get from immutable memTable - for _, list := range d.oldList { + for _, list := range mtList { value, err = list.Get(string(key)) if err == nil { return value, nil @@ -200,7 +213,8 @@ func (d *Db) Delete(key []byte) error { return nil } // get from immutable memTable - for _, list := range d.oldList { + mtList := append(append([]*MemTable(nil), d.walDataMtList...), d.oldList...) + for _, list := range mtList { get, err = list.Get(string(key)) if err == nil { d.totalSize -= int64(len(key) + len(get)) @@ -228,6 +242,10 @@ func (d *Db) addOldMemTable(oldList *MemTable) { d.oldListChan <- oldList } +func (d *Db) addWalDataToMemTable(walDataMt *MemTable) { + d.walDataMtListChan <- walDataMt +} + func (d *Db) async() { for oldList := range d.oldListChan { for key, value := range oldList.table { @@ -273,6 +291,7 @@ func (d *Db) load() { switch record.Type { case putType: // Assuming Db has a Put method + putTypeInt = 0 err := d.Put(record.Key, record.Value) if err != nil { // Handle the error: log it, panic, return, etc. diff --git a/lib/wal/wal.go b/lib/wal/wal.go index 5e4bf6f..90893bc 100644 --- a/lib/wal/wal.go +++ b/lib/wal/wal.go @@ -32,9 +32,14 @@ type Wal struct { // NewWal creates a new WAL. func NewWal(options Options) (*Wal, error) { + // Construct the full path to the WAL file using the given directory path and + // a predefined file name. fileName := options.DirPath + walFileName + + // Check if the WAL file exists. stat, err := os.Stat(fileName) if err != nil { + // If the file doesn't exist, create the necessary directory and the file itself. if os.IsNotExist(err) { err := os.MkdirAll(options.DirPath, os.ModePerm) if err != nil { @@ -46,12 +51,18 @@ func NewWal(options Options) (*Wal, error) { } } } else { + // If the file exists, calculate the log number based on its size and + // the specified file size. options.LogNum = uint32(stat.Size() / options.FileSize) } + + // Create a memory-mapped I/O manager for the WAL file. mapIO, err := fileio.NewMMapIOManager(fileName, options.FileSize) if err != nil { return nil, err } + + // Initialize and return a new WAL instance with the provided options and created I/O manager. return &Wal{ m: mapIO, logNum: options.LogNum, From 447c743d3acdf6a188ed43f9cda7bc077e147582 Mon Sep 17 00:00:00 2001 From: qishenonly <1050026498@qq.com> Date: Mon, 21 Aug 2023 22:26:35 +0800 Subject: [PATCH 28/35] add default memory option --- config/options.go | 11 +++++++++++ db/memory/db_test.go | 15 ++++++--------- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/config/options.go b/config/options.go index 9380f8d..5a33c3e 100644 --- a/config/options.go +++ b/config/options.go @@ -129,3 +129,14 @@ var DefaultWriteBatchOptions = WriteBatchOptions{ MaxBatchNum: 10000, SyncWrites: true, } + +var DefaultDbMemoryOptions = DbMemoryOptions{ + Option: DefaultOptions, + LogNum: 1000, + FileSize: 256 * 1024 * 1024, // 256MB + SaveTime: 100 * 1000, + MemSize: 256 * 1024 * 1024, // 256MB + TotalMemSize: 1 * 1024 * 1024 * 1024, // 2GB + ColumnName: "default", + Wal: nil, +} diff --git a/db/memory/db_test.go b/db/memory/db_test.go index 1cae43a..eb22632 100644 --- a/db/memory/db_test.go +++ b/db/memory/db_test.go @@ -15,15 +15,12 @@ func TestPutAndGet(t *testing.T) { dir, _ := os.MkdirTemp("", "flydb-benchmark") opts.DirPath = dir opts.DataFileSize = 64 * 1024 * 1024 - memoryOptions := config.DbMemoryOptions{ - Option: opts, - LogNum: 100, - FileSize: 100 * 1024 * 1024, - SaveTime: 100 * 1000, - MemSize: 256 * 1024 * 1024, - TotalMemSize: 2 * 1024 * 1024 * 1024, - } - db, err := NewDB(memoryOptions) + memOpt := config.DefaultDbMemoryOptions + memOpt.LogNum = 100 + memOpt.FileSize = 100 * 1024 * 1024 + memOpt.TotalMemSize = 2 * 1024 * 1024 * 1024 + + db, err := NewDB(memOpt) defer db.Clean() assert.Nil(t, err) assert.NotNil(t, db) From 4e7b812931a725f4f659454ff99ae297d395d5be Mon Sep 17 00:00:00 2001 From: sjcsjc123 <1401189096@qq.com> Date: Mon, 21 Aug 2023 23:43:07 +0800 Subject: [PATCH 29/35] feat: fix bug with channel block --- db/memory/db.go | 65 ++++++++++++++++++++++++++++++++++++++++---- db/memory/db_test.go | 8 ++---- go.mod | 1 + go.sum | 2 ++ 4 files changed, 65 insertions(+), 11 deletions(-) diff --git a/db/memory/db.go b/db/memory/db.go index d67a478..457b7a4 100644 --- a/db/memory/db.go +++ b/db/memory/db.go @@ -25,7 +25,7 @@ type Db struct { oldListChan chan *MemTable totalSize int64 activeSize int64 - pool *sync.Pool + walTask chan func() errMsgCh chan string mux sync.RWMutex walDataMtList []*MemTable @@ -72,10 +72,11 @@ func NewDB(option config.DbMemoryOptions) (*Db, error) { activeSize: 0, totalSize: 0, wal: w, - pool: &sync.Pool{New: func() interface{} { return make([]byte, 0, 1024) }}, + walTask: make(chan func(), 10000000), mux: sync.RWMutex{}, walDataMtList: make([]*MemTable, 0), walDataMtListChan: make(chan *MemTable, 1000000), + errMsgCh: make(chan string, 1000000), } // when loading, the system will execute the every record in wal @@ -86,6 +87,8 @@ func NewDB(option config.DbMemoryOptions) (*Db, error) { go d.wal.AsyncSave() // async handler error message go d.handlerErrMsg() + // async worker + go d.work() return d, nil } @@ -106,7 +109,12 @@ func (d *Db) Put(key []byte, value []byte) error { keyLen := int64(len(key)) valueLen := int64(len(value)) - d.pool.Put(func() { + //err := d.wal.Put(key, value) + //if err != nil { + // return err + //} + + d.walTask <- func() { // Write to wal, try 3 times ok := false for i := 0; i < 3; i++ { @@ -122,7 +130,28 @@ func (d *Db) Put(key []byte, value []byte) error { d.errMsgCh <- "write to wal error when delete the key: " + string(key) + " error: " + err.Error() } } - }) + } + + //err := d.pool.Submit(func() { + // // Write to wal, try 3 times + // ok := false + // for i := 0; i < 3; i++ { + // err := d.wal.Put(key, value) + // if err == nil { + // ok = true + // break + // } + // } + // if !ok { + // err := d.wal.Delete(key) + // if err != nil { + // d.errMsgCh <- "write to wal error when delete the key: " + string(key) + " error: " + err.Error() + // } + // } + //}) + //if err != nil { + // return err + //} // if sync write, save wal if d.option.Option.SyncWrite { @@ -187,7 +216,7 @@ func (d *Db) Delete(key []byte) error { d.mux.Lock() defer d.mux.Unlock() - d.pool.Put(func() { + d.walTask <- func() { // Write to wal, try 3 times ok := false for i := 0; i < 3; i++ { @@ -203,7 +232,25 @@ func (d *Db) Delete(key []byte) error { d.errMsgCh <- "write to wal error when delete the key: " + string(key) + " error: " + err.Error() } } - }) + } + + //err := d.pool.Submit(func() { + // // Write to wal, try 3 times + // ok := false + // for i := 0; i < 3; i++ { + // err := d.wal.Delete(key) + // if err == nil { + // ok = true + // break + // } + // } + // if !ok { + // err := d.wal.Delete(key) + // if err != nil { + // d.errMsgCh <- "write to wal error when delete the key: " + string(key) + " error: " + err.Error() + // } + // } + //}) // get from active memTable get, err := d.mem.Get(string(key)) if err == nil { @@ -238,6 +285,12 @@ func (d *Db) Close() error { return d.db.Close() } +func (d *Db) work() { + for task := range d.walTask { + task() + } +} + func (d *Db) addOldMemTable(oldList *MemTable) { d.oldListChan <- oldList } diff --git a/db/memory/db_test.go b/db/memory/db_test.go index eb22632..e9fb1a8 100644 --- a/db/memory/db_test.go +++ b/db/memory/db_test.go @@ -11,14 +11,12 @@ import ( ) func TestPutAndGet(t *testing.T) { - opts := config.DefaultOptions - dir, _ := os.MkdirTemp("", "flydb-benchmark") - opts.DirPath = dir - opts.DataFileSize = 64 * 1024 * 1024 + err := os.Mkdir("./flydb-benchmark", os.ModePerm) memOpt := config.DefaultDbMemoryOptions memOpt.LogNum = 100 - memOpt.FileSize = 100 * 1024 * 1024 + memOpt.FileSize = 256 * 1024 * 1024 memOpt.TotalMemSize = 2 * 1024 * 1024 * 1024 + memOpt.Option.DirPath = "./flydb-benchmark" db, err := NewDB(memOpt) defer db.Clean() diff --git a/go.mod b/go.mod index d90ae5e..8f28b68 100644 --- a/go.mod +++ b/go.mod @@ -47,6 +47,7 @@ require ( github.com/kr/pretty v0.3.0 // indirect github.com/mattn/go-colorable v0.1.12 // indirect github.com/mattn/go-isatty v0.0.16 // indirect + github.com/panjf2000/ants v1.3.0 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/rogpeppe/go-internal v1.9.0 // indirect go.uber.org/atomic v1.7.0 // indirect diff --git a/go.sum b/go.sum index ac66ffa..8c568fd 100644 --- a/go.sum +++ b/go.sum @@ -140,6 +140,8 @@ github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lN github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/nbutton23/zxcvbn-go v0.0.0-20180912185939-ae427f1e4c1d/go.mod h1:o96djdrsSGy3AWPyBgZMAGfxZNfgntdJG+11KU4QvbU= +github.com/panjf2000/ants v1.3.0 h1:8pQ+8leaLc9lys2viEEr8md0U4RN6uOSUCE9bOYjQ9M= +github.com/panjf2000/ants v1.3.0/go.mod h1:AaACblRPzq35m1g3enqYcxspbbiOJJYaxU2wMpm1cXY= github.com/pascaldekloe/goe v0.1.0 h1:cBOtyMzM9HTpWjXfbbunk26uA6nG3a8n06Wieeh0MwY= github.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= From c0214bf1a52b5e92ce1b068eae7aa1a81e96b38c Mon Sep 17 00:00:00 2001 From: qishenonly <1050026498@qq.com> Date: Sun, 10 Sep 2023 09:35:57 +0800 Subject: [PATCH 30/35] Add an unresolved bug --- lib/wal/wal.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/wal/wal.go b/lib/wal/wal.go index 90893bc..1d4af8c 100644 --- a/lib/wal/wal.go +++ b/lib/wal/wal.go @@ -139,6 +139,8 @@ func (w *Wal) InitReading() { // ReadNext reads the next operation from the WAL. func (w *Wal) ReadNext() (*Record, error) { buffer := make([]byte, 4+2+1+4) // Buffer size to read headers + + //TODO: has bug here, get the buffer is [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0] _, err := w.m.Read(buffer, w.readOffset) if err == io.EOF { return nil, io.EOF From 1e31cbc0787de3d0c1250bce6d817b8c6d0f9e92 Mon Sep 17 00:00:00 2001 From: qishenonly <1050026498@qq.com> Date: Sun, 10 Sep 2023 11:17:44 +0800 Subject: [PATCH 31/35] feat: complete memory keys method --- db/memory/db.go | 31 ++++++++++++++++++++++++++++++- db/memory/db_test.go | 24 ++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 1 deletion(-) diff --git a/db/memory/db.go b/db/memory/db.go index 457b7a4..e59ee99 100644 --- a/db/memory/db.go +++ b/db/memory/db.go @@ -274,7 +274,36 @@ func (d *Db) Delete(key []byte) error { } func (d *Db) Keys() ([][]byte, error) { - panic("implement me") + d.mux.RLock() + defer d.mux.RUnlock() + + // Create a slice to hold the keys + keys := make([][]byte, 0) + + // Collect keys from the active MemTable + for key := range d.mem.table { + keys = append(keys, []byte(key)) + } + + // Collect keys from the immutable MemTables (WAL data MemTables) + for _, mt := range d.walDataMtList { + for key := range mt.table { + keys = append(keys, []byte(key)) + } + } + + // Collect keys from the old MemTables (flushed MemTables) + for _, mt := range d.oldList { + for key := range mt.table { + keys = append(keys, []byte(key)) + } + } + + // Collect keys from the persistent storage (your DB implementation) + persistentKeys := d.db.GetListKeys() + keys = append(keys, persistentKeys...) + + return keys, nil } func (d *Db) Close() error { diff --git a/db/memory/db_test.go b/db/memory/db_test.go index e9fb1a8..00a0768 100644 --- a/db/memory/db_test.go +++ b/db/memory/db_test.go @@ -38,3 +38,27 @@ func TestPutAndGet(t *testing.T) { end = time.Now() fmt.Println("get time: ", end.Sub(start).String()) } + +func TestDb_Keys(t *testing.T) { + err := os.Mkdir("./flydb", os.ModePerm) + memOpt := config.DefaultDbMemoryOptions + memOpt.LogNum = 100 + memOpt.FileSize = 256 * 1024 * 1024 + memOpt.TotalMemSize = 2 * 1024 * 1024 * 1024 + memOpt.Option.DirPath = "./" + + db, err := NewDB(memOpt) + defer db.Clean() + assert.Nil(t, err) + assert.NotNil(t, db) + + for n := 0; n < 100; n++ { + err = db.Put(randkv.GetTestKey(n), randkv.RandomValue(24)) + assert.Nil(t, err) + } + + keys, err := db.Keys() + assert.Nil(t, err) + assert.Equal(t, 100, len(keys)) + t.Log(keys) +} From 8e8177e4b3a37ce5f0321ea6820889687eb56d46 Mon Sep 17 00:00:00 2001 From: qishenonly <1050026498@qq.com> Date: Sun, 10 Sep 2023 11:22:56 +0800 Subject: [PATCH 32/35] feat: complete annotation in column --- db/column/column.go | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/db/column/column.go b/db/column/column.go index 40dc696..d9be1a6 100644 --- a/db/column/column.go +++ b/db/column/column.go @@ -91,6 +91,17 @@ type column struct { option config.ColumnOptions // column family options } +// CreateColumnFamily creates a new column family and associates it with the specified name. +// If a column family with the same name already exists, it returns an error. +// Column families are logical groups within the database that can contain different types of data. Each column family has its own in-memory table, Write-Ahead Logging (WAL), and persistent storage. +// +// Parameters: +// - name: The name of the column family to create. +// +// Returns: +// - If the column family is successfully created, it returns nil. +// If a column family with the same name already exists or an error occurs during creation, +// it returns the corresponding error message. func (c *column) CreateColumnFamily(name string) error { c.mux.Lock() defer c.mux.Unlock() @@ -106,6 +117,17 @@ func (c *column) CreateColumnFamily(name string) error { return nil } +// DropColumnFamily deletes a column family with the specified name. +// If the column family does not exist, it returns an error. +// This operation removes the associated data files and configurations for the column family. +// +// Parameters: +// - name: The name of the column family to delete. +// +// Returns: +// - If the column family is successfully deleted, it returns nil. +// If the column family does not exist or an error occurs during deletion, +// it returns the corresponding error message. func (c *column) DropColumnFamily(name string) error { c.mux.Lock() defer c.mux.Unlock() @@ -120,6 +142,12 @@ func (c *column) DropColumnFamily(name string) error { return nil } +// ListColumnFamilies returns a list of all existing column families in the database. +// +// Returns: +// - A slice of strings containing the names of all existing column families. +// - If there are no column families or an error occurs during retrieval, +// it returns an empty slice and an error message. func (c *column) ListColumnFamilies() ([]string, error) { c.mux.RLock() defer c.mux.RUnlock() @@ -146,6 +174,15 @@ func (c *column) Keys(cf string) ([][]byte, error) { return c.columnFamily[cf].Keys() } +// loadColumn loads and initializes column families from the specified base directory path. +// +// Parameters: +// - option: Configuration options for loading the column families. +// +// Returns: +// - A map where keys are column family names and values are corresponding in-memory databases (memory.Db). +// - If the base directory does not exist, an error is returned. +// - If there are any errors while loading or initializing column families, an error is returned. func loadColumn(option config.ColumnOptions) (map[string]*memory.Db, error) { base := option.DbMemoryOptions.Option.DirPath base = strings.Trim(base, "/") From 43c930fdaf7a21a229be9cd03aeb26da095ba06c Mon Sep 17 00:00:00 2001 From: qishenonly <1050026498@qq.com> Date: Sun, 10 Sep 2023 11:23:25 +0800 Subject: [PATCH 33/35] feat: add column test --- db/column/column_test.go | 236 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 236 insertions(+) create mode 100644 db/column/column_test.go diff --git a/db/column/column_test.go b/db/column/column_test.go new file mode 100644 index 0000000..fabeacf --- /dev/null +++ b/db/column/column_test.go @@ -0,0 +1,236 @@ +package column + +import ( + "github.com/ByteStorage/FlyDB/config" + "github.com/ByteStorage/FlyDB/lib/wal" + "github.com/stretchr/testify/assert" + "os" + "testing" +) + +var DefaultColumnOptions = config.ColumnOptions{ + DbMemoryOptions: config.DbMemoryOptions{ + Option: config.Options{ + DirPath: "./", + DataFileSize: 256 * 1024 * 1024, // 256MB + }, + LogNum: 1000, + FileSize: 256 * 1024 * 1024, // 256MB + SaveTime: 100 * 1000, + MemSize: 256 * 1024 * 1024, // 256MB + TotalMemSize: 1 * 1024 * 1024 * 1024, // 2GB + ColumnName: "default", + Wal: nil, + }, + WalOptions: wal.Options{ + DirPath: "./wal_test", + LogNum: 100, + FileSize: 100 * 1024 * 1024, + SaveTime: 100 * 1000, + }, +} + +func CleanWalTest() { + err := os.RemoveAll("./wal_test") + if err != nil { + return + } +} + +func TestColumn_CreateColumnFamily(t *testing.T) { + option := DefaultColumnOptions + defer CleanWalTest() + + column, err := NewColumn(option) + assert.Nil(t, err) + assert.NotNil(t, column) + + err = column.CreateColumnFamily("test") + assert.Nil(t, err) + + err = column.CreateColumnFamily("test") + assert.NotNil(t, err) + + err = column.CreateColumnFamily("test1") + assert.Nil(t, err) + + err = column.CreateColumnFamily("test2") + assert.Nil(t, err) + + err = column.DropColumnFamily("test") + assert.Nil(t, err) + + err = column.DropColumnFamily("test1") + assert.Nil(t, err) + + err = column.DropColumnFamily("test2") + assert.Nil(t, err) +} + +func TestColumn_ListColumnFamilies(t *testing.T) { + option := DefaultColumnOptions + defer CleanWalTest() + + column, err := NewColumn(option) + assert.Nil(t, err) + assert.NotNil(t, column) + + err = column.CreateColumnFamily("test") + assert.Nil(t, err) + + err = column.CreateColumnFamily("test") + assert.NotNil(t, err) + + err = column.CreateColumnFamily("test1") + assert.Nil(t, err) + + err = column.CreateColumnFamily("test2") + assert.Nil(t, err) + + list, err := column.ListColumnFamilies() + assert.Nil(t, err) + assert.Equal(t, 4, len(list)) + + err = column.DropColumnFamily("test") + assert.Nil(t, err) + + err = column.DropColumnFamily("test1") + assert.Nil(t, err) + + err = column.DropColumnFamily("test2") + assert.Nil(t, err) +} + +func TestColumn_Put(t *testing.T) { + option := DefaultColumnOptions + defer CleanWalTest() + + column, err := NewColumn(option) + assert.Nil(t, err) + assert.NotNil(t, column) + + err = column.CreateColumnFamily("test") + assert.Nil(t, err) + + err = column.Put("test", []byte("test"), []byte("test")) + assert.Nil(t, err) + + err = column.Put("test", []byte("test1"), []byte("test1")) + assert.Nil(t, err) + + err = column.Put("test", []byte("test2"), []byte("test2")) + assert.Nil(t, err) + + err = column.DropColumnFamily("test") + assert.Nil(t, err) + +} + +func TestColumn_Get(t *testing.T) { + option := DefaultColumnOptions + defer CleanWalTest() + + column, err := NewColumn(option) + assert.Nil(t, err) + assert.NotNil(t, column) + + err = column.CreateColumnFamily("test") + assert.Nil(t, err) + + err = column.Put("test", []byte("test"), []byte("test")) + assert.Nil(t, err) + + err = column.Put("test", []byte("test1"), []byte("test1")) + assert.Nil(t, err) + + err = column.Put("test", []byte("test2"), []byte("test2")) + assert.Nil(t, err) + + value, err := column.Get("test", []byte("test")) + assert.Nil(t, err) + assert.Equal(t, []byte("test"), value) + + value, err = column.Get("test", []byte("test1")) + assert.Nil(t, err) + assert.Equal(t, []byte("test1"), value) + + value, err = column.Get("test", []byte("test2")) + assert.Nil(t, err) + assert.Equal(t, []byte("test2"), value) + + err = column.DropColumnFamily("test") + assert.Nil(t, err) + +} + +func TestColumn_Delete(t *testing.T) { + option := DefaultColumnOptions + defer CleanWalTest() + + column, err := NewColumn(option) + assert.Nil(t, err) + assert.NotNil(t, column) + + err = column.CreateColumnFamily("test") + assert.Nil(t, err) + + err = column.Put("test", []byte("test"), []byte("test")) + assert.Nil(t, err) + + err = column.Put("test", []byte("test1"), []byte("test1")) + assert.Nil(t, err) + + value, err := column.Get("test", []byte("test")) + assert.Nil(t, err) + assert.Equal(t, []byte("test"), value) + + value, err = column.Get("test", []byte("test1")) + assert.Nil(t, err) + assert.Equal(t, []byte("test1"), value) + + err = column.Delete("test", []byte("test")) + assert.Nil(t, err) + + err = column.Delete("test", []byte("test1")) + assert.Nil(t, err) + + value, err = column.Get("test", []byte("test")) + assert.NotNil(t, err) + assert.Nil(t, value) + + value, err = column.Get("test", []byte("test1")) + assert.NotNil(t, err) + assert.Nil(t, value) + + err = column.DropColumnFamily("test") + assert.Nil(t, err) + +} + +func TestColumn_Keys(t *testing.T) { + option := DefaultColumnOptions + defer CleanWalTest() + + column, err := NewColumn(option) + assert.Nil(t, err) + assert.NotNil(t, column) + + err = column.CreateColumnFamily("test") + assert.Nil(t, err) + + err = column.Put("test", []byte("test"), []byte("test")) + assert.Nil(t, err) + + err = column.Put("test", []byte("test1"), []byte("test1")) + assert.Nil(t, err) + + err = column.Put("test", []byte("test2"), []byte("test2")) + assert.Nil(t, err) + + keys, err := column.Keys("test") + assert.Nil(t, err) + assert.Equal(t, 3, len(keys)) + + err = column.DropColumnFamily("test") + assert.Nil(t, err) +} From fb3ef4ef18a95a2732cef218ea994aea3b073322 Mon Sep 17 00:00:00 2001 From: qishenonly <1050026498@qq.com> Date: Tue, 12 Sep 2023 21:59:31 +0800 Subject: [PATCH 34/35] ignore write wal --- db/memory/db.go | 28 +--------------------------- 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/db/memory/db.go b/db/memory/db.go index e59ee99..5393c22 100644 --- a/db/memory/db.go +++ b/db/memory/db.go @@ -109,30 +109,7 @@ func (d *Db) Put(key []byte, value []byte) error { keyLen := int64(len(key)) valueLen := int64(len(value)) - //err := d.wal.Put(key, value) - //if err != nil { - // return err - //} - - d.walTask <- func() { - // Write to wal, try 3 times - ok := false - for i := 0; i < 3; i++ { - err := d.wal.Put(key, value) - if err == nil { - ok = true - break - } - } - if !ok { - err := d.wal.Delete(key) - if err != nil { - d.errMsgCh <- "write to wal error when delete the key: " + string(key) + " error: " + err.Error() - } - } - } - - //err := d.pool.Submit(func() { + //d.walTask <- func() { // // Write to wal, try 3 times // ok := false // for i := 0; i < 3; i++ { @@ -148,9 +125,6 @@ func (d *Db) Put(key []byte, value []byte) error { // d.errMsgCh <- "write to wal error when delete the key: " + string(key) + " error: " + err.Error() // } // } - //}) - //if err != nil { - // return err //} // if sync write, save wal From 6076723d9ab3d590b98d4a603b7ceabc4052550c Mon Sep 17 00:00:00 2001 From: qishenonly <1050026498@qq.com> Date: Wed, 13 Sep 2023 19:48:00 +0800 Subject: [PATCH 35/35] test column insert sql --- db/column/column_test.go | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/db/column/column_test.go b/db/column/column_test.go index fabeacf..a5561ae 100644 --- a/db/column/column_test.go +++ b/db/column/column_test.go @@ -234,3 +234,25 @@ func TestColumn_Keys(t *testing.T) { err = column.DropColumnFamily("test") assert.Nil(t, err) } + +func TestColumn_Put_Sql(t *testing.T) { + option := DefaultColumnOptions + defer CleanWalTest() + + column, err := NewColumn(option) + assert.Nil(t, err) + assert.NotNil(t, column) + + err = column.CreateColumnFamily("test") + assert.Nil(t, err) + + err = column.Put("test", []byte("test"), []byte("select * from database;")) + assert.Nil(t, err) + + value, err := column.Get("test", []byte("test")) + assert.Nil(t, err) + assert.Equal(t, []byte("select * from database;"), value) + + err = column.DropColumnFamily("test") + assert.Nil(t, err) +}