diff --git a/Makefile b/Makefile
index c4fed293fc3..c264d0f4b2c 100644
--- a/Makefile
+++ b/Makefile
@@ -110,6 +110,7 @@ COMMANDS += rpctest
 COMMANDS += sentry
 COMMANDS += state
 COMMANDS += txpool
+COMMANDS += verkle
 
 # build each command using %.cmd rule
 $(COMMANDS): %: %.cmd
diff --git a/cmd/verkle/main.go b/cmd/verkle/main.go
new file mode 100644
index 00000000000..b097569e55d
--- /dev/null
+++ b/cmd/verkle/main.go
@@ -0,0 +1,53 @@
+// Command verkle regenerates a Pedersen-hashed flat state database
+// from an existing Erigon chaindata directory.
+package main
+
+import (
+	"context"
+	"flag"
+
+	"github.com/ledgerwatch/erigon-lib/kv/mdbx"
+	"github.com/ledgerwatch/log/v3"
+)
+
+func main() {
+	ctx := context.Background()
+	chaindata := flag.String("chaindata", "chaindata", "path to the chaindata database file")
+	out := flag.String("out", "out", "path to the output chaindata database file")
+	flag.Parse()
+	log.Root().SetHandler(log.LvlFilterHandler(log.Lvl(3), log.StderrHandler))
+	db, err := mdbx.Open(*chaindata, log.Root(), true)
+	if err != nil {
+		log.Error("Error while opening database", "err", err.Error())
+		return
+	}
+	defer db.Close()
+
+	dbOut, err := mdbx.Open(*out, log.Root(), false)
+	if err != nil {
+		log.Error("Error while opening db transaction", "err", err.Error())
+		return
+	}
+
+	txOut, err := dbOut.BeginRw(ctx)
+	if err != nil {
+		log.Error("Error while opening db transaction", "err", err.Error())
+		return
+	}
+	defer txOut.Rollback()
+
+	tx, err := db.BeginRo(ctx)
+	if err != nil {
+		log.Error("Error while opening db transaction", "err", err.Error())
+		return
+	}
+	// Read-only transactions still hold an MDBX reader slot; release it on exit.
+	defer tx.Rollback()
+
+	log.Info("Opened Database", "datadir", *chaindata)
+
+	// Surface failures instead of silently discarding the returned error.
+	if err := RegeneratePedersenHashstate(txOut, tx); err != nil {
+		log.Error("Error while regenerating the Pedersen hashed state", "err", err.Error())
+	}
+}
diff --git a/cmd/verkle/pedersen_hashstate.go b/cmd/verkle/pedersen_hashstate.go
new file mode 100644
index 00000000000..ad935665186
--- /dev/null
+++ b/cmd/verkle/pedersen_hashstate.go
@@ -0,0 +1,127 @@
+package main
+
+import (
+	"encoding/binary"
+	"time"
+
+	"github.com/holiman/uint256"
+	"github.com/ledgerwatch/erigon-lib/etl"
+	"github.com/ledgerwatch/erigon-lib/kv"
+	"github.com/ledgerwatch/erigon/common"
+	"github.com/ledgerwatch/erigon/core/types/accounts"
+	"github.com/ledgerwatch/erigon/turbo/trie/vtree"
+	"github.com/ledgerwatch/log/v3"
+)
+
+func retrieveAccountKeys(address common.Address) (versionKey, balanceKey, codeSizeKey, codeHashKey, nonceKey [32]byte) {
+	// All five account leaves share the same 31-byte stem; only the last byte differs.
+	copy(versionKey[:], vtree.GetTreeKeyVersion(address[:]))
+	copy(balanceKey[:], versionKey[:])
+	balanceKey[31] = vtree.BalanceLeafKey
+	copy(nonceKey[:], versionKey[:])
+	nonceKey[31] = vtree.NonceLeafKey
+	copy(codeSizeKey[:], versionKey[:])
+	codeSizeKey[31] = vtree.CodeSizeLeafKey
+	copy(codeHashKey[:], versionKey[:])
+	codeHashKey[31] = vtree.CodeKeccakLeafKey
+	return
+}
+
+func RegeneratePedersenHashstate(outTx kv.RwTx, readTx kv.Tx) error {
+	pedersenHashStateBucket := "PedersenHashState"
+	pedersenHashStorageBucket := "PedersenHashStorage"
+	start := time.Now()
+	log.Info("Started Generation of the Pedersen Hashed State")
+	if err := outTx.CreateBucket(pedersenHashStateBucket); err != nil {
+		return err
+	}
+	if err := outTx.CreateBucket(pedersenHashStorageBucket); err != nil {
+		return err
+	}
+	stateCollector := etl.NewCollector("Pedersen State", "/tmp/etl-temp", etl.NewSortableBuffer(etl.BufferOptimalSize))
+	defer stateCollector.Close()
+
+	storageCollector := etl.NewCollector("Pedersen Storage", "/tmp/etl-temp", etl.NewSortableBuffer(etl.BufferOptimalSize))
+	defer storageCollector.Close()
+
+	plainStateCursor, err := readTx.Cursor(kv.PlainState)
+	if err != nil {
+		return err
+	}
+	logInterval := time.NewTicker(30 * time.Second)
+	for k, v, err := plainStateCursor.First(); k != nil; k, v, err = plainStateCursor.Next() {
+		if err != nil {
+			return err
+		}
+		if len(k) == 20 {
+			versionKey, balanceKey, codeSizeKey, codeHashKey, nonceKey := retrieveAccountKeys(common.BytesToAddress(k))
+			if err := stateCollector.Collect(versionKey[:], []byte{0}); err != nil {
+				return err
+			}
+			// Process nonce
+			nonceValue := make([]byte, 8)
+			acc := accounts.NewAccount()
+			if err := acc.DecodeForStorage(v); err != nil {
+				return err
+			}
+			binary.LittleEndian.PutUint64(nonceValue, acc.Nonce)
+			if err := stateCollector.Collect(nonceKey[:], nonceValue); err != nil {
+				return err
+			}
+			// Process Balance (big-endian big.Int bytes reversed into a 32-byte little-endian value)
+			balanceBytes := acc.Balance.ToBig().Bytes()
+			balanceValue := make([]byte, 32)
+			if len(balanceBytes) > 0 {
+				for i := range balanceBytes {
+					balanceValue[len(balanceBytes)-i-1] = balanceBytes[i]
+				}
+			}
+			if err := stateCollector.Collect(balanceKey[:], balanceValue); err != nil {
+				return err
+			}
+			// Process Code Size
+			codeSizeValue := make([]byte, 8)
+			if !accounts.IsEmptyCodeHash(acc.CodeHash) {
+				code, err := readTx.GetOne(kv.Code, acc.CodeHash[:])
+				if err != nil {
+					return err
+				}
+				// Chunkify contract code and build keys for each chunks and insert them in the tree
+				chunkedCode, err := vtree.ChunkifyCode(code)
+				if err != nil {
+					return err
+				}
+				// Write code chunks
+				for i := 0; i < len(chunkedCode); i += 32 {
+					if err := stateCollector.Collect(vtree.GetTreeKeyCodeChunk(k, uint256.NewInt(uint64(i)/32)), chunkedCode[i:i+32]); err != nil { return err }
+				}
+
+				// Set code size
+				binary.LittleEndian.PutUint64(codeSizeValue, uint64(len(code)))
+			}
+
+			if err := stateCollector.Collect(codeSizeKey[:], codeSizeValue); err != nil {
+				return err
+			}
+			// Process Code Hash
+			if err := stateCollector.Collect(codeHashKey[:], acc.CodeHash[:]); err != nil {
+				return err
+			}
+
+		} else if len(k) == 60 {
+			// Process storage: PlainState storage key layout is address(20) | incarnation(8) | slot(32)
+			if err := storageCollector.Collect(vtree.GetTreeKeyStorageSlot(k[:20], new(uint256.Int).SetBytes(k[28:])), v); err != nil { return err }
+		}
+		select {
+		case <-logInterval.C:
+			log.Info("[Pedersen Hashing] Current progress in Collection Phase", "key", common.Bytes2Hex(k))
+		default:
+		}
+	}
+	if err := stateCollector.Load(outTx, pedersenHashStateBucket, etl.IdentityLoadFunc, etl.TransformArgs{}); err != nil { return err }
+	if err := storageCollector.Load(outTx, pedersenHashStorageBucket, etl.IdentityLoadFunc, etl.TransformArgs{}); err != nil { return err }
+
+	log.Info("Pedersen hashed state finished", "elapsed", time.Since(start))
+	return outTx.Commit()
+
+}
diff --git a/turbo/trie/vtree/verkle_utils.go b/turbo/trie/vtree/verkle_utils.go
new file mode 100644
index 00000000000..4fe02326ef6
--- /dev/null
+++ b/turbo/trie/vtree/verkle_utils.go
@@ -0,0 +1,204 @@
+package vtree
+
+import (
+	"github.com/crate-crypto/go-ipa/bandersnatch/fr"
+	"github.com/gballet/go-verkle"
+	"github.com/holiman/uint256"
+)
+
+const (
+	VersionLeafKey    = 0
+	BalanceLeafKey    = 1
+	NonceLeafKey      = 2
+	CodeKeccakLeafKey = 3
+	CodeSizeLeafKey   = 4
+)
+
+var (
+	zero                = uint256.NewInt(0)
+	HeaderStorageOffset = uint256.NewInt(64)
+	CodeOffset          = uint256.NewInt(128)
+	// NOTE(review): the verkle spec defines MAIN_STORAGE_OFFSET as 256**31 (1 << 248), but Lsh(256, 31) computes 256 << 31 — confirm against the spec/go-verkle before relying on main-storage keys.
+	MainStorageOffset = new(uint256.Int).Lsh(uint256.NewInt(256), 31)
+	VerkleNodeWidth   = uint256.NewInt(256)
+	codeStorageDelta  = uint256.NewInt(0).Sub(CodeOffset, HeaderStorageOffset)
+
+	getTreePolyIndex0Point *verkle.Point
+)
+
+func init() {
+	getTreePolyIndex0Point = new(verkle.Point)
+	err := getTreePolyIndex0Point.SetBytes([]byte{34, 25, 109, 242, 193, 5, 144, 224, 76, 52, 189, 92, 197, 126, 9, 145, 27, 152, 199, 130, 165, 3, 210, 27, 193, 131, 142, 28, 110, 26, 16, 191})
+	if err != nil {
+		panic(err)
+	}
+}
+
+// GetTreeKey performs both the work of the spec's get_tree_key function, and that
+// of pedersen_hash: it builds the polynomial in pedersen_hash without having to
+// create a mostly zero-filled buffer and "type cast" it to a 128-long 16-byte
+// array. Since at most the first 5 coefficients of the polynomial will be non-zero,
+// these 5 coefficients are created directly.
+func GetTreeKey(address []byte, treeIndex *uint256.Int, subIndex byte) []byte {
+	if len(address) < 32 {
+		var aligned [32]byte
+		address = append(aligned[:32-len(address)], address...)
+	}
+	var poly [5]fr.Element
+
+	poly[0].SetZero()
+
+	// 32-byte address, interpreted as two little endian
+	// 16-byte numbers.
+	verkle.FromLEBytes(&poly[1], address[:16])
+	verkle.FromLEBytes(&poly[2], address[16:])
+
+	// little-endian, 32-byte aligned treeIndex
+	var index [32]byte
+	for i, b := range treeIndex.Bytes() {
+		index[len(treeIndex.Bytes())-1-i] = b
+	}
+	verkle.FromLEBytes(&poly[3], index[:16])
+	verkle.FromLEBytes(&poly[4], index[16:])
+
+	cfg, _ := verkle.GetConfig()
+	ret := cfg.CommitToPoly(poly[:], 0)
+
+	// add a constant point
+	ret.Add(ret, getTreePolyIndex0Point)
+
+	return PointToHash(ret, subIndex)
+
+}
+
+func GetTreeKeyAccountLeaf(address []byte, leaf byte) []byte {
+	return GetTreeKey(address, zero, leaf)
+}
+
+func GetTreeKeyVersion(address []byte) []byte {
+	return GetTreeKey(address, zero, VersionLeafKey)
+}
+
+func GetTreeKeyBalance(address []byte) []byte {
+	return GetTreeKey(address, zero, BalanceLeafKey)
+}
+
+func GetTreeKeyNonce(address []byte) []byte {
+	return GetTreeKey(address, zero, NonceLeafKey)
+}
+
+func GetTreeKeyCodeKeccak(address []byte) []byte {
+	return GetTreeKey(address, zero, CodeKeccakLeafKey)
+}
+
+func GetTreeKeyCodeSize(address []byte) []byte {
+	return GetTreeKey(address, zero, CodeSizeLeafKey)
+}
+
+func GetTreeKeyCodeChunk(address []byte, chunk *uint256.Int) []byte {
+	chunkOffset := new(uint256.Int).Add(CodeOffset, chunk)
+	treeIndex := new(uint256.Int).Div(chunkOffset, VerkleNodeWidth)
+	subIndexMod := new(uint256.Int).Mod(chunkOffset, VerkleNodeWidth).Bytes()
+	var subIndex byte
+	if len(subIndexMod) != 0 {
+		subIndex = subIndexMod[0]
+	}
+	return GetTreeKey(address, treeIndex, subIndex)
+}
+
+func GetTreeKeyStorageSlot(address []byte, storageKey *uint256.Int) []byte {
+	pos := storageKey.Clone()
+	if storageKey.Cmp(codeStorageDelta) < 0 {
+		pos.Add(HeaderStorageOffset, storageKey)
+	} else {
+		pos.Add(MainStorageOffset, storageKey)
+	}
+	treeIndex := new(uint256.Int).Div(pos, VerkleNodeWidth)
+
+	// calculate the sub_index, i.e. the index in the stem tree.
+	// Because the modulus is 256, it's the last byte of treeIndex
+	subIndexMod := new(uint256.Int).Mod(pos, VerkleNodeWidth).Bytes()
+	var subIndex byte
+	if len(subIndexMod) != 0 {
+		// Bytes() is a minimal big-endian encoding; after Mod 256 it
+		// is at most one byte long, and that byte is the
+		// least-significant byte of pos.
+		subIndex = subIndexMod[0] & 0xFF
+	}
+	return GetTreeKey(address, treeIndex, subIndex)
+}
+
+func PointToHash(evaluated *verkle.Point, suffix byte) []byte {
+	// The output of Bytes() is big endian for banderwagon. This
+	// introduces an imbalance in the tree, because hashes are
+	// elements of a 253-bit field. This means more than half the
+	// tree would be empty. To avoid this problem, use a little
+	// endian commitment and chop the MSB.
+	retb := evaluated.Bytes()
+	for i := 0; i < 16; i++ {
+		retb[31-i], retb[i] = retb[i], retb[31-i]
+	}
+	retb[31] = suffix
+	return retb[:]
+}
+
+const (
+	PUSH1  = byte(0x60)
+	PUSH3  = byte(0x62)
+	PUSH4  = byte(0x63)
+	PUSH7  = byte(0x66)
+	PUSH21 = byte(0x74)
+	PUSH30 = byte(0x7d)
+	PUSH32 = byte(0x7f)
+)
+
+// ChunkifyCode generates the chunked version of an array representing EVM bytecode
+func ChunkifyCode(code []byte) ([]byte, error) {
+	var (
+		chunkOffset = 0 // offset in the chunk
+		chunkCount  = len(code) / 31
+		codeOffset  = 0 // offset in the code
+	)
+	if len(code)%31 != 0 {
+		chunkCount++
+	}
+	chunks := make([]byte, chunkCount*32)
+	for i := 0; i < chunkCount; i++ {
+		// number of bytes to copy, 31 unless
+		// the end of the code has been reached.
+		end := 31 * (i + 1)
+		if len(code) < end {
+			end = len(code)
+		}
+
+		// Copy the code itself
+		copy(chunks[i*32+1:], code[31*i:end])
+
+		// chunk offset = taken from the
+		// last chunk.
+		if chunkOffset > 31 {
+			// skip offset calculation if push
+			// data covers the whole chunk
+			chunks[i*32] = 31
+			chunkOffset = 1
+			continue
+		}
+		chunks[32*i] = byte(chunkOffset)
+		chunkOffset = 0
+
+		// Check each instruction and update the offset
+		// it should be 0 unless a PUSHn overflows.
+		for ; codeOffset < end; codeOffset++ {
+			if code[codeOffset] >= PUSH1 && code[codeOffset] <= PUSH32 {
+				codeOffset += int(code[codeOffset] - PUSH1 + 1)
+				if codeOffset+1 >= 31*(i+1) {
+					codeOffset++
+					chunkOffset = codeOffset - 31*(i+1)
+					break
+				}
+			}
+		}
+	}
+
+	return chunks, nil
+}
diff --git a/turbo/trie/vtree/verkle_utils_test.go b/turbo/trie/vtree/verkle_utils_test.go
new file mode 100644
index 00000000000..6b029f4b1d1
--- /dev/null
+++ b/turbo/trie/vtree/verkle_utils_test.go
@@ -0,0 +1,46 @@
+package vtree
+
+import (
+	"crypto/sha256"
+	"math/big"
+	"math/rand"
+	"testing"
+)
+
+func BenchmarkPedersenHash(b *testing.B) {
+	var addr, v [32]byte
+
+	b.ResetTimer()
+	b.ReportAllocs()
+
+	for i := 0; i < b.N; i++ {
+		rand.Read(v[:])
+		rand.Read(addr[:])
+		GetTreeKeyCodeSize(addr[:])
+	}
+}
+
+func sha256GetTreeKeyCodeSize(addr []byte) []byte {
+	digest := sha256.New()
+	digest.Write(addr)
+	treeIndexBytes := new(big.Int).Bytes()
+	var payload [32]byte
+	copy(payload[:len(treeIndexBytes)], treeIndexBytes)
+	digest.Write(payload[:])
+	h := digest.Sum(nil)
+	h[31] = CodeKeccakLeafKey
+	return h
+}
+
+func BenchmarkSha256Hash(b *testing.B) {
+	var addr, v [32]byte
+
+	b.ResetTimer()
+	b.ReportAllocs()
+
+	for i := 0; i < b.N; i++ {
+		rand.Read(v[:])
+		rand.Read(addr[:])
+		sha256GetTreeKeyCodeSize(addr[:])
+	}
+}