Skip to content

Commit

Permalink
Added pedersen hash generator utility in cmd/verkle (erigontech#5258)
Browse files Browse the repository at this point in the history
* added tree key functions for verkle tries

* added tree key functions for verkle tries

* added tree key functions for verkle tries

* Pedersen hash generator

* removed extra functions

* better comment

* ops

Co-authored-by: giuliorebuffo <[email protected]>
  • Loading branch information
Giulio2002 and giuliorebuffo authored Sep 2, 2022
1 parent 68cc6e4 commit faebec4
Show file tree
Hide file tree
Showing 5 changed files with 423 additions and 0 deletions.
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ COMMANDS += rpctest
COMMANDS += sentry
COMMANDS += state
COMMANDS += txpool
COMMANDS += verkle

# build each command using %.cmd rule
$(COMMANDS): %: %.cmd
Expand Down
46 changes: 46 additions & 0 deletions cmd/verkle/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
package main

import (
"context"
"flag"

"github.com/ledgerwatch/erigon-lib/kv/mdbx"
"github.com/ledgerwatch/log/v3"
)

// main parses the -chaindata and -out flags, opens both databases, starts a
// read transaction on the source and a read-write transaction on the output,
// and regenerates the Pedersen-hashed state from the former into the latter.
//
// Every failure is logged and causes an early return; deferred calls release
// both databases and both transactions on every exit path.
func main() {
	ctx := context.Background()
	chaindata := flag.String("chaindata", "chaindata", "path to the chaindata database file")
	out := flag.String("out", "out", "path to the output chaindata database file")
	flag.Parse()
	log.Root().SetHandler(log.LvlFilterHandler(log.Lvl(3), log.StderrHandler))

	// Source database (third argument true; presumably read-only — confirm
	// against the mdbx.Open signature).
	db, err := mdbx.Open(*chaindata, log.Root(), true)
	if err != nil {
		log.Error("Error while opening database", "err", err.Error())
		return
	}
	defer db.Close()

	// Output database that receives the regenerated buckets.
	dbOut, err := mdbx.Open(*out, log.Root(), false)
	if err != nil {
		log.Error("Error while opening database", "err", err.Error())
		return
	}
	defer dbOut.Close()

	txOut, err := dbOut.BeginRw(ctx)
	if err != nil {
		log.Error("Error while opening db transaction", "err", err.Error())
		return
	}
	// Harmless after a successful Commit inside RegeneratePedersenHashstate;
	// discards partial work on any failure.
	defer txOut.Rollback()

	tx, err := db.BeginRo(ctx)
	if err != nil {
		log.Error("Error while opening db transaction", "err", err.Error())
		return
	}
	defer tx.Rollback()

	log.Info("Opened Database", "datadir", *chaindata)

	if err := RegeneratePedersenHashstate(txOut, tx); err != nil {
		log.Error("Error while regenerating pedersen hash state", "err", err.Error())
	}
}
127 changes: 127 additions & 0 deletions cmd/verkle/pedersen_hashstate.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
package main

import (
"encoding/binary"
"time"

"github.com/holiman/uint256"
"github.com/ledgerwatch/erigon-lib/etl"
"github.com/ledgerwatch/erigon-lib/kv"
"github.com/ledgerwatch/erigon/common"
"github.com/ledgerwatch/erigon/core/types/accounts"
"github.com/ledgerwatch/erigon/turbo/trie/vtree"
"github.com/ledgerwatch/log/v3"
)

// retrieveAccountKeys derives the five account-header tree keys for address.
//
// The Pedersen hash is evaluated once (via vtree.GetTreeKeyVersion); the other
// keys are obtained by copying that result and overwriting the last byte with
// the corresponding leaf suffix.
//
// Returns the version, balance, code-size, code-hash and nonce keys, in that
// order.
func retrieveAccountKeys(address common.Address) (versionKey, balanceKey, codeSizeKey, codeHashKey, noncekey [32]byte) {
	// Compute the shared key once; only byte 31 differs between the leaves.
	stem := vtree.GetTreeKeyVersion(address[:])

	// The version key must be populated explicitly: the named result is a
	// distinct variable from the computed slice and would otherwise stay zero.
	copy(versionKey[:], stem)
	versionKey[31] = vtree.VersionLeafKey

	copy(balanceKey[:], stem)
	balanceKey[31] = vtree.BalanceLeafKey

	copy(noncekey[:], stem)
	noncekey[31] = vtree.NonceLeafKey

	copy(codeSizeKey[:], stem)
	codeSizeKey[31] = vtree.CodeSizeLeafKey

	copy(codeHashKey[:], stem)
	codeHashKey[31] = vtree.CodeKeccakLeafKey

	return versionKey, balanceKey, codeSizeKey, codeHashKey, noncekey
}

// RegeneratePedersenHashstate walks the kv.PlainState table through readTx and
// writes the corresponding Pedersen-hashed entries into outTx.
//
// For every 20-byte key (an account) it collects the version, nonce, balance,
// code-size and code-hash leaves plus one entry per 32-byte code chunk. For
// every 60-byte key (a storage entry) it collects the value under the storage
// slot tree key built from k[:20] and k[28:]. Entries are gathered in two ETL
// collectors and loaded into the "PedersenHashState" and
// "PedersenHashStorage" buckets, after which outTx is committed.
//
// Any error from the cursor, account decoding, code lookup, chunking, the
// collectors, the load phase or the commit is returned to the caller.
func RegeneratePedersenHashstate(outTx kv.RwTx, readTx kv.Tx) error {
	pedersenHashStateBucket := "PedersenHashState"
	pedersenHashStorageBucket := "PedersenHashStorage"
	start := time.Now()
	log.Info("Started Generation of the Pedersen Hashed State")
	if err := outTx.CreateBucket(pedersenHashStateBucket); err != nil {
		return err
	}
	if err := outTx.CreateBucket(pedersenHashStorageBucket); err != nil {
		return err
	}
	stateCollector := etl.NewCollector("Pedersen State", "/tmp/etl-temp", etl.NewSortableBuffer(etl.BufferOptimalSize))
	defer stateCollector.Close()

	storageCollector := etl.NewCollector("Pedersen Storage", "/tmp/etl-temp", etl.NewSortableBuffer(etl.BufferOptimalSize))
	defer storageCollector.Close()

	plainStateCursor, err := readTx.Cursor(kv.PlainState)
	if err != nil {
		return err
	}
	defer plainStateCursor.Close()

	logInterval := time.NewTicker(30 * time.Second)
	defer logInterval.Stop()

	// The error is checked before the nil-key test so that a failing
	// First/Next is reported instead of silently ending the iteration.
	for k, v, err := plainStateCursor.First(); ; k, v, err = plainStateCursor.Next() {
		if err != nil {
			return err
		}
		if k == nil {
			break
		}
		switch len(k) {
		case 20:
			versionKey, balanceKey, codeSizeKey, codeHashKey, nonceKey := retrieveAccountKeys(common.BytesToAddress(k))
			if err := stateCollector.Collect(versionKey[:], []byte{0}); err != nil {
				return err
			}
			// Process nonce
			nonceValue := make([]byte, 8)
			acc := accounts.NewAccount()
			if err := acc.DecodeForStorage(v); err != nil {
				return err
			}
			binary.LittleEndian.PutUint64(nonceValue, acc.Nonce)
			if err := stateCollector.Collect(nonceKey[:], nonceValue); err != nil {
				return err
			}
			// Process Balance: reverse the big-endian bytes into a
			// zero-padded 32-byte little-endian value.
			balanceBytes := acc.Balance.ToBig().Bytes()
			balanceValue := make([]byte, 32)
			for i := range balanceBytes {
				balanceValue[len(balanceBytes)-i-1] = balanceBytes[i]
			}
			if err := stateCollector.Collect(balanceKey[:], balanceValue); err != nil {
				return err
			}
			// Process Code Size (stays zero for accounts without code)
			codeSizeValue := make([]byte, 8)
			if !accounts.IsEmptyCodeHash(acc.CodeHash) {
				code, err := readTx.GetOne(kv.Code, acc.CodeHash[:])
				if err != nil {
					return err
				}
				// Chunkify contract code and build keys for each chunk
				chunkedCode, err := vtree.ChunkifyCode(code)
				if err != nil {
					return err
				}
				// Write code chunks, one entry per 32-byte chunk
				for i := 0; i < len(chunkedCode); i += 32 {
					chunkKey := vtree.GetTreeKeyCodeChunk(k, uint256.NewInt(uint64(i)/32))
					if err := stateCollector.Collect(chunkKey, chunkedCode[i:i+32]); err != nil {
						return err
					}
				}

				// Set code size
				binary.LittleEndian.PutUint64(codeSizeValue, uint64(len(code)))
			}

			if err := stateCollector.Collect(codeSizeKey[:], codeSizeValue); err != nil {
				return err
			}
			// Process Code Hash
			if err := stateCollector.Collect(codeHashKey[:], acc.CodeHash[:]); err != nil {
				return err
			}
		case 60:
			// Process storage: k[:20] is passed as the address and
			// k[28:] as the storage key.
			slotKey := vtree.GetTreeKeyStorageSlot(k[:20], new(uint256.Int).SetBytes(k[28:]))
			if err := storageCollector.Collect(slotKey, v); err != nil {
				return err
			}
		}
		// Non-blocking progress report at most once per ticker interval.
		select {
		case <-logInterval.C:
			log.Info("[Pedersen Hashing] Current progress in Collection Phase", "key", common.Bytes2Hex(k))
		default:
		}
	}
	if err := stateCollector.Load(outTx, pedersenHashStateBucket, etl.IdentityLoadFunc, etl.TransformArgs{}); err != nil {
		return err
	}
	if err := storageCollector.Load(outTx, pedersenHashStorageBucket, etl.IdentityLoadFunc, etl.TransformArgs{}); err != nil {
		return err
	}

	log.Info("Pedersen hashed state finished", "elapsed", time.Since(start))
	return outTx.Commit()
}
203 changes: 203 additions & 0 deletions turbo/trie/vtree/verkle_utils.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
package vtree

import (
"github.com/crate-crypto/go-ipa/bandersnatch/fr"
"github.com/gballet/go-verkle"
"github.com/holiman/uint256"
)

// Leaf suffixes of the account header fields. Each value is passed as the
// subIndex argument of GetTreeKey (with tree index zero) and ends up as the
// last byte of the resulting 32-byte tree key.
const (
// VersionLeafKey is the suffix of the account version leaf.
VersionLeafKey = 0
// BalanceLeafKey is the suffix of the account balance leaf.
BalanceLeafKey = 1
// NonceLeafKey is the suffix of the account nonce leaf.
NonceLeafKey = 2
// CodeKeccakLeafKey is the suffix of the account code hash leaf.
CodeKeccakLeafKey = 3
// CodeSizeLeafKey is the suffix of the account code size leaf.
CodeSizeLeafKey = 4
)

// Offsets and widths used when mapping account data to tree positions.
var (
// zero is the tree index used for all account header leaves.
zero = uint256.NewInt(0)
// HeaderStorageOffset is added to storage keys smaller than
// codeStorageDelta in GetTreeKeyStorageSlot.
HeaderStorageOffset = uint256.NewInt(64)
// CodeOffset is added to a code chunk number in GetTreeKeyCodeChunk.
CodeOffset = uint256.NewInt(128)
// MainStorageOffset is added to all other storage keys in
// GetTreeKeyStorageSlot.
// NOTE(review): this expression evaluates to 256 << 31 (2^39). The verkle
// tree specification presumably defines the main storage offset as 256**31
// (2^248) — confirm which value is intended before relying on
// cross-client compatibility.
MainStorageOffset = new(uint256.Int).Lsh(uint256.NewInt(256), 31)
// VerkleNodeWidth is the divisor/modulus that splits a position into a
// tree index and a sub-index.
VerkleNodeWidth = uint256.NewInt(256)
// codeStorageDelta is CodeOffset - HeaderStorageOffset; storage keys below
// it use HeaderStorageOffset instead of MainStorageOffset.
codeStorageDelta = uint256.NewInt(0).Sub(CodeOffset, HeaderStorageOffset)

// getTreePolyIndex0Point is a constant point, decoded in init, that
// GetTreeKey adds to every polynomial commitment.
getTreePolyIndex0Point *verkle.Point
)

// init decodes the hard-coded 32-byte serialization of the constant point
// that GetTreeKey adds to each commitment. It panics if the bytes cannot be
// decoded.
func init() {
	serialized := []byte{34, 25, 109, 242, 193, 5, 144, 224, 76, 52, 189, 92, 197, 126, 9, 145, 27, 152, 199, 130, 165, 3, 210, 27, 193, 131, 142, 28, 110, 26, 16, 191}

	getTreePolyIndex0Point = new(verkle.Point)
	if decodeErr := getTreePolyIndex0Point.SetBytes(serialized); decodeErr != nil {
		panic(decodeErr)
	}
}

// GetTreeKey performs both the work of the spec's get_tree_key function, and that
// of pedersen_hash: it builds the polynomial in pedersen_hash without having to
// create a mostly zero-filled buffer and "type cast" it to a 128-long 16-byte
// array. Since at most the first 5 coefficients of the polynomial will be non-zero,
// these 5 coefficients are created directly.
//
// address is left-padded with zeros to 32 bytes when shorter. treeIndex selects
// the stem and subIndex becomes the last byte of the returned 32-byte key.
//
// It panics if the verkle configuration cannot be obtained.
func GetTreeKey(address []byte, treeIndex *uint256.Int, subIndex byte) []byte {
	if len(address) < 32 {
		// Left-pad: the zero prefix comes from aligned, the address follows.
		var aligned [32]byte
		address = append(aligned[:32-len(address)], address...)
	}
	var poly [5]fr.Element

	// The first coefficient is left at zero here; its contribution is the
	// constant point added after the commitment below.
	poly[0].SetZero()

	// 32-byte address, interpreted as two little endian
	// 16-byte numbers.
	verkle.FromLEBytes(&poly[1], address[:16])
	verkle.FromLEBytes(&poly[2], address[16:])

	// little-endian, 32-byte aligned treeIndex: reverse the big-endian
	// bytes once instead of re-serializing treeIndex on every iteration.
	var index [32]byte
	indexBE := treeIndex.Bytes()
	for i, b := range indexBE {
		index[len(indexBE)-1-i] = b
	}
	verkle.FromLEBytes(&poly[3], index[:16])
	verkle.FromLEBytes(&poly[4], index[16:])

	// Fail loudly with the real cause rather than dereferencing an unusable
	// configuration.
	cfg, err := verkle.GetConfig()
	if err != nil {
		panic(err)
	}
	ret := cfg.CommitToPoly(poly[:], 0)

	// add a constant point
	ret.Add(ret, getTreePolyIndex0Point)

	return PointToHash(ret, subIndex)
}

// GetTreeKeyAccountLeaf returns the tree key of the account header leaf with
// the given suffix for address, using tree index zero.
func GetTreeKeyAccountLeaf(address []byte, leaf byte) []byte {
return GetTreeKey(address, zero, leaf)
}

func GetTreeKeyVersion(address []byte) []byte {
return GetTreeKey(address, zero, VersionLeafKey)
}

func GetTreeKeyBalance(address []byte) []byte {
return GetTreeKey(address, zero, BalanceLeafKey)
}

func GetTreeKeyNonce(address []byte) []byte {
return GetTreeKey(address, zero, NonceLeafKey)
}

func GetTreeKeyCodeKeccak(address []byte) []byte {
return GetTreeKey(address, zero, CodeKeccakLeafKey)
}

func GetTreeKeyCodeSize(address []byte) []byte {
return GetTreeKey(address, zero, CodeSizeLeafKey)
}

// GetTreeKeyCodeChunk returns the tree key of the code chunk numbered chunk
// for address. The chunk's position is CodeOffset + chunk; dividing it by
// VerkleNodeWidth gives the tree index and the remainder gives the sub-index.
func GetTreeKeyCodeChunk(address []byte, chunk *uint256.Int) []byte {
	position := new(uint256.Int).Add(CodeOffset, chunk)

	stemIndex := new(uint256.Int).Div(position, VerkleNodeWidth)
	remainder := new(uint256.Int).Mod(position, VerkleNodeWidth)

	// The remainder is below the node width (256), so it fits in one byte.
	suffix := byte(remainder.Uint64())

	return GetTreeKey(address, stemIndex, suffix)
}

// GetTreeKeyStorageSlot returns the tree key of the storage slot storageKey of
// address. Keys below codeStorageDelta are placed at HeaderStorageOffset +
// storageKey; all others at MainStorageOffset + storageKey. The resulting
// position is split into a tree index (position / VerkleNodeWidth) and a
// sub-index (position % VerkleNodeWidth).
func GetTreeKeyStorageSlot(address []byte, storageKey *uint256.Int) []byte {
	// Pick the base offset for this slot.
	base := MainStorageOffset
	if storageKey.Cmp(codeStorageDelta) < 0 {
		base = HeaderStorageOffset
	}
	position := new(uint256.Int).Add(base, storageKey)

	stemIndex := new(uint256.Int).Div(position, VerkleNodeWidth)

	// The sub-index is the index inside the stem: the remainder of the
	// position modulo the node width, which always fits in a single byte.
	remainder := new(uint256.Int).Mod(position, VerkleNodeWidth)
	suffix := byte(remainder.Uint64())

	return GetTreeKey(address, stemIndex, suffix)
}

// PointToHash turns a commitment point into a 32-byte tree key: the point's
// serialization is byte-reversed and its final byte is replaced by suffix.
func PointToHash(evaluated *verkle.Point, suffix byte) []byte {
	// The output of Bytes() is big endian for banderwagon. This
	// introduces an imbalance in the tree, because hashes are
	// elements of a 253-bit field. This means more than half the
	// tree would be empty. To avoid this problem, use a little
	// endian commitment and chop the MSB.
	serialized := evaluated.Bytes()
	for lo, hi := 0, 31; lo < hi; lo, hi = lo+1, hi-1 {
		serialized[lo], serialized[hi] = serialized[hi], serialized[lo]
	}
	// After the reversal the most significant byte sits last; overwrite it.
	serialized[31] = suffix
	return serialized[:]
}

// EVM PUSHn opcode byte values. ChunkifyCode treats every opcode in the
// inclusive range PUSH1..PUSH32 as a push whose immediate data length is
// opcode - PUSH1 + 1 bytes.
const (
PUSH1 = byte(0x60)
PUSH3 = byte(0x62)
PUSH4 = byte(0x63)
PUSH7 = byte(0x66)
PUSH21 = byte(0x74)
PUSH30 = byte(0x7d)
PUSH32 = byte(0x7f)
)

// ChunkifyCode generates the chunked version of an array representing EVM bytecode.
//
// The code is cut into 31-byte pieces and each piece is stored in a 32-byte
// chunk whose first byte holds the number of leading bytes in that chunk that
// are push data carried over from the previous chunk (capped at 31). The last
// chunk is zero-padded. The returned error is always nil.
func ChunkifyCode(code []byte) ([]byte, error) {
	const (
		payloadLen = 31 // code bytes per chunk
		chunkLen   = 32 // header byte + payload
	)

	total := (len(code) + payloadLen - 1) / payloadLen
	out := make([]byte, total*chunkLen)

	carry := 0 // push-data bytes spilling into the next chunk
	pc := 0    // position of the next instruction to inspect
	for n := 0; n < total; n++ {
		// Code index at which the next chunk starts.
		boundary := payloadLen * (n + 1)
		limit := boundary
		if limit > len(code) {
			limit = len(code)
		}

		chunk := out[n*chunkLen : (n+1)*chunkLen]
		copy(chunk[1:], code[payloadLen*n:limit])

		if carry > payloadLen {
			// Push data covers this whole chunk; one byte is left
			// over for the following chunk.
			chunk[0] = payloadLen
			carry = 1
			continue
		}
		chunk[0] = byte(carry)
		carry = 0

		// Walk the instructions of this chunk, skipping push data, and
		// record how far a push runs past the chunk boundary.
		for pc < limit {
			op := code[pc]
			pc++
			if op < PUSH1 || op > PUSH32 {
				continue
			}
			pc += int(op-PUSH1) + 1
			if pc >= boundary {
				carry = pc - boundary
				break
			}
		}
	}

	return out, nil
}
Loading

0 comments on commit faebec4

Please sign in to comment.