Skip to content

Commit

Permalink
Consolidate array of primes
Browse files Browse the repository at this point in the history
  • Loading branch information
cespare committed Dec 1, 2022
1 parent fbdfba6 commit 3686901
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 43 deletions.
22 changes: 7 additions & 15 deletions xxhash.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,19 +16,11 @@ const (
prime5 uint64 = 2870177450012600261
)

// NOTE(caleb): I'm using both consts and vars of the primes. Using consts where
// possible in the Go code is worth a small (but measurable) performance boost
// by avoiding some MOVQs. Vars are needed for the asm and also are useful for
// convenience in the Go code in a few places where we need to intentionally
// avoid constant arithmetic (e.g., v1 := prime1 + prime2 fails because the
// result overflows a uint64).
var (
prime1v = prime1
prime2v = prime2
prime3v = prime3
prime4v = prime4
prime5v = prime5
)
// Store the primes in an array as well.
//
// The consts are used when possible in Go code to avoid MOVs, but we need a
// contiguous array for the assembly code.
var primes = [...]uint64{0, prime1, prime2, prime3, prime4, prime5}

// Digest implements hash.Hash64.
type Digest struct {
Expand All @@ -50,10 +42,10 @@ func New() *Digest {

// Reset clears the Digest's state so that it can be reused.
func (d *Digest) Reset() {
d.v1 = prime1v + prime2
d.v1 = primes[1] + prime2
d.v2 = prime2
d.v3 = 0
d.v4 = -prime1v
d.v4 = -primes[1]
d.total = 0
d.n = 0
}
Expand Down
28 changes: 14 additions & 14 deletions xxhash_amd64.s
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,12 @@
// R10 v3
// R11 v4
// R12 tmp
// R13 prime1v
// R14 prime2v
// DI prime4v
// R13 prime1
// R14 prime2
// DI prime4

// round reads from and advances the buffer pointer in SI.
// It assumes that R13 has prime1v and R14 has prime2v.
// It assumes that R13 has prime1 and R14 has prime2.
#define round(r) \
MOVQ (SI), R12 \
ADDQ $8, SI \
Expand All @@ -30,7 +30,7 @@
IMULQ R13, r

// mergeRound applies a merge round on the two registers acc and val.
// It assumes that R13 has prime1v, R14 has prime2v, and DI has prime4v.
// It assumes that R13 has prime1, R14 has prime2, and DI has prime4.
#define mergeRound(acc, val) \
IMULQ R14, val \
ROLQ $31, val \
Expand All @@ -42,9 +42,9 @@
// func Sum64(b []byte) uint64
TEXT ·Sum64(SB), NOSPLIT, $0-32
// Load fixed primes.
MOVQ ·prime1v(SB), R13
MOVQ ·prime2v(SB), R14
MOVQ ·prime4v(SB), DI
MOVQ ·primes+8(SB), R13
MOVQ ·primes+16(SB), R14
MOVQ ·primes+32(SB), DI

// Load slice.
MOVQ b_base+0(FP), SI
Expand Down Expand Up @@ -96,7 +96,7 @@ blockLoop:
JMP afterBlocks

noBlocks:
MOVQ ·prime5v(SB), AX
MOVQ ·primes+40(SB), AX

afterBlocks:
ADDQ DX, AX
Expand Down Expand Up @@ -135,7 +135,7 @@ fourByte:

ROLQ $23, AX
IMULQ R14, AX
ADDQ ·prime3v(SB), AX
ADDQ ·primes+24(SB), AX

singles:
ADDQ $4, BX
Expand All @@ -145,7 +145,7 @@ singles:
singlesLoop:
MOVBQZX (SI), R12
ADDQ $1, SI
IMULQ ·prime5v(SB), R12
IMULQ ·primes+40(SB), R12
XORQ R12, AX

ROLQ $11, AX
Expand All @@ -162,7 +162,7 @@ finalize:
MOVQ AX, R12
SHRQ $29, R12
XORQ R12, AX
IMULQ ·prime3v(SB), AX
IMULQ ·primes+24(SB), AX
MOVQ AX, R12
SHRQ $32, R12
XORQ R12, AX
Expand All @@ -176,8 +176,8 @@ finalize:
// func writeBlocks(d *Digest, b []byte) int
TEXT ·writeBlocks(SB), NOSPLIT, $0-40
// Load fixed primes needed for round.
MOVQ ·prime1v(SB), R13
MOVQ ·prime2v(SB), R14
MOVQ ·primes+8(SB), R13
MOVQ ·primes+16(SB), R14

// Load slice.
MOVQ b_base+8(FP), SI
Expand Down
16 changes: 4 additions & 12 deletions xxhash_arm64.s
Original file line number Diff line number Diff line change
Expand Up @@ -56,21 +56,13 @@
SUB $1, nblocks \
CBNZ nblocks, loop \

// Store the primes in a contiguous array so we can load them with LDP.
DATA primes<> +0(SB)/8, $11400714785074694791
DATA primes<> +8(SB)/8, $14029467366897019727
DATA primes<>+16(SB)/8, $1609587929392839161
DATA primes<>+24(SB)/8, $9650029242287828579
DATA primes<>+32(SB)/8, $2870177450012600261
GLOBL primes<>(SB), NOPTR+RODATA, $40

// func Sum64(b []byte) uint64
TEXT ·Sum64(SB), NOSPLIT|NOFRAME, $0-32
LDP b_base+0(FP), (p, n)

LDP primes<> +0(SB), (prime1, prime2)
LDP primes<>+16(SB), (prime3, prime4)
MOVD primes<>+32(SB), prime5
LDP ·primes+8(SB), (prime1, prime2)
LDP ·primes+24(SB), (prime3, prime4)
MOVD ·primes+40(SB), prime5

CMP $32, n
CSEL LT, prime5, ZR, h // if n < 32 { h = prime5 } else { h = 0 }
Expand Down Expand Up @@ -167,7 +159,7 @@ end:

// func writeBlocks(d *Digest, b []byte) int
TEXT ·writeBlocks(SB), NOSPLIT|NOFRAME, $0-40
LDP primes<>(SB), (prime1, prime2)
LDP ·primes+8(SB), (prime1, prime2)

// Load state. Assume v[1-4] are stored contiguously.
MOVD d+0(FP), digest
Expand Down
4 changes: 2 additions & 2 deletions xxhash_other.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@ func Sum64(b []byte) uint64 {
var h uint64

if n >= 32 {
v1 := prime1v + prime2
v1 := primes[1] + prime2
v2 := prime2
v3 := uint64(0)
v4 := -prime1v
v4 := -primes[1]
for len(b) >= 32 {
v1 = round(v1, u64(b[0:8:len(b)]))
v2 = round(v2, u64(b[8:16:len(b)]))
Expand Down

0 comments on commit 3686901

Please sign in to comment.