Skip to content

Commit

Permalink
add 4-way siphash based on highwayhash
Browse files Browse the repository at this point in the history
  • Loading branch information
tromp committed Nov 4, 2016
1 parent ea7fc04 commit ee30864
Show file tree
Hide file tree
Showing 17 changed files with 2,146 additions and 81 deletions.
9 changes: 5 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -101,16 +101,17 @@ $1000 for an open source implementation for a consumer GPU combo
that finds 42-cycles twice as fast as cuda_miner.cu on comparable hardware.
Again with N ranging over {2^28,2^30,2^32}.

The Makefile defines corresponding targets cpubounty, tmtobounty, and gpubounty.
These bounties are admittedly modest in size,
but then claiming them might only require one or two insightful tweaks to
my existing implementations.

I invite anyone who'd like to see my claims tested to extend any of these bounties.
The Cuckoo Cycle Bounty Fund is accepting donations in BTC at
I invite anyone who'd like to see my claims tested to donate to
the Cuckoo Cycle Bounty Fund at

<a href="https://blockchain.info/address/1CnrpdKtfF3oAZmshyVC1EsRUa25nDuBvN">1CnrpdKtfF3oAZmshyVC1EsRUa25nDuBvN</a>
<a href="https://blockchain.info/address/1CnrpdKtfF3oAZmshyVC1EsRUa25nDuBvN">1CnrpdKtfF3oAZmshyVC1EsRUa25nDuBvN</a> (wallet balance as of Nov 1, 2016: 6.11 BTC)

Happy bounty hunting!
I intend for the total bounty value to stay ahead of funding levels. Happy bounty hunting!

Cryptocurrencies using Cuckoo Cycle
--------------
Expand Down
7 changes: 5 additions & 2 deletions src/Makefile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# -Wno-deprecated-declarations shuts up Apple OSX clang
OPT = -O3
DOPT = -DPREFETCH
FLAGS = -Wall -Wno-deprecated-declarations -D_POSIX_C_SOURCE=200112L $(OPT) $(DOPT) -pthread
FLAGS = -Wall -Wno-deprecated-declarations -D_POSIX_C_SOURCE=200112L $(OPT) $(DOPT) -I. -pthread
GPP = g++ -march=native -m64 -std=c++11 $(FLAGS)
LIBS = -lssl -lcrypto
# leave out -l crypto if using sha256.c instead of openssl
Expand All @@ -12,7 +12,7 @@ all : test example cuckoo28st cuckoo30st cuckoo32st cuckoo28 cuckoo30 cuckoo32 v
test: cuckoo20 verify20 Makefile
./cuckoo20 -n 13 | grep ^Sol | ./verify20 -n 13

cuckoo4: cuckoo.h cuckoo_miner.h simple_miner.cpp Makefile
cuckoo4: siphash.h cuckoo.h cuckoo_miner.h simple_miner.cpp Makefile
$(GPP) -o cuckoo4 -DSHOW -DIDXSHIFT=0 -DPROOFSIZE=6 -DSIZESHIFT=4 simple_miner.cpp $(LIBS)

cuckoo6: cuckoo.h cuckoo_miner.h cuckoo_miner.cpp Makefile
Expand Down Expand Up @@ -45,6 +45,9 @@ momentomatum: cuckoo.h momentomatum.h momentomatum.cpp Makefile
cuckoo28: cuckoo.h cuckoo_miner.h cuckoo_miner.cpp Makefile
$(GPP) -o cuckoo28 -DATOMIC -DSIZESHIFT=28 cuckoo_miner.cpp $(LIBS)

cuckoo28avx2: cuckoo.h cuckoo_miner.h cuckoo_miner.cpp Makefile
$(GPP) -o cuckoo28avx2 -DUSE_AVX2 -DATOMIC -DSIZESHIFT=28 cuckoo_miner.cpp $(LIBS)

cuckoo28.1: cuckoo.h cuckoo_miner.h cuckoo_miner.cpp Makefile
$(GPP) -o cuckoo28.1 -DATOMIC -DPART_BITS=1 -DSIZESHIFT=28 cuckoo_miner.cpp $(LIBS)

Expand Down
73 changes: 9 additions & 64 deletions src/cuckoo.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <stdint.h> // for types uint32_t,uint64_t
#include <string.h> // for functions strlen, memset
#include <openssl/sha.h> // if openssl absent, use #include "sha256.c"
#include "siphash.h"

// proof-of-work parameters
#ifndef SIZESHIFT
Expand Down Expand Up @@ -33,70 +34,14 @@
typedef uint32_t u32;
typedef uint64_t u64;

// siphash state: four 64-bit words, stored in v[0..3].
// The union lets the same storage be viewed in two ways.
typedef union {
// the state as four 64-bit words
u64 v[4];
// the same state as four 32-bit-word pairs, for the benefit of the CUDA
// funnel shifter; this member only exists when compiling with a CUDA
// compiler (__CUDACC__ defined)
#ifdef __CUDACC__
uint2 v2[4];
#endif
} siphash_ctx;

// Assemble a 64-bit word from the 8 bytes starting at p, little-endian:
// p[0] supplies the least significant byte and p[7] the most significant.
// Note that the argument p is evaluated eight times.
#define U8TO64_LE(p) \
  ( ((u64)((p)[7]) << 56) | ((u64)((p)[6]) << 48) | \
    ((u64)((p)[5]) << 40) | ((u64)((p)[4]) << 32) | \
    ((u64)((p)[3]) << 24) | ((u64)((p)[2]) << 16) | \
    ((u64)((p)[1]) <<  8) | ((u64)((p)[0])      ) )

// Fallback one-shot SHA256(d, n, md), only defined when no SHA256 macro
// exists yet. It hashes the n bytes at d into md by driving the incremental
// SHA256_Init / SHA256_Update / SHA256_Final interface on a temporary context.
#if !defined(SHA256)
#define SHA256(d, n, md) do {          \
    SHA256_CTX sha_state;              \
    SHA256_Init(&sha_state);           \
    SHA256_Update(&sha_state, d, n);   \
    SHA256_Final(md, &sha_state);      \
  } while (0)
#endif

// Derive the siphash state from the header.
// The HEADERLEN bytes at headernonce are hashed with SHA256; the first 16
// digest bytes are read as two little-endian 64-bit key halves (k0, k1),
// which are XORed with the four siphash initialisation constants and stored
// in ctx->v[0..3].
void setheader(siphash_ctx *ctx, const char *headernonce) {
  // siphash initialisation constants, in state-word order
  const u64 sip_init[4] = {
    0x736f6d6570736575ULL, 0x646f72616e646f6dULL,
    0x6c7967656e657261ULL, 0x7465646279746573ULL
  };
  unsigned char digest[32];
  SHA256((unsigned char *)headernonce, HEADERLEN, digest);
  const u64 key[2] = { U8TO64_LE(digest), U8TO64_LE(digest + 8) };
  // even state words mix in k0, odd state words mix in k1
  for (int w = 0; w < 4; w++)
    ctx->v[w] = key[w & 1] ^ sip_init[w];
}

// Rotate the 64-bit value x left by b bits. The expression shifts by
// (64 - b), so b is expected to be in the range 1..63.
#define ROTL(x,b) (u64)( ((x) >> (64 - (b))) | ((x) << (b)) )

// One siphash round. Operates in place on four u64 variables named
// v0, v1, v2 and v3 that must be in scope where the macro is expanded.
#define SIPROUND \
  do { \
    v0 += v1; \
    v2 += v3; \
    v1 = ROTL(v1,13); \
    v3 = ROTL(v3,16); \
    v1 ^= v0; \
    v3 ^= v2; \
    v0 = ROTL(v0,32); \
    v2 += v1; \
    v0 += v3; \
    v1 = ROTL(v1,17); \
    v3 = ROTL(v3,21); \
    v1 ^= v2; \
    v3 ^= v0; \
    v2 = ROTL(v2,32); \
  } while(0)

// SipHash-2-4 specialized to a precomputed key and a single 8-byte nonce.
// ctx holds the key-initialised state words; nonce is the message word.
// Two rounds absorb the nonce, four more finalise, and the four state
// words are folded together with XOR to give the 64-bit result.
u64 siphash24(siphash_ctx *ctx, u64 nonce) {
  u64 v0 = ctx->v[0];
  u64 v1 = ctx->v[1];
  u64 v2 = ctx->v[2];
  u64 v3 = ctx->v[3] ^ nonce;   // mix the message word in before the first rounds
  for (int round = 0; round < 2; round++)
    SIPROUND;
  v0 ^= nonce;
  v2 ^= 0xff;
  for (int round = 0; round < 4; round++)
    SIPROUND;
  return v0 ^ v1 ^ v2 ^ v3;
}

// generate edge endpoint in cuckoo graph without partition bit
u64 _sipnode(siphash_ctx *ctx, u64 nonce, u32 uorv) {
return (siphash24(ctx, 2*nonce + uorv) & NODEMASK);
u64 __attribute__ ((noinline)) _sipnode(siphash_keys *keys, u64 nonce, u32 uorv) {
return siphash24(keys, 2*nonce + uorv) & NODEMASK;
}

// generate edge endpoint in cuckoo graph
u64 sipnode(siphash_ctx *ctx, u64 nonce, u32 uorv) {
return (siphash24(ctx, 2*nonce + uorv) & NODEMASK) << 1 | uorv;
u64 sipnode(siphash_keys *keys, u64 nonce, u32 uorv) {
return _sipnode(keys, nonce, uorv) << 1 | uorv;
}

enum verify_code { POW_OK, POW_HEADER_LENGTH, POW_TOO_BIG, POW_TOO_SMALL, POW_NON_MATCHING, POW_BRANCH, POW_DEAD_END, POW_SHORT_CYCLE};
Expand All @@ -106,17 +51,17 @@ const char *errstr[] = { "OK", "wrong header length", "proof too big", "proof to
int verify(u64 nonces[PROOFSIZE], const char *headernonce, const u32 headerlen) {
if (headerlen != HEADERLEN)
return POW_HEADER_LENGTH;
siphash_ctx ctx;
setheader(&ctx, headernonce);
siphash_keys keys;
setheader(&keys, headernonce);
u64 uvs[2*PROOFSIZE];
u64 xor0=0,xor1=0;
for (u32 n = 0; n < PROOFSIZE; n++) {
if (nonces[n] >= HALFSIZE)
return POW_TOO_BIG;
if (n && nonces[n] <= nonces[n-1])
return POW_TOO_SMALL;
xor0 ^= uvs[2*n ] = sipnode(&ctx, nonces[n], 0);
xor1 ^= uvs[2*n+1] = sipnode(&ctx, nonces[n], 1);
xor0 ^= uvs[2*n ] = sipnode(&keys, nonces[n], 0);
xor1 ^= uvs[2*n+1] = sipnode(&keys, nonces[n], 1);
}
if (xor0|xor1) // matching endpoints imply zero xors
return POW_NON_MATCHING;
Expand Down
94 changes: 88 additions & 6 deletions src/cuckoo_miner.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ typedef u64 nonce_t;
typedef u64 node_t;
#endif
#include <set>
#ifdef USE_AVX2
#include "highwayhash/siphashx4.h"
typedef highwayhash::uint64 ull64;
#endif

// algorithm parameters
#ifndef PART_BITS
Expand Down Expand Up @@ -183,7 +187,7 @@ class cuckoo_hash {

class cuckoo_ctx {
public:
siphash_ctx sip_ctx;
siphash_keys sip_keys;
shrinkingset *alive;
twice_set *nonleaf;
cuckoo_hash *cuckoo;
Expand All @@ -210,7 +214,7 @@ class cuckoo_ctx {
void setheadernonce(char* headernonce, const u32 len, const u32 nce) {
nonce = nce;
((u32 *)headernonce)[len/sizeof(u32)-1] = htole32(nonce); // place nonce at end
setheader(&sip_ctx, headernonce);
setheader(&sip_keys, headernonce);
alive->clear(); // set all edges to be alive
nsols = 0;
}
Expand Down Expand Up @@ -239,6 +243,83 @@ void barrier(pthread_barrier_t *barry) {
#define NODEPARTMASK (NODEMASK >> PART_BITS)
#define NONCETRUNC (1LL << (64 - NONCESHIFT))

#ifdef USE_AVX2
void count_node_deg(thread_ctx *tp, u32 uorv, u32 part) {
cuckoo_ctx *ctx = tp->ctx;
shrinkingset *alive = ctx->alive;
twice_set *nonleaf = ctx->nonleaf;
alignas(64) u64 indices[4];
alignas(64) u64 hashes[64];

for (nonce_t block = tp->id*64; block < HALFSIZE; block += ctx->nthreads*64) {
u32 nidx = 0;
u64 alive64 = alive->block(block);
for (nonce_t nonce = block-1; alive64; ) { // -1 compensates for 1-based ffs
u32 ffs = __builtin_ffsll(alive64);
nonce += ffs; alive64 >>= ffs;
indices[nidx++ % 4] = 2*nonce + uorv;
if (nidx % 4 == 0) {
highwayhash::siphashx4(&ctx->sip_keys, (ull64 *)indices, (ull64 *)hashes+nidx-4);
for (u32 i=0; i < 4; i++) {
u32 node = (hashes[nidx-4+i] & NODEMASK) >> PART_BITS;
nonleaf->prefetch(node);
}
}
if (ffs & 64) break; // can't shift by 64
}
if (nidx % 4 != 0) {
highwayhash::siphashx4(&ctx->sip_keys, (ull64 *)indices, (ull64 *)hashes+(nidx&-4));
for (u32 i=0; i < nidx%4; i++) {
u32 node = (hashes[(nidx&-4)+i] & NODEMASK) >> PART_BITS;
nonleaf->prefetch(node);
}
}
for (u32 i=0; i<nidx; i++) {
u32 node = (hashes[i] & NODEMASK) >> PART_BITS;
nonleaf->set(node);
}
}
}

void kill_leaf_edges(thread_ctx *tp, u32 uorv, u32 part) {
cuckoo_ctx *ctx = tp->ctx;
shrinkingset *alive = ctx->alive;
twice_set *nonleaf = ctx->nonleaf;
alignas(64) u64 indices[64];
alignas(64) u64 hashes[64];

for (nonce_t block = tp->id*64; block < HALFSIZE; block += ctx->nthreads*64) {
u32 nidx = 0;
u64 alive64 = alive->block(block);
for (nonce_t nonce = block-1; alive64; ) { // -1 compensates for 1-based ffs
u32 ffs = __builtin_ffsll(alive64);
nonce += ffs; alive64 >>= ffs;
indices[nidx++] = 2*nonce + uorv;
if (nidx % 4 == 0) {
highwayhash::siphashx4(&ctx->sip_keys, (ull64 *)indices+nidx-4, (ull64 *)hashes+nidx-4);
for (u32 i=0; i < 4; i++) {
u32 node = (hashes[nidx-4+i] & NODEMASK) >> PART_BITS;
nonleaf->prefetch(node);
}
}
if (ffs & 64) break; // can't shift by 64
}
if (nidx % 4 != 0) {
highwayhash::siphashx4(&ctx->sip_keys, (ull64 *)indices+(nidx&-4), (ull64 *)hashes+(nidx&-4));
for (u32 i=0; i < nidx%4; i++) {
u32 node = (hashes[(nidx&-4)+i] & NODEMASK) >> PART_BITS;
nonleaf->prefetch(node);
}
}
for (u32 i=0; i<nidx; i++) {
u32 node = (hashes[i] & NODEMASK) >> PART_BITS;
if (!nonleaf->test(node)) {
alive->reset(indices[i]/2, tp->id);
}
}
}
}
#else
void count_node_deg(thread_ctx *tp, u32 uorv, u32 part) {
cuckoo_ctx *ctx = tp->ctx;
shrinkingset *alive = ctx->alive;
Expand All @@ -251,7 +332,7 @@ void count_node_deg(thread_ctx *tp, u32 uorv, u32 part) {
for (nonce_t nonce = block-1; alive64; ) { // -1 compensates for 1-based ffs
u32 ffs = __builtin_ffsll(alive64);
nonce += ffs; alive64 >>= ffs;
node_t u = _sipnode(&ctx->sip_ctx, nonce, uorv);
node_t u = _sipnode(&ctx->sip_keys, nonce, uorv);
if ((u & PART_MASK) == part) {
buffer[bsize++] = u >> PART_BITS;
nonleaf->prefetch(u >> PART_BITS);
Expand All @@ -275,7 +356,7 @@ void kill_leaf_edges(thread_ctx *tp, u32 uorv, u32 part) {
for (nonce_t nonce = block-1; alive64; ) { // -1 compensates for 1-based ffs
u32 ffs = __builtin_ffsll(alive64);
nonce += ffs; alive64 >>= ffs;
node_t u = _sipnode(&ctx->sip_ctx, nonce, uorv);
node_t u = _sipnode(&ctx->sip_keys, nonce, uorv);
if ((u & PART_MASK) == part) {
buffer[bsize++] = ((u64)nonce << NONCESHIFT) | (u >> PART_BITS);
nonleaf->prefetch(u >> PART_BITS);
Expand All @@ -291,6 +372,7 @@ void kill_leaf_edges(thread_ctx *tp, u32 uorv, u32 part) {
}
}
}
#endif

u32 path(cuckoo_hash &cuckoo, node_t u, node_t *us) {
u32 nu;
Expand Down Expand Up @@ -327,7 +409,7 @@ void solution(cuckoo_ctx *ctx, node_t *us, u32 nu, node_t *vs, u32 nv) {
for (nonce_t nonce = block-1; alive64; ) { // -1 compensates for 1-based ffs
u32 ffs = __builtin_ffsll(alive64);
nonce += ffs; alive64 >>= ffs;
edge e(sipnode(&ctx->sip_ctx, nonce, 0), sipnode(&ctx->sip_ctx, nonce, 1));
edge e(sipnode(&ctx->sip_keys, nonce, 0), sipnode(&ctx->sip_keys, nonce, 1));
if (cycle.find(e) != cycle.end()) {
ctx->sols[soli][n++] = nonce;
#ifdef SHOWSOL
Expand Down Expand Up @@ -394,7 +476,7 @@ void *worker(void *vp) {
for (nonce_t nonce = block-1; alive64; ) { // -1 compensates for 1-based ffs
u32 ffs = __builtin_ffsll(alive64);
nonce += ffs; alive64 >>= ffs;
node_t u0=sipnode(&ctx->sip_ctx, nonce, 0), v0=sipnode(&ctx->sip_ctx, nonce, 1);
node_t u0=sipnode(&ctx->sip_keys, nonce, 0), v0=sipnode(&ctx->sip_keys, nonce, 1);
if (u0) {// ignore vertex 0 so it can be used as nil for cuckoo[]
u32 nu = path(cuckoo, u0, us), nv = path(cuckoo, v0, vs);
if (us[nu] == vs[nv]) {
Expand Down
Loading

0 comments on commit ee30864

Please sign in to comment.