Skip to content

Commit

Permalink
Merge pull request fireice-uk#2468 from psychocrypt/topic-relaxNvidia…
Browse files Browse the repository at this point in the history
…Threads

NVIDIA: relax 'threads' config
  • Loading branch information
fireice-uk authored Jul 2, 2019
2 parents fa08b26 + 442581e commit 00a8479
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 7 deletions.
1 change: 1 addition & 0 deletions xmrstak/backend/nvidia/nvcc_code/cryptonight.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ typedef struct
int device_threads;
int device_bfactor;
int device_bsleep;
int device_maxThreadsPerBlock;
int syncMode;
bool memMode;

Expand Down
27 changes: 21 additions & 6 deletions xmrstak/backend/nvidia/nvcc_code/cuda_core.cu
Original file line number Diff line number Diff line change
Expand Up @@ -895,12 +895,19 @@ void cryptonight_core_gpu_hash(nvid_ctx* ctx, uint32_t nonce, const xmrstak_algo
roundsPhase3 *= 2;
}

int blockSizePhase3 = block8.x;
int gridSizePhase3 = grid.x;
if(blockSizePhase3 * 2 <= ctx->device_maxThreadsPerBlock)
{
blockSizePhase3 *= 2;
gridSizePhase3 = (blockSizePhase3 + 1) / 2;
}
for(int i = 0; i < roundsPhase3; i++)
{
CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_core_gpu_phase3<ALGO><<<
(grid.x + 1) / 2,
block8.x * 2,
2 * block8.x * sizeof(uint32_t) * static_cast<int>(ctx->device_arch[0] < 3)>>>(
gridSizePhase3,
blockSizePhase3,
blockSizePhase3 * sizeof(uint32_t) * static_cast<int>(ctx->device_arch[0] < 3)>>>(
ITERATIONS,
MEM,
ctx->device_blocks * ctx->device_threads,
Expand Down Expand Up @@ -966,12 +973,20 @@ void cryptonight_core_gpu_hash_gpu(nvid_ctx* ctx, uint32_t nonce, const xmrstak_
roundsPhase3 *= 2;
}

int blockSizePhase3 = block8.x;
int gridSizePhase3 = grid.x;
if(blockSizePhase3 * 2 <= ctx->device_maxThreadsPerBlock)
{
blockSizePhase3 *= 2;
gridSizePhase3 = (blockSizePhase3 + 1) / 2;
}

for(int i = 0; i < roundsPhase3; i++)
{
CUDA_CHECK_KERNEL(ctx->device_id, cryptonight_core_gpu_phase3<ALGO><<<
(grid.x + 1) / 2,
block8.x * 2 ,
2 * block8.x * sizeof(uint32_t) * static_cast<int>(ctx->device_arch[0] < 3)>>>(
gridSizePhase3,
blockSizePhase3,
blockSizePhase3 * sizeof(uint32_t) * static_cast<int>(ctx->device_arch[0] < 3)>>>(
ITERATIONS,
MEM / 4,
ctx->device_blocks * ctx->device_threads,
Expand Down
19 changes: 18 additions & 1 deletion xmrstak/backend/nvidia/nvcc_code/cuda_extra.cu
Original file line number Diff line number Diff line change
Expand Up @@ -560,12 +560,13 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx)
ctx->device_mpcount = props.multiProcessorCount;
ctx->device_arch[0] = props.major;
ctx->device_arch[1] = props.minor;
ctx->device_maxThreadsPerBlock = props.maxThreadsPerBlock;

const int gpuArch = ctx->device_arch[0] * 10 + ctx->device_arch[1];

ctx->name = std::string(props.name);

printf("CUDA [%d.%d/%d.%d] GPU#%d, device architecture %d: \"%s\"... ",
printf("CUDA [%d.%d/%d.%d] GPU#%d, device architecture %d: \"%s\"...\n",
version / 1000, (version % 1000 / 10),
CUDART_VERSION / 1000, (CUDART_VERSION % 1000) / 10,
ctx->device_id, gpuArch, ctx->device_name);
Expand Down Expand Up @@ -803,6 +804,22 @@ extern "C" int cuda_get_deviceinfo(nvid_ctx* ctx)
}
}
}

if(useCryptonight_gpu)
{
// cryptonight_gpu used 16 threads per share
if(ctx->device_threads * 16 > ctx->device_maxThreadsPerBlock)
{
ctx->device_threads = ctx->device_maxThreadsPerBlock / 16;
printf("WARNING: 'threads' configuration to large, value adjusted to %i\n", ctx->device_threads);
}
}
else if(ctx->device_threads * 8 > ctx->device_maxThreadsPerBlock)
{
// by default cryptonight CUDA implementations uses 8 threads per thread for some kernel
ctx->device_threads = ctx->device_maxThreadsPerBlock / 8;
printf("WARNING: 'threads' configuration to large, value adjusted to %i\n", ctx->device_threads);
}
printf("device init succeeded\n");

return 0;
Expand Down

0 comments on commit 00a8479

Please sign in to comment.