Skip to content

Commit

Permalink
Adds a CUDA "sleep" kernel
Browse files Browse the repository at this point in the history
Adds a CUDA "sleep" kernel which spins for the given number of
iterations. This is useful for testing correct synchronization with
streams.
  • Loading branch information
colesbury committed Dec 1, 2016
1 parent ce43bc5 commit 9d8e13d
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 0 deletions.
11 changes: 11 additions & 0 deletions init.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include "luaT.h"
#include "THCGeneral.h"
#include "THCCachingAllocator.h"
#include "THCSleep.h"
#include "THCTensorRandom.h"
#include "THCHalf.h" // for CUDA_HALF_TENSOR

Expand Down Expand Up @@ -938,6 +939,15 @@ static int cutorch_hasFastHalfInstructions(lua_State *L) {
return 1;
}

/*
 * Lua entry point that enqueues the CUDA spin kernel through THC_sleep.
 *
 * Lua argument 1 ('cycles'): the count handed to THC_sleep. THError is raised
 * with "expected number 'cycles'" when luaT_checklong rejects the argument.
 *
 * Returns 0, i.e. no values are pushed back to Lua.
 */
static int cutorch_sleep(lua_State *L) {
  long long cycles;
  THCState *state = cutorch_getstate(L);

  if (!luaT_checklong(L, 1)) {
    THError("expected number 'cycles'");
  }

  /* Read the argument only after the check above, as a separate step. */
  cycles = luaT_tolong(L, 1);
  THC_sleep(state, cycles);
  return 0;
}

static const struct luaL_Reg cutorch_stuff__ [] = {
{"synchronize", cutorch_synchronize},
{"synchronizeAll", cutorch_synchronizeAll},
Expand Down Expand Up @@ -972,6 +982,7 @@ static const struct luaL_Reg cutorch_stuff__ [] = {
{"initialSeed", cutorch_initialSeed},
{"manualSeed", cutorch_manualSeed},
{"manualSeedAll", cutorch_manualSeedAll},
{"_sleep", cutorch_sleep},
{"getRNGState", cutorch_getRNGState},
{"setRNGState", cutorch_setRNGState},
{"getState", cutorch_getState},
Expand Down
2 changes: 2 additions & 0 deletions lib/THC/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ SET(src
SET(src-cuda
THCReduceApplyUtils.cu
THCBlas.cu
THCSleep.cu
THCStorage.cu
THCStorageCopy.cu
THCTensor.cu
Expand Down Expand Up @@ -199,6 +200,7 @@ INSTALL(FILES
THC.h
${CMAKE_CURRENT_BINARY_DIR}/THCGeneral.h
THCBlas.h
THCSleep.h
THCStorage.h
THCStorageCopy.h
THCStream.h
Expand Down
1 change: 1 addition & 0 deletions lib/THC/THC.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include "THCAllocator.h"
#include "THCBlas.h"
#include "THCCachingAllocator.h"
#include "THCSleep.h"
#include "THCStorage.h"
#include "THCStorageCopy.h"
#include "THCStream.h"
Expand Down
21 changes: 21 additions & 0 deletions lib/THC/THCSleep.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#include "THCSleep.h"


// Busy-waits on the device until the clock64() counter has advanced by at
// least `cycles` ticks relative to its first reading in this kernel.
// Modeled on the concurrentKernels CUDA sample.
__global__ void spin_kernel(long long cycles)
{
  const long long begin = clock64();

  // The first comparison sees an elapsed count of 0, so a non-positive
  // `cycles` falls straight through without re-reading the clock.
  for (long long elapsed = 0; elapsed < cycles; elapsed = clock64() - begin)
  {
  }
}

// Launches spin_kernel with a 1-block x 1-thread configuration and no dynamic
// shared memory on the stream returned by THCState_getCurrentStream(state),
// then passes cudaGetLastError() to THCudaCheck to surface launch failures.
//
// state:  THC state used to look up the current stream.
// cycles: tick count forwarded unchanged to spin_kernel.
THC_API void THC_sleep(THCState* state, long long cycles)
{
  spin_kernel<<<1, 1, 0, THCState_getCurrentStream(state)>>>(cycles);
  THCudaCheck(cudaGetLastError());
}
10 changes: 10 additions & 0 deletions lib/THC/THCSleep.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// Public declaration of THC_sleep, a helper that keeps the GPU busy for a
// requested number of clock cycles.
//
// NOTE(review): the include guard says "SPIN" while the file is THCSleep.h;
// consider renaming it for consistency once no other file relies on the macro.
#ifndef THC_SPIN_INC
#define THC_SPIN_INC

#include "THCGeneral.h"
// NOTE(review): nothing declared in this header appears to need <time.h> --
// confirm whether this include can be dropped.
#include <time.h>

// Enqueues a kernel that spins for the specified number of cycles.
//
// state:  THC state; the implementation launches on its current stream.
// cycles: number of device clock ticks (as counted by clock64()) to spin for.
//
// NOTE(review): presumably returns as soon as the kernel is enqueued rather
// than when it finishes -- callers that need completion should synchronize.
THC_API void THC_sleep(THCState* state, long long cycles);

#endif

0 comments on commit 9d8e13d

Please sign in to comment.