forked from github-linguist/linguist
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcuda.cu
39 lines (32 loc) · 1.2 KB
/
cuda.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
void foo()
{
cudaArray* cu_array;
texture<float, 2, cudaReadModeElementType> tex;
// Allocate array
cudaChannelFormatDesc description = cudaCreateChannelDesc<float>();
cudaMallocArray(&cu_array, &description, width, height);
// Copy image data to array
cudaMemcpyToArray(cu_array, image, width*height*sizeof(float), cudaMemcpyHostToDevice);
// Set texture parameters (default)
tex.addressMode[0] = cudaAddressModeClamp;
tex.addressMode[1] = cudaAddressModeClamp;
tex.filterMode = cudaFilterModePoint;
tex.normalized = false; // do not normalize coordinates
// Bind the array to the texture
cudaBindTextureToArray(tex, cu_array);
// Run kernel
dim3 blockDim(16, 16, 1);
dim3 gridDim((width + blockDim.x - 1)/ blockDim.x, (height + blockDim.y - 1) / blockDim.y, 1);
kernel<<< gridDim, blockDim, 0 >>>(d_data, height, width);
// Unbind the array from the texture
cudaUnbindTexture(tex);
} //end foo()
__global__ void kernel(float* odata, int height, int width)
{
unsigned int x = blockIdx.x*blockDim.x + threadIdx.x;
unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;
if (x < width && y < height) {
float c = tex2D(tex, x, y);
odata[y*width+x] = c;
}
}