Commit

Feat: Added opencl kernels.

GertRozing committed Jul 3, 2017
1 parent 4566746 commit 4fe51b4
Showing 15 changed files with 2,574 additions and 11 deletions.
58 changes: 58 additions & 0 deletions src/activation_kernels.c
@@ -0,0 +1,58 @@
#include <string.h>
#include "activations.h"
#include "cuda.h"

#ifdef OPENCL

#include "activation_kernels.cl"

cl_program opencl_activation_kernel_program = 0;
cl_kernel opencl_activate_array_kernel = 0;
cl_kernel opencl_gradient_array_kernel = 0;

void activation_kernels_init(void)
{
    /* Compile the embedded kernel source once and look up both kernels. */
    opencl_load_buffer(activation_kernels_source, strlen(activation_kernels_source), &opencl_activation_kernel_program);
    opencl_create_kernel(&opencl_activation_kernel_program,
        "activate_array_kernel", &opencl_activate_array_kernel);
    opencl_create_kernel(&opencl_activation_kernel_program,
        "gradient_array_kernel", &opencl_gradient_array_kernel);
}

void activation_kernels_release(void)
{
    clReleaseKernel(opencl_activate_array_kernel);
    clReleaseKernel(opencl_gradient_array_kernel);
    clReleaseProgram(opencl_activation_kernel_program);

    opencl_activate_array_kernel = 0;
    opencl_gradient_array_kernel = 0;
    opencl_activation_kernel_program = 0;
}

void activate_array_offset_ongpu(cl_mem x, int offset, int n, ACTIVATION a)
{
    /* Launch n work-items; the kernel applies a in place to x[offset..offset+n). */
    dim3 dimN, dimBlock;
    dimN = cuda_gridsize(n);
    dimBlock = dim3_create(BLOCK, 1, 1);
    opencl_kernel(opencl_activate_array_kernel, dimN, dimBlock, 8,
        &x, sizeof(cl_mem), &offset, sizeof(cl_int), &n, sizeof(cl_int), &a, sizeof(cl_int));
}

void activate_array_ongpu(cl_mem x, int n, ACTIVATION a)
{
    activate_array_offset_ongpu(x, 0, n, a);
}

void gradient_array_offset_ongpu(cl_mem x, int offset, int n, ACTIVATION a, cl_mem delta)
{
    dim3 dimN, dimBlock;
    dimN = cuda_gridsize(n);
    dimBlock = dim3_create(BLOCK, 1, 1);
    opencl_kernel(opencl_gradient_array_kernel, dimN, dimBlock, 10,
        &x, sizeof(cl_mem), &offset, sizeof(cl_int), &n, sizeof(cl_int), &a, sizeof(cl_int), &delta, sizeof(cl_mem));
}

void gradient_array_ongpu(cl_mem x, int n, ACTIVATION a, cl_mem delta)
{
    gradient_array_offset_ongpu(x, 0, n, a, delta);
}

#endif // OPENCL
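
For context, a hypothetical call site for these wrappers might look as follows. This is a sketch, not part of the commit: the function name, the idea that output_gpu is a cl_mem buffer allocated elsewhere by the port, and the choice of LEAKY are all assumptions.

    /* Hypothetical usage sketch (not part of this commit). Assumes
       activation_kernels_init() has already run and that output_gpu is a
       cl_mem device buffer holding batch*outputs floats. */
    #include "activations.h"
    #include "cuda.h"

    #ifdef OPENCL
    void forward_example(cl_mem output_gpu, int batch, int outputs)
    {
        /* Apply leaky ReLU in place over the whole batch. */
        activate_array_ongpu(output_gpu, batch*outputs, LEAKY);

        /* Or activate only the second batch item via the offset variant:
           elements [outputs, 2*outputs) of the buffer. */
        activate_array_offset_ongpu(output_gpu, outputs, outputs, LEAKY);
    }
    #endif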
153 changes: 153 additions & 0 deletions src/activation_kernels.cl
@@ -0,0 +1,153 @@
#ifndef __ACTIVATION_KERNELS_CL__
#define __ACTIVATION_KERNELS_CL__

static const char* const activation_kernels_source = CONVERT_KERNEL_TO_STRING(
typedef enum{
    LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU, LOGGY, STAIR, HARDTAN, LHTAN
}ACTIVATION;

float lhtan_activate_kernel(float x)
{
    if(x < 0) return .001*x;
    if(x > 1) return .001*(x-1) + 1;
    return x;
}
float lhtan_gradient_kernel(float x)
{
    if(x > 0 && x < 1) return 1;
    return .001;
}

float hardtan_activate_kernel(float x)
{
    if (x < -1) return -1;
    if (x > 1) return 1;
    return x;
}
float linear_activate_kernel(float x){return x;}
float logistic_activate_kernel(float x){return 1./(1. + exp(-x));}
float loggy_activate_kernel(float x){return 2./(1. + exp(-x)) - 1;}
float relu_activate_kernel(float x){return x*(x>0);}
float elu_activate_kernel(float x){return (x >= 0)*x + (x < 0)*(exp(x)-1);}
float relie_activate_kernel(float x){return (x>0) ? x : .01*x;}
float ramp_activate_kernel(float x){return x*(x>0)+.1*x;}
float leaky_activate_kernel(float x){return (x>0) ? x : .1*x;}
float tanh_activate_kernel(float x){return (2/(1 + exp(-2*x)) - 1);}
float plse_activate_kernel(float x)
{
    if(x < -4) return .01 * (x + 4);
    if(x > 4) return .01 * (x - 4) + 1;
    return .125*x + .5;
}
float stair_activate_kernel(float x)
{
    int n = floor(x);
    if (n%2 == 0) return floor(x/2.);
    else return (x - n) + floor(x/2.);
}


float hardtan_gradient_kernel(float x)
{
    if (x > -1 && x < 1) return 1;
    return 0;
}
float linear_gradient_kernel(float x){return 1;}
float logistic_gradient_kernel(float x){return (1-x)*x;}
float loggy_gradient_kernel(float x)
{
    float y = (x+1.)/2.;
    return 2*(1-y)*y;
}
float relu_gradient_kernel(float x){return (x>0);}
float elu_gradient_kernel(float x){return (x >= 0) + (x < 0)*(x + 1);}
float relie_gradient_kernel(float x){return (x>0) ? 1 : .01;}
float ramp_gradient_kernel(float x){return (x>0)+.1;}
float leaky_gradient_kernel(float x){return (x>0) ? 1 : .1;}
float tanh_gradient_kernel(float x){return 1-x*x;}
float plse_gradient_kernel(float x){return (x < 0 || x > 1) ? .01 : .125;}
float stair_gradient_kernel(float x)
{
    if (floor(x) == x) return 0;
    return 1;
}

float activate_kernel(float x, ACTIVATION a)
{
    switch(a){
        case LINEAR:
            return linear_activate_kernel(x);
        case LOGISTIC:
            return logistic_activate_kernel(x);
        case LOGGY:
            return loggy_activate_kernel(x);
        case RELU:
            return relu_activate_kernel(x);
        case ELU:
            return elu_activate_kernel(x);
        case RELIE:
            return relie_activate_kernel(x);
        case RAMP:
            return ramp_activate_kernel(x);
        case LEAKY:
            return leaky_activate_kernel(x);
        case TANH:
            return tanh_activate_kernel(x);
        case PLSE:
            return plse_activate_kernel(x);
        case STAIR:
            return stair_activate_kernel(x);
        case HARDTAN:
            return hardtan_activate_kernel(x);
        case LHTAN:
            return lhtan_activate_kernel(x);
    }
    return 0;
}

float gradient_kernel(float x, ACTIVATION a)
{
    switch(a){
        case LINEAR:
            return linear_gradient_kernel(x);
        case LOGISTIC:
            return logistic_gradient_kernel(x);
        case LOGGY:
            return loggy_gradient_kernel(x);
        case RELU:
            return relu_gradient_kernel(x);
        case ELU:
            return elu_gradient_kernel(x);
        case RELIE:
            return relie_gradient_kernel(x);
        case RAMP:
            return ramp_gradient_kernel(x);
        case LEAKY:
            return leaky_gradient_kernel(x);
        case TANH:
            return tanh_gradient_kernel(x);
        case PLSE:
            return plse_gradient_kernel(x);
        case STAIR:
            return stair_gradient_kernel(x);
        case HARDTAN:
            return hardtan_gradient_kernel(x);
        case LHTAN:
            return lhtan_gradient_kernel(x);
    }
    return 0;
}

__kernel void activate_array_kernel(__global float *x, int offset, int n, ACTIVATION a)
{
    int i = (get_group_id(0) + get_group_id(1)*get_num_groups(0)) * get_local_size(0) + get_local_id(0);
    if(i < n) x[i + offset] = activate_kernel(x[i + offset], a);
}

__kernel void gradient_array_kernel(__global float *x, int offset, int n, ACTIVATION a, __global float *delta)
{
    int i = (get_group_id(0) + get_group_id(1)*get_num_groups(0)) * get_local_size(0) + get_local_id(0);
    if(i < n) delta[i + offset] *= gradient_kernel(x[i + offset], a);
}
);
#endif
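
CONVERT_KERNEL_TO_STRING is not defined in the files shown in this commit view; presumably it is a variadic stringification macro along these lines (an assumption, sketched for reference):

    /* Presumed definition (an assumption; the real macro lives elsewhere in
       the tree). The # operator stringizes everything between the macro's
       parentheses, turning the kernel source into one C string literal that
       can later be passed to clCreateProgramWithSource(). */
    #define CONVERT_KERNEL_TO_STRING(...) #__VA_ARGS__

One trade-off of embedding source this way is that the preprocessor collapses the kernel text onto a single logical line, so line numbers in OpenCL build logs become much less useful; the embedded source also cannot contain preprocessor directives of its own.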
24 changes: 17 additions & 7 deletions src/activation_kernels.cu
@@ -139,26 +139,36 @@ __device__ float gradient_kernel(float x, ACTIVATION a)
     return 0;
 }
 
-__global__ void activate_array_kernel(float *x, int n, ACTIVATION a)
+__global__ void activate_array_kernel(float *x, int offset, int n, ACTIVATION a)
 {
-    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
-    if(i < n) x[i] = activate_kernel(x[i], a);
+    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x + offset;
+    if(i < n + offset) x[i] = activate_kernel(x[i], a);
 }
 
-__global__ void gradient_array_kernel(float *x, int n, ACTIVATION a, float *delta)
+__global__ void gradient_array_kernel(float *x, int offset, int n, ACTIVATION a, float *delta)
 {
-    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
-    if(i < n) delta[i] *= gradient_kernel(x[i], a);
+    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x + offset;
+    if(i < n + offset) delta[i] *= gradient_kernel(x[i], a);
 }
 
+void activate_array_offset_ongpu(float *x, int offset, int n, ACTIVATION a)
+{
+    activate_array_kernel<<<cuda_gridsize(n), BLOCK>>>(x, offset, n, a);
+    check_error(cudaPeekAtLastError());
+}
+
 void activate_array_ongpu(float *x, int n, ACTIVATION a)
 {
-    activate_array_kernel<<<cuda_gridsize(n), BLOCK>>>(x, n, a);
-    check_error(cudaPeekAtLastError());
+    activate_array_offset_ongpu(x, 0, n, a);
 }
 
+void gradient_array_offset_ongpu(float *x, int offset, int n, ACTIVATION a, float *delta)
+{
+    gradient_array_kernel<<<cuda_gridsize(n), BLOCK>>>(x, offset, n, a, delta);
+    check_error(cudaPeekAtLastError());
+}
+
 void gradient_array_ongpu(float *x, int n, ACTIVATION a, float *delta)
 {
-    gradient_array_kernel<<<cuda_gridsize(n), BLOCK>>>(x, n, a, delta);
-    check_error(cudaPeekAtLastError());
+    gradient_array_offset_ongpu(x, 0, n, a, delta);
 }
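
For reference, the index expression in the .cl kernels above is the standard CUDA-to-OpenCL work-item mapping of this file's index expression (a note for readers, not part of the commit):

    /* CUDA built-in      OpenCL equivalent
       blockIdx.x    ->   get_group_id(0)
       blockIdx.y    ->   get_group_id(1)
       gridDim.x     ->   get_num_groups(0)
       blockDim.x    ->   get_local_size(0)
       threadIdx.x   ->   get_local_id(0)

       so the CUDA index
           (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x
       becomes, in the OpenCL kernels,
           (get_group_id(0) + get_group_id(1)*get_num_groups(0)) * get_local_size(0) + get_local_id(0) */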
