NIGHTMARE!!!!

1741581 · Jul 10, 2015 · a08ef29 · a08ef29
1 parent d1d56a2
commit a08ef29
Show file tree

Hide file tree

Showing 24 changed files with 456 additions and 96 deletions.
diff --git a/Makefile b/Makefile
@@ -34,7 +34,7 @@ CFLAGS+= -DGPU
 LDFLAGS+= -L/usr/local/cuda/lib64 -lcuda -lcudart -lcublas -lcurand
 endif
 
-OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o imagenet.o captcha.o detection.o route_layer.o writing.o box.o
+OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o imagenet.o captcha.o detection.o route_layer.o writing.o box.o nightmare.o
 ifeq ($(GPU), 1) 
 OBJ+=convolutional_kernels.o deconvolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o softmax_layer_kernels.o network_kernels.o
 endif
@@ -58,7 +58,6 @@ obj:
 results:
 	mkdir -p results
 
-
 .PHONY: clean
 
 clean:

diff --git a/cfg/strided.cfg b/cfg/strided.cfg
@@ -13,9 +13,9 @@ seen=0
 crop_height=224
 crop_width=224
 flip=1
-angle=15
-saturation=1.5
-exposure=1.5
+angle=0
+saturation=1
+exposure=1
 
 [convolutional]
 filters=64

diff --git a/cfg/vgg-16.cfg b/cfg/vgg-16.cfg
@@ -13,9 +13,9 @@ decay=0.0005
 crop_height=224
 crop_width=224
 flip=1
-exposure=2
-saturation=2
-angle=5
+exposure=1
+saturation=1
+angle=0
 
 [convolutional]
 filters=64

diff --git a/cfg/vgg-conv.cfg b/cfg/vgg-conv.cfg
@@ -0,0 +1,122 @@
+[net]
+batch=1
+subdivisions=1
+width=224
+height=224
+channels=3
+learning_rate=0.00001
+momentum=0.9
+seen=0
+decay=0.0005
+
+[convolutional]
+filters=64
+size=3
+stride=1
+pad=1
+activation=relu
+
+[convolutional]
+filters=64
+size=3
+stride=1
+pad=1
+activation=relu
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+filters=128
+size=3
+stride=1
+pad=1
+activation=relu
+
+[convolutional]
+filters=128
+size=3
+stride=1
+pad=1
+activation=relu
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+filters=256
+size=3
+stride=1
+pad=1
+activation=relu
+
+[convolutional]
+filters=256
+size=3
+stride=1
+pad=1
+activation=relu
+
+[convolutional]
+filters=256
+size=3
+stride=1
+pad=1
+activation=relu
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+filters=512
+size=3
+stride=1
+pad=1
+activation=relu
+
+[convolutional]
+filters=512
+size=3
+stride=1
+pad=1
+activation=relu
+
+[convolutional]
+filters=512
+size=3
+stride=1
+pad=1
+activation=relu
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+filters=512
+size=3
+stride=1
+pad=1
+activation=relu
+
+[convolutional]
+filters=512
+size=3
+stride=1
+pad=1
+activation=relu
+
+[convolutional]
+filters=512
+size=3
+stride=1
+pad=1
+activation=relu
+
+[maxpool]
+size=2
+stride=2
+
diff --git a/data/scream.jpg b/data/scream.jpg
diff --git a/src/activation_kernels.cu b/src/activation_kernels.cu
@@ -8,6 +8,7 @@ __device__ float logistic_activate_kernel(float x){return 1./(1. + exp(-x));}
 __device__ float relu_activate_kernel(float x){return x*(x>0);}
 __device__ float relie_activate_kernel(float x){return x*(x>0);}
 __device__ float ramp_activate_kernel(float x){return x*(x>0)+.1*x;}
+__device__ float leaky_activate_kernel(float x){return (x>0) ? x : .1*x;}
 __device__ float tanh_activate_kernel(float x){return (exp(2*x)-1)/(exp(2*x)+1);}
 __device__ float plse_activate_kernel(float x)
 {
@@ -21,6 +22,7 @@ __device__ float logistic_gradient_kernel(float x){return (1-x)*x;}
 __device__ float relu_gradient_kernel(float x){return (x>0);}
 __device__ float relie_gradient_kernel(float x){return (x>0) ? 1 : .01;}
 __device__ float ramp_gradient_kernel(float x){return (x>0)+.1;}
+__device__ float leaky_gradient_kernel(float x){return (x>0) ? 1 : .1;}
 __device__ float tanh_gradient_kernel(float x){return 1-x*x;}
 __device__ float plse_gradient_kernel(float x){return (x < 0 || x > 1) ? .01 : .125;}
 
@@ -37,6 +39,8 @@ __device__ float activate_kernel(float x, ACTIVATION a)
             return relie_activate_kernel(x);
         case RAMP:
             return ramp_activate_kernel(x);
+        case LEAKY:
+            return leaky_activate_kernel(x);
         case TANH:
             return tanh_activate_kernel(x);
         case PLSE:
@@ -58,6 +62,8 @@ __device__ float gradient_kernel(float x, ACTIVATION a)
             return relie_gradient_kernel(x);
         case RAMP:
             return ramp_gradient_kernel(x);
+        case LEAKY:
+            return leaky_gradient_kernel(x);
         case TANH:
             return tanh_gradient_kernel(x);
         case PLSE:

diff --git a/src/activations.c b/src/activations.c
@@ -22,6 +22,8 @@ char *get_activation_string(ACTIVATION a)
             return "tanh";
         case PLSE:
             return "plse";
+        case LEAKY:
+            return "leaky";
         default:
             break;
     }
@@ -36,6 +38,7 @@ ACTIVATION get_activation(char *s)
     if (strcmp(s, "plse")==0) return PLSE;
     if (strcmp(s, "linear")==0) return LINEAR;
     if (strcmp(s, "ramp")==0) return RAMP;
+    if (strcmp(s, "leaky")==0) return LEAKY;
     if (strcmp(s, "tanh")==0) return TANH;
     fprintf(stderr, "Couldn't find activation function %s, going with ReLU\n", s);
     return RELU;
@@ -54,6 +57,8 @@ float activate(float x, ACTIVATION a)
             return relie_activate(x);
         case RAMP:
             return ramp_activate(x);
+        case LEAKY:
+            return leaky_activate(x);
         case TANH:
             return tanh_activate(x);
         case PLSE:
@@ -83,6 +88,8 @@ float gradient(float x, ACTIVATION a)
             return relie_gradient(x);
         case RAMP:
             return ramp_gradient(x);
+        case LEAKY:
+            return leaky_gradient(x);
         case TANH:
             return tanh_gradient(x);
         case PLSE:

diff --git a/src/activations.h b/src/activations.h
@@ -4,7 +4,7 @@
 #include "math.h"
 
 typedef enum{
-    LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE
+    LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY
 }ACTIVATION;
 
 ACTIVATION get_activation(char *s);
@@ -24,6 +24,7 @@ static inline float logistic_activate(float x){return 1./(1. + exp(-x));}
 static inline float relu_activate(float x){return x*(x>0);}
 static inline float relie_activate(float x){return x*(x>0);}
 static inline float ramp_activate(float x){return x*(x>0)+.1*x;}
+static inline float leaky_activate(float x){return (x>0) ? x : .1*x;}
 static inline float tanh_activate(float x){return (exp(2*x)-1)/(exp(2*x)+1);}
 static inline float plse_activate(float x)
 {
@@ -37,6 +38,7 @@ static inline float logistic_gradient(float x){return (1-x)*x;}
 static inline float relu_gradient(float x){return (x>0);}
 static inline float relie_gradient(float x){return (x>0) ? 1 : .01;}
 static inline float ramp_gradient(float x){return (x>0)+.1;}
+static inline float leaky_gradient(float x){return (x>0) ? 1 : .1;}
 static inline float tanh_gradient(float x){return 1-x*x;}
 static inline float plse_gradient(float x){return (x < 0 || x > 1) ? .01 : .125;}
 

diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c
@@ -97,12 +97,18 @@ convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int
     return l;
 }
 
-void resize_convolutional_layer(convolutional_layer *l, int h, int w)
+void resize_convolutional_layer(convolutional_layer *l, int w, int h)
 {
-    l->h = h;
     l->w = w;
-    int out_h = convolutional_out_height(*l);
+    l->h = h;
     int out_w = convolutional_out_width(*l);
+    int out_h = convolutional_out_height(*l);
+
+    l->out_w = out_w;
+    l->out_h = out_h;
+
+    l->outputs = l->out_h * l->out_w * l->out_c;
+    l->inputs = l->w * l->h * l->c;
 
     l->col_image = realloc(l->col_image,
                                 out_h*out_w*l->size*l->size*l->c*sizeof(float));
@@ -116,9 +122,9 @@ void resize_convolutional_layer(convolutional_layer *l, int h, int w)
     cuda_free(l->delta_gpu);
     cuda_free(l->output_gpu);
 
-    l->col_image_gpu = cuda_make_array(l->col_image, out_h*out_w*l->size*l->size*l->c);
-    l->delta_gpu = cuda_make_array(l->delta, l->batch*out_h*out_w*l->n);
-    l->output_gpu = cuda_make_array(l->output, l->batch*out_h*out_w*l->n);
+    l->col_image_gpu = cuda_make_array(0, out_h*out_w*l->size*l->size*l->c);
+    l->delta_gpu = cuda_make_array(0, l->batch*out_h*out_w*l->n);
+    l->output_gpu = cuda_make_array(0, l->batch*out_h*out_w*l->n);
     #endif
 }
 

diff --git a/src/convolutional_layer.h b/src/convolutional_layer.h
@@ -22,7 +22,7 @@ void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int
 #endif
 
 convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation);
-void resize_convolutional_layer(convolutional_layer *layer, int h, int w);
+void resize_convolutional_layer(convolutional_layer *layer, int w, int h);
 void forward_convolutional_layer(const convolutional_layer layer, network_state state);
 void update_convolutional_layer(convolutional_layer layer, int batch, float learning_rate, float momentum, float decay);
 image *visualize_convolutional_layer(convolutional_layer layer, char *window, image *prev_filters);

diff --git a/src/darknet.c b/src/darknet.c
@@ -13,41 +13,7 @@ extern void run_imagenet(int argc, char **argv);
 extern void run_detection(int argc, char **argv);
 extern void run_writing(int argc, char **argv);
 extern void run_captcha(int argc, char **argv);
-
-void del_arg(int argc, char **argv, int index)
-{
-    int i;
-    for(i = index; i < argc-1; ++i) argv[i] = argv[i+1];
-    argv[i] = 0;
-}
-
-int find_arg(int argc, char* argv[], char *arg)
-{
-    int i;
-    for(i = 0; i < argc; ++i) {
-        if(!argv[i]) continue;
-        if(0==strcmp(argv[i], arg)) {
-            del_arg(argc, argv, i);
-            return 1;
-        }
-    }
-    return 0;
-}
-
-int find_int_arg(int argc, char **argv, char *arg, int def)
-{
-    int i;
-    for(i = 0; i < argc-1; ++i){
-        if(!argv[i]) continue;
-        if(0==strcmp(argv[i], arg)){
-            def = atoi(argv[i+1]);
-            del_arg(argc, argv, i);
-            del_arg(argc, argv, i);
-            break;
-        }
-    }
-    return def;
-}
+extern void run_nightmare(int argc, char **argv);
 
 void change_rate(char *filename, float scale, float add)
 {
@@ -135,6 +101,8 @@ int main(int argc, char **argv)
         test_resize(argv[2]);
     } else if (0 == strcmp(argv[1], "captcha")){
         run_captcha(argc, argv);
+    } else if (0 == strcmp(argv[1], "nightmare")){
+        run_nightmare(argc, argv);
     } else if (0 == strcmp(argv[1], "change")){
         change_rate(argv[2], atof(argv[3]), (argc > 4) ? atof(argv[4]) : 0);
     } else if (0 == strcmp(argv[1], "rgbgr")){

diff --git a/src/image.c b/src/image.c
@@ -187,6 +187,7 @@ void show_image_cv(image p, char *name)
 {
     int x,y,k;
     image copy = copy_image(p);
+    constrain_image(copy);
     rgbgr_image(copy);
     //normalize_image(copy);
 
@@ -207,7 +208,8 @@ void show_image_cv(image p, char *name)
         }
     }
     free_image(copy);
-    if(disp->height < 448 || disp->width < 448 || disp->height > 1000){
+    if(0){
+    //if(disp->height < 448 || disp->width < 448 || disp->height > 1000){
         int w = 448;
         int h = w*p.h/p.w;
         if(h > 1000){

diff --git a/src/image.h b/src/image.h
@@ -37,6 +37,8 @@ void exposure_image(image im, float sat);
 void saturate_exposure_image(image im, float sat, float exposure);
 void hsv_to_rgb(image im);
 void rgbgr_image(image im);
+void constrain_image(image im);
+image grayscale_image(image im);
 
 image collapse_image_layers(image source, int border);
 image collapse_images_horz(image *ims, int n);

diff --git a/src/imagenet.c b/src/imagenet.c
@@ -48,7 +48,6 @@ void train_imagenet(char *cfgfile, char *weightfile)
         printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), net.seen);
         free_data(train);
         if((i % 30000) == 0) net.learning_rate *= .1;
-        //if(i%100 == 0 && net.learning_rate > .00001) net.learning_rate *= .97;
         if(i%1000==0){
             char buff[256];
             sprintf(buff, "/home/pjreddie/imagenet_backup/%s_%d.weights",base, i);

diff --git a/src/layer.h b/src/layer.h
@@ -48,6 +48,8 @@ typedef struct {
     int does_cost;
     int joint;
 
+    int dontload;
+
     float probability;
     float scale;
     int *indexes;