rail_object_detector: convolutional

Go to the documentation of this file.
00001 #include "convolutional_layer.h"
00002 #include "utils.h"
00003 #include "batchnorm_layer.h"
00004 #include "im2col.h"
00005 #include "col2im.h"
00006 #include "blas.h"
00007 #include "gemm.h"
00008 #include <stdio.h>
00009 #include <time.h>
00010 
00011 #ifdef AI2
00012 #include "xnor_layer.h"
00013 #endif
00014 
00015 #ifndef AI2
00016 #define AI2 0
00017 void forward_xnor_layer(layer l, network_state state);
00018 #endif
00019 
00020 void swap_binary(convolutional_layer *l)
00021 {
00022     float *swap = l->weights;
00023     l->weights = l->binary_weights;
00024     l->binary_weights = swap;
00025 
00026     #ifdef GPU
00027     swap = l->weights_gpu;
00028     l->weights_gpu = l->binary_weights_gpu;
00029     l->binary_weights_gpu = swap;
00030     #endif
00031 }
00032 
/*
 * Binarizes n consecutive groups of `size` weights.
 * For each group the mean absolute value is computed; every strictly positive
 * weight becomes +mean and every other weight (including zero) becomes -mean.
 *
 * weights: n*size input values
 * binary:  n*size output values
 */
void binarize_weights(float *weights, int n, int size, float *binary)
{
    int filter;
    for(filter = 0; filter < n; ++filter){
        float *src = weights + filter*size;
        float *dst = binary + filter*size;
        float abs_sum = 0;
        int k = 0;

        while(k < size){
            abs_sum += fabs(src[k]);
            ++k;
        }

        float magnitude = abs_sum / size;
        for(k = 0; k < size; ++k){
            if(src[k] > 0){
                dst[k] = magnitude;
            } else {
                dst[k] = -magnitude;
            }
        }
    }
}
00047 
/*
 * Writes the sign of each of the n input values into binary:
 * +1 for strictly positive values, -1 for everything else (zero included).
 */
void binarize_cpu(float *input, int n, float *binary)
{
    int idx = 0;
    while(idx < n){
        if(input[idx] > 0){
            binary[idx] = 1;
        } else {
            binary[idx] = -1;
        }
        ++idx;
    }
}
00055 
/*
 * Binarizes an n x size array (element (i, s) lives at input[i*size + s])
 * one column s at a time: the mean absolute value over the n entries of the
 * column is computed, then strictly positive entries become +mean and all
 * other entries become -mean.
 */
void binarize_input(float *input, int n, int size, float *binary)
{
    int col;
    for(col = 0; col < size; ++col){
        float abs_sum = 0;
        int row;

        for(row = 0; row < n; ++row){
            abs_sum += fabs(input[row*size + col]);
        }

        float magnitude = abs_sum / n;

        row = 0;
        while(row < n){
            int at = row*size + col;
            if(input[at] > 0){
                binary[at] = magnitude;
            } else {
                binary[at] = -magnitude;
            }
            ++row;
        }
    }
}
00070 
00071 int convolutional_out_height(convolutional_layer l)
00072 {
00073     return (l.h + 2*l.pad - l.size) / l.stride + 1;
00074 }
00075 
00076 int convolutional_out_width(convolutional_layer l)
00077 {
00078     return (l.w + 2*l.pad - l.size) / l.stride + 1;
00079 }
00080 
00081 image get_convolutional_image(convolutional_layer l)
00082 {
00083     int h,w,c;
00084     h = convolutional_out_height(l);
00085     w = convolutional_out_width(l);
00086     c = l.n;
00087     return float_to_image(w,h,c,l.output);
00088 }
00089 
00090 image get_convolutional_delta(convolutional_layer l)
00091 {
00092     int h,w,c;
00093     h = convolutional_out_height(l);
00094     w = convolutional_out_width(l);
00095     c = l.n;
00096     return float_to_image(w,h,c,l.delta);
00097 }
00098 
00099 size_t get_workspace_size(layer l){
00100 #ifdef CUDNN
00101     if(gpu_index >= 0){
00102         size_t most = 0;
00103         size_t s = 0;
00104         cudnnGetConvolutionForwardWorkspaceSize(cudnn_handle(),
00105                 l.srcTensorDesc,
00106                 l.weightDesc,
00107                 l.convDesc,
00108                 l.dstTensorDesc,
00109                 l.fw_algo,
00110                 &s);
00111         if (s > most) most = s;
00112         cudnnGetConvolutionBackwardFilterWorkspaceSize(cudnn_handle(),
00113                 l.srcTensorDesc,
00114                 l.ddstTensorDesc,
00115                 l.convDesc,
00116                 l.dweightDesc,
00117                 l.bf_algo,
00118                 &s);
00119         if (s > most) most = s;
00120         cudnnGetConvolutionBackwardDataWorkspaceSize(cudnn_handle(),
00121                 l.weightDesc,
00122                 l.ddstTensorDesc,
00123                 l.convDesc,
00124                 l.dsrcTensorDesc,
00125                 l.bd_algo,
00126                 &s);
00127         if (s > most) most = s;
00128         return most;
00129     }
00130     #endif
00131     return (size_t)l.out_h*l.out_w*l.size*l.size*l.c*sizeof(float);
00132 }
00133 
00134 #ifdef GPU
00135 #ifdef CUDNN
00136 void cudnn_convolutional_setup(layer *l)
00137 {
00138     cudnnSetTensor4dDescriptor(l->dsrcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->c, l->h, l->w); 
00139     cudnnSetTensor4dDescriptor(l->ddstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w); 
00140     cudnnSetFilter4dDescriptor(l->dweightDesc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, l->n, l->c, l->size, l->size); 
00141 
00142     cudnnSetTensor4dDescriptor(l->srcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->c, l->h, l->w); 
00143     cudnnSetTensor4dDescriptor(l->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w); 
00144     cudnnSetFilter4dDescriptor(l->weightDesc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, l->n, l->c, l->size, l->size); 
00145     cudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION);
00146     cudnnGetConvolutionForwardAlgorithm(cudnn_handle(),
00147             l->srcTensorDesc,
00148             l->weightDesc,
00149             l->convDesc,
00150             l->dstTensorDesc,
00151             CUDNN_CONVOLUTION_FWD_PREFER_FASTEST,
00152             0,
00153             &l->fw_algo);
00154     cudnnGetConvolutionBackwardDataAlgorithm(cudnn_handle(),
00155             l->weightDesc,
00156             l->ddstTensorDesc,
00157             l->convDesc,
00158             l->dsrcTensorDesc,
00159             CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST,
00160             0,
00161             &l->bd_algo);
00162     cudnnGetConvolutionBackwardFilterAlgorithm(cudnn_handle(),
00163             l->srcTensorDesc,
00164             l->ddstTensorDesc,
00165             l->convDesc,
00166             l->dweightDesc,
00167             CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST,
00168             0,
00169             &l->bf_algo);
00170 }
00171 #endif
00172 #endif
00173 
00174 convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor, int adam)
00175 {
00176     int i;
00177     convolutional_layer l = {0};
00178     l.type = CONVOLUTIONAL;
00179 
00180     l.h = h;
00181     l.w = w;
00182     l.c = c;
00183     l.n = n;
00184     l.binary = binary;
00185     l.xnor = xnor;
00186     l.batch = batch;
00187     l.stride = stride;
00188     l.size = size;
00189     l.pad = padding;
00190     l.batch_normalize = batch_normalize;
00191 
00192     l.weights = calloc(c*n*size*size, sizeof(float));
00193     l.weight_updates = calloc(c*n*size*size, sizeof(float));
00194 
00195     l.biases = calloc(n, sizeof(float));
00196     l.bias_updates = calloc(n, sizeof(float));
00197 
00198     // float scale = 1./sqrt(size*size*c);
00199     float scale = sqrt(2./(size*size*c));
00200     for(i = 0; i < c*n*size*size; ++i) l.weights[i] = scale*rand_uniform(-1, 1);
00201     int out_h = convolutional_out_height(l);
00202     int out_w = convolutional_out_width(l);
00203     l.out_h = out_h;
00204     l.out_w = out_w;
00205     l.out_c = n;
00206     l.outputs = l.out_h * l.out_w * l.out_c;
00207     l.inputs = l.w * l.h * l.c;
00208 
00209     l.output = calloc(l.batch*l.outputs, sizeof(float));
00210     l.delta  = calloc(l.batch*l.outputs, sizeof(float));
00211 
00212     l.forward = forward_convolutional_layer;
00213     l.backward = backward_convolutional_layer;
00214     l.update = update_convolutional_layer;
00215     if(binary){
00216         l.binary_weights = calloc(c*n*size*size, sizeof(float));
00217         l.cweights = calloc(c*n*size*size, sizeof(char));
00218         l.scales = calloc(n, sizeof(float));
00219     }
00220     if(xnor){
00221         l.binary_weights = calloc(c*n*size*size, sizeof(float));
00222         l.binary_input = calloc(l.inputs*l.batch, sizeof(float));
00223     }
00224 
00225     if(batch_normalize){
00226         l.scales = calloc(n, sizeof(float));
00227         l.scale_updates = calloc(n, sizeof(float));
00228         for(i = 0; i < n; ++i){
00229             l.scales[i] = 1;
00230         }
00231 
00232         l.mean = calloc(n, sizeof(float));
00233         l.variance = calloc(n, sizeof(float));
00234 
00235         l.mean_delta = calloc(n, sizeof(float));
00236         l.variance_delta = calloc(n, sizeof(float));
00237 
00238         l.rolling_mean = calloc(n, sizeof(float));
00239         l.rolling_variance = calloc(n, sizeof(float));
00240         l.x = calloc(l.batch*l.outputs, sizeof(float));
00241         l.x_norm = calloc(l.batch*l.outputs, sizeof(float));
00242     }
00243     if(adam){
00244         l.adam = 1;
00245         l.m = calloc(c*n*size*size, sizeof(float));
00246         l.v = calloc(c*n*size*size, sizeof(float));
00247     }
00248 
00249 #ifdef GPU
00250     l.forward_gpu = forward_convolutional_layer_gpu;
00251     l.backward_gpu = backward_convolutional_layer_gpu;
00252     l.update_gpu = update_convolutional_layer_gpu;
00253 
00254     if(gpu_index >= 0){
00255         if (adam) {
00256             l.m_gpu = cuda_make_array(l.m, c*n*size*size);
00257             l.v_gpu = cuda_make_array(l.v, c*n*size*size);
00258         }
00259 
00260         l.weights_gpu = cuda_make_array(l.weights, c*n*size*size);
00261         l.weight_updates_gpu = cuda_make_array(l.weight_updates, c*n*size*size);
00262 
00263         l.biases_gpu = cuda_make_array(l.biases, n);
00264         l.bias_updates_gpu = cuda_make_array(l.bias_updates, n);
00265 
00266         l.delta_gpu = cuda_make_array(l.delta, l.batch*out_h*out_w*n);
00267         l.output_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);
00268 
00269         if(binary){
00270             l.binary_weights_gpu = cuda_make_array(l.weights, c*n*size*size);
00271         }
00272         if(xnor){
00273             l.binary_weights_gpu = cuda_make_array(l.weights, c*n*size*size);
00274             l.binary_input_gpu = cuda_make_array(0, l.inputs*l.batch);
00275         }
00276 
00277         if(batch_normalize){
00278             l.mean_gpu = cuda_make_array(l.mean, n);
00279             l.variance_gpu = cuda_make_array(l.variance, n);
00280 
00281             l.rolling_mean_gpu = cuda_make_array(l.mean, n);
00282             l.rolling_variance_gpu = cuda_make_array(l.variance, n);
00283 
00284             l.mean_delta_gpu = cuda_make_array(l.mean, n);
00285             l.variance_delta_gpu = cuda_make_array(l.variance, n);
00286 
00287             l.scales_gpu = cuda_make_array(l.scales, n);
00288             l.scale_updates_gpu = cuda_make_array(l.scale_updates, n);
00289 
00290             l.x_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);
00291             l.x_norm_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n);
00292         }
00293 #ifdef CUDNN
00294         cudnnCreateTensorDescriptor(&l.srcTensorDesc);
00295         cudnnCreateTensorDescriptor(&l.dstTensorDesc);
00296         cudnnCreateFilterDescriptor(&l.weightDesc);
00297         cudnnCreateTensorDescriptor(&l.dsrcTensorDesc);
00298         cudnnCreateTensorDescriptor(&l.ddstTensorDesc);
00299         cudnnCreateFilterDescriptor(&l.dweightDesc);
00300         cudnnCreateConvolutionDescriptor(&l.convDesc);
00301         cudnn_convolutional_setup(&l);
00302 #endif
00303     }
00304 #endif
00305     l.workspace_size = get_workspace_size(l);
00306     l.activation = activation;
00307 
00308     fprintf(stderr, "conv  %5d %2d x%2d /%2d  %4d x%4d x%4d   ->  %4d x%4d x%4d\n", n, size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c);
00309 
00310     return l;
00311 }
00312 
00313 void denormalize_convolutional_layer(convolutional_layer l)
00314 {
00315     int i, j;
00316     for(i = 0; i < l.n; ++i){
00317         float scale = l.scales[i]/sqrt(l.rolling_variance[i] + .00001);
00318         for(j = 0; j < l.c*l.size*l.size; ++j){
00319             l.weights[i*l.c*l.size*l.size + j] *= scale;
00320         }
00321         l.biases[i] -= l.rolling_mean[i] * scale;
00322         l.scales[i] = 1;
00323         l.rolling_mean[i] = 0;
00324         l.rolling_variance[i] = 1;
00325     }
00326 }
00327 
00328 void test_convolutional_layer()
00329 {
00330     convolutional_layer l = make_convolutional_layer(1, 5, 5, 3, 2, 5, 2, 1, LEAKY, 1, 0, 0, 0);
00331     l.batch_normalize = 1;
00332     float data[] = {1,1,1,1,1,
00333         1,1,1,1,1,
00334         1,1,1,1,1,
00335         1,1,1,1,1,
00336         1,1,1,1,1,
00337         2,2,2,2,2,
00338         2,2,2,2,2,
00339         2,2,2,2,2,
00340         2,2,2,2,2,
00341         2,2,2,2,2,
00342         3,3,3,3,3,
00343         3,3,3,3,3,
00344         3,3,3,3,3,
00345         3,3,3,3,3,
00346         3,3,3,3,3};
00347     network_state state = {0};
00348     state.input = data;
00349     forward_convolutional_layer(l, state);
00350 }
00351 
00352 void resize_convolutional_layer(convolutional_layer *l, int w, int h)
00353 {
00354     l->w = w;
00355     l->h = h;
00356     int out_w = convolutional_out_width(*l);
00357     int out_h = convolutional_out_height(*l);
00358 
00359     l->out_w = out_w;
00360     l->out_h = out_h;
00361 
00362     l->outputs = l->out_h * l->out_w * l->out_c;
00363     l->inputs = l->w * l->h * l->c;
00364 
00365     l->output = realloc(l->output, l->batch*l->outputs*sizeof(float));
00366     l->delta  = realloc(l->delta,  l->batch*l->outputs*sizeof(float));
00367     if(l->batch_normalize){
00368         l->x = realloc(l->x, l->batch*l->outputs*sizeof(float));
00369         l->x_norm  = realloc(l->x_norm, l->batch*l->outputs*sizeof(float));
00370     }
00371 
00372 #ifdef GPU
00373     cuda_free(l->delta_gpu);
00374     cuda_free(l->output_gpu);
00375 
00376     l->delta_gpu =  cuda_make_array(l->delta,  l->batch*l->outputs);
00377     l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs);
00378 
00379     if(l->batch_normalize){
00380         cuda_free(l->x_gpu);
00381         cuda_free(l->x_norm_gpu);
00382 
00383         l->x_gpu = cuda_make_array(l->output, l->batch*l->outputs);
00384         l->x_norm_gpu = cuda_make_array(l->output, l->batch*l->outputs);
00385     }
00386 #ifdef CUDNN
00387     cudnn_convolutional_setup(l);
00388 #endif
00389 #endif
00390     l->workspace_size = get_workspace_size(*l);
00391 }
00392 
/*
 * Adds biases[i] to every element of channel i, for each batch item.
 * output is laid out as batch x n x size, i.e. element (b, i, j) is at
 * output[(b*n + i)*size + j].
 */
void add_bias(float *output, float *biases, int batch, int n, int size)
{
    float *cursor = output;
    int item, channel, k;

    for(item = 0; item < batch; ++item){
        for(channel = 0; channel < n; ++channel){
            // cursor walks the buffer in storage order, one channel at a time.
            for(k = 0; k < size; ++k){
                *cursor += biases[channel];
                ++cursor;
            }
        }
    }
}
00404 
/*
 * Multiplies every element of channel i by scales[i], for each batch item.
 * output is laid out as batch x n x size, i.e. element (b, i, j) is at
 * output[(b*n + i)*size + j].
 */
void scale_bias(float *output, float *scales, int batch, int n, int size)
{
    float *cursor = output;
    int item, channel, k;

    for(item = 0; item < batch; ++item){
        for(channel = 0; channel < n; ++channel){
            // cursor walks the buffer in storage order, one channel at a time.
            for(k = 0; k < size; ++k){
                *cursor *= scales[channel];
                ++cursor;
            }
        }
    }
}
00416 
/*
 * Accumulates the bias gradient: for every batch item and channel i, the
 * result of sum_array over that channel's `size` delta values is added to
 * bias_updates[i]. delta is laid out as batch x n x size.
 */
void backward_bias(float *bias_updates, float *delta, int batch, int n, int size)
{
    float *chunk = delta;
    int item, channel;

    for(item = 0; item < batch; ++item){
        for(channel = 0; channel < n; ++channel){
            bias_updates[channel] += sum_array(chunk, size);
            chunk += size;  // next channel (or first channel of next item)
        }
    }
}
00426 
00427 void forward_convolutional_layer(convolutional_layer l, network_state state)
00428 {
00429     int out_h = convolutional_out_height(l);
00430     int out_w = convolutional_out_width(l);
00431     int i;
00432 
00433     fill_cpu(l.outputs*l.batch, 0, l.output, 1);
00434 
00435     if(l.xnor){
00436         binarize_weights(l.weights, l.n, l.c*l.size*l.size, l.binary_weights);
00437         swap_binary(&l);
00438         binarize_cpu(state.input, l.c*l.h*l.w*l.batch, l.binary_input);
00439         state.input = l.binary_input;
00440     }
00441 
00442     int m = l.n;
00443     int k = l.size*l.size*l.c;
00444     int n = out_h*out_w;
00445 
00446 
00447     float *a = l.weights;
00448     float *b = state.workspace;
00449     float *c = l.output;
00450 
00451     for(i = 0; i < l.batch; ++i){
00452         im2col_cpu(state.input, l.c, l.h, l.w, 
00453                 l.size, l.stride, l.pad, b);
00454         gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
00455         c += n*m;
00456         state.input += l.c*l.h*l.w;
00457     }
00458 
00459     if(l.batch_normalize){
00460         forward_batchnorm_layer(l, state);
00461     }
00462     add_bias(l.output, l.biases, l.batch, l.n, out_h*out_w);
00463 
00464     activate_array(l.output, m*n*l.batch, l.activation);
00465     if(l.binary || l.xnor) swap_binary(&l);
00466 }
00467 
00468 void backward_convolutional_layer(convolutional_layer l, network_state state)
00469 {
00470     int i;
00471     int m = l.n;
00472     int n = l.size*l.size*l.c;
00473     int k = convolutional_out_height(l)*
00474         convolutional_out_width(l);
00475 
00476     gradient_array(l.output, m*k*l.batch, l.activation, l.delta);
00477     backward_bias(l.bias_updates, l.delta, l.batch, l.n, k);
00478 
00479     if(l.batch_normalize){
00480         backward_batchnorm_layer(l, state);
00481     }
00482 
00483     for(i = 0; i < l.batch; ++i){
00484         float *a = l.delta + i*m*k;
00485         float *b = state.workspace;
00486         float *c = l.weight_updates;
00487 
00488         float *im = state.input+i*l.c*l.h*l.w;
00489 
00490         im2col_cpu(im, l.c, l.h, l.w, 
00491                 l.size, l.stride, l.pad, b);
00492         gemm(0,1,m,n,k,1,a,k,b,k,1,c,n);
00493 
00494         if(state.delta){
00495             a = l.weights;
00496             b = l.delta + i*m*k;
00497             c = state.workspace;
00498 
00499             gemm(1,0,n,k,m,1,a,n,b,k,0,c,k);
00500 
00501             col2im_cpu(state.workspace, l.c,  l.h,  l.w,  l.size,  l.stride, l.pad, state.delta+i*l.c*l.h*l.w);
00502         }
00503     }
00504 }
00505 
00506 void update_convolutional_layer(convolutional_layer l, int batch, float learning_rate, float momentum, float decay)
00507 {
00508     int size = l.size*l.size*l.c*l.n;
00509     axpy_cpu(l.n, learning_rate/batch, l.bias_updates, 1, l.biases, 1);
00510     scal_cpu(l.n, momentum, l.bias_updates, 1);
00511 
00512     if(l.scales){
00513         axpy_cpu(l.n, learning_rate/batch, l.scale_updates, 1, l.scales, 1);
00514         scal_cpu(l.n, momentum, l.scale_updates, 1);
00515     }
00516 
00517     axpy_cpu(size, -decay*batch, l.weights, 1, l.weight_updates, 1);
00518     axpy_cpu(size, learning_rate/batch, l.weight_updates, 1, l.weights, 1);
00519     scal_cpu(size, momentum, l.weight_updates, 1);
00520 }
00521 
00522 
00523 image get_convolutional_weight(convolutional_layer l, int i)
00524 {
00525     int h = l.size;
00526     int w = l.size;
00527     int c = l.c;
00528     return float_to_image(w,h,c,l.weights+i*h*w*c);
00529 }
00530 
00531 void rgbgr_weights(convolutional_layer l)
00532 {
00533     int i;
00534     for(i = 0; i < l.n; ++i){
00535         image im = get_convolutional_weight(l, i);
00536         if (im.c == 3) {
00537             rgbgr_image(im);
00538         }
00539     }
00540 }
00541 
00542 void rescale_weights(convolutional_layer l, float scale, float trans)
00543 {
00544     int i;
00545     for(i = 0; i < l.n; ++i){
00546         image im = get_convolutional_weight(l, i);
00547         if (im.c == 3) {
00548             scale_image(im, scale);
00549             float sum = sum_array(im.data, im.w*im.h*im.c);
00550             l.biases[i] += sum*trans;
00551         }
00552     }
00553 }
00554 
00555 image *get_weights(convolutional_layer l)
00556 {
00557     image *weights = calloc(l.n, sizeof(image));
00558     int i;
00559     for(i = 0; i < l.n; ++i){
00560         weights[i] = copy_image(get_convolutional_weight(l, i));
00561         //normalize_image(weights[i]);
00562     }
00563     return weights;
00564 }
00565 
00566 image *visualize_convolutional_layer(convolutional_layer l, char *window, image *prev_weights)
00567 {
00568     image *single_weights = get_weights(l);
00569     show_images(single_weights, l.n, window);
00570 
00571     image delta = get_convolutional_image(l);
00572     image dc = collapse_image_layers(delta, 1);
00573     char buff[256];
00574     sprintf(buff, "%s: Output", window);
00575     //show_image(dc, buff);
00576     //save_image(dc, buff);
00577     free_image(dc);
00578     return single_weights;
00579 }
00580