connected_layer.c
#include "connected_layer.h"
#include "batchnorm_layer.h"
#include "utils.h"
#include "cuda.h"
#include "blas.h"
#include "gemm.h"

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

connected_layer make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation, int batch_normalize)
{
    int i;
    connected_layer l = {0};
    l.type = CONNECTED;

    l.inputs = inputs;
    l.outputs = outputs;
    l.batch = batch;
    l.batch_normalize = batch_normalize;
    l.h = 1;
    l.w = 1;
    l.c = inputs;
    l.out_h = 1;
    l.out_w = 1;
    l.out_c = outputs;

    l.output = calloc(batch*outputs, sizeof(float));
    l.delta = calloc(batch*outputs, sizeof(float));

    l.weight_updates = calloc(inputs*outputs, sizeof(float));
    l.bias_updates = calloc(outputs, sizeof(float));

    l.weights = calloc(outputs*inputs, sizeof(float));
    l.biases = calloc(outputs, sizeof(float));

    l.forward = forward_connected_layer;
    l.backward = backward_connected_layer;
    l.update = update_connected_layer;

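    /* He-style initialization: uniform weights scaled by sqrt(2/fan_in),
     * which keeps activation variance roughly constant across ReLU layers. */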
    //float scale = 1./sqrt(inputs);
    float scale = sqrt(2./inputs);
    for(i = 0; i < outputs*inputs; ++i){
        l.weights[i] = scale*rand_uniform(-1, 1);
    }

    for(i = 0; i < outputs; ++i){
        l.biases[i] = 0;
    }

    if(batch_normalize){
        l.scales = calloc(outputs, sizeof(float));
        l.scale_updates = calloc(outputs, sizeof(float));
        for(i = 0; i < outputs; ++i){
            l.scales[i] = 1;
        }

        l.mean = calloc(outputs, sizeof(float));
        l.mean_delta = calloc(outputs, sizeof(float));
        l.variance = calloc(outputs, sizeof(float));
        l.variance_delta = calloc(outputs, sizeof(float));

        l.rolling_mean = calloc(outputs, sizeof(float));
        l.rolling_variance = calloc(outputs, sizeof(float));

        l.x = calloc(batch*outputs, sizeof(float));
        l.x_norm = calloc(batch*outputs, sizeof(float));
    }

#ifdef GPU
    l.forward_gpu = forward_connected_layer_gpu;
    l.backward_gpu = backward_connected_layer_gpu;
    l.update_gpu = update_connected_layer_gpu;

    l.weights_gpu = cuda_make_array(l.weights, outputs*inputs);
    l.biases_gpu = cuda_make_array(l.biases, outputs);

    l.weight_updates_gpu = cuda_make_array(l.weight_updates, outputs*inputs);
    l.bias_updates_gpu = cuda_make_array(l.bias_updates, outputs);

    l.output_gpu = cuda_make_array(l.output, outputs*batch);
    l.delta_gpu = cuda_make_array(l.delta, outputs*batch);
    if(batch_normalize){
        l.scales_gpu = cuda_make_array(l.scales, outputs);
        l.scale_updates_gpu = cuda_make_array(l.scale_updates, outputs);

        l.mean_gpu = cuda_make_array(l.mean, outputs);
        l.variance_gpu = cuda_make_array(l.variance, outputs);

        l.rolling_mean_gpu = cuda_make_array(l.rolling_mean, outputs);
        l.rolling_variance_gpu = cuda_make_array(l.rolling_variance, outputs);

        l.mean_delta_gpu = cuda_make_array(l.mean_delta, outputs);
        l.variance_delta_gpu = cuda_make_array(l.variance_delta, outputs);

        l.x_gpu = cuda_make_array(l.output, l.batch*outputs);
        l.x_norm_gpu = cuda_make_array(l.output, l.batch*outputs);
    }
#endif
    l.activation = activation;
    fprintf(stderr, "connected                            %4d  ->  %4d\n", inputs, outputs);
    return l;
}

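/*
 * SGD step with momentum and L2 weight decay. The *_updates buffers hold
 * gradients accumulated over `batch` samples, so steps are scaled by
 * learning_rate/batch. The decay term -decay*batch*weights is folded into
 * weight_updates first, so it also feeds the momentum buffer:
 *
 *   weight_updates -= decay*batch*weights
 *   weights        += (learning_rate/batch) * weight_updates
 *   weight_updates *= momentum
 */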
void update_connected_layer(connected_layer l, int batch, float learning_rate, float momentum, float decay)
{
    axpy_cpu(l.outputs, learning_rate/batch, l.bias_updates, 1, l.biases, 1);
    scal_cpu(l.outputs, momentum, l.bias_updates, 1);

    if(l.batch_normalize){
        axpy_cpu(l.outputs, learning_rate/batch, l.scale_updates, 1, l.scales, 1);
        scal_cpu(l.outputs, momentum, l.scale_updates, 1);
    }

    axpy_cpu(l.inputs*l.outputs, -decay*batch, l.weights, 1, l.weight_updates, 1);
    axpy_cpu(l.inputs*l.outputs, learning_rate/batch, l.weight_updates, 1, l.weights, 1);
    scal_cpu(l.inputs*l.outputs, momentum, l.weight_updates, 1);
}

void forward_connected_layer(connected_layer l, network_state state)
{
    int i;
    fill_cpu(l.outputs*l.batch, 0, l.output, 1);
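    /* Forward GEMM: output[batch x outputs] =
     * input[batch x inputs] * weights[outputs x inputs]^T
     * (TB = 1 transposes the weight matrix). */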
    int m = l.batch;
    int k = l.inputs;
    int n = l.outputs;
    float *a = state.input;
    float *b = l.weights;
    float *c = l.output;
    gemm(0,1,m,n,k,1,a,k,b,k,1,c,n);
    if(l.batch_normalize){
        if(state.train){
            mean_cpu(l.output, l.batch, l.outputs, 1, l.mean);
            variance_cpu(l.output, l.mean, l.batch, l.outputs, 1, l.variance);

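            /* Exponential moving averages (momentum 0.95) of the batch
             * statistics, used in place of per-batch statistics at
             * inference time. */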
            scal_cpu(l.outputs, .95, l.rolling_mean, 1);
            axpy_cpu(l.outputs, .05, l.mean, 1, l.rolling_mean, 1);
            scal_cpu(l.outputs, .95, l.rolling_variance, 1);
            axpy_cpu(l.outputs, .05, l.variance, 1, l.rolling_variance, 1);

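            /* Normalize in place: (x - mean)/sqrt(variance + eps). l.x keeps
             * the pre-normalization activations and l.x_norm the normalized
             * ones; both are reused by the backward pass. */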
            copy_cpu(l.outputs*l.batch, l.output, 1, l.x, 1);
            normalize_cpu(l.output, l.mean, l.variance, l.batch, l.outputs, 1);
            copy_cpu(l.outputs*l.batch, l.output, 1, l.x_norm, 1);
        } else {
            normalize_cpu(l.output, l.rolling_mean, l.rolling_variance, l.batch, l.outputs, 1);
        }
        scale_bias(l.output, l.scales, l.batch, l.outputs, 1);
    }
    for(i = 0; i < l.batch; ++i){
        axpy_cpu(l.outputs, 1, l.biases, 1, l.output + i*l.outputs, 1);
    }
    activate_array(l.output, l.outputs*l.batch, l.activation);
}

void backward_connected_layer(connected_layer l, network_state state)
{
    int i;
    gradient_array(l.output, l.outputs*l.batch, l.activation, l.delta);
    for(i = 0; i < l.batch; ++i){
        axpy_cpu(l.outputs, 1, l.delta + i*l.outputs, 1, l.bias_updates, 1);
    }
    if(l.batch_normalize){
        backward_scale_cpu(l.x_norm, l.delta, l.batch, l.outputs, 1, l.scale_updates);

        scale_bias(l.delta, l.scales, l.batch, l.outputs, 1);

        mean_delta_cpu(l.delta, l.variance, l.batch, l.outputs, 1, l.mean_delta);
        variance_delta_cpu(l.x, l.delta, l.mean, l.variance, l.batch, l.outputs, 1, l.variance_delta);
        normalize_delta_cpu(l.x, l.mean, l.variance, l.mean_delta, l.variance_delta, l.batch, l.outputs, 1, l.delta);
    }

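    /* Weight gradient GEMM (TA = 1):
     * weight_updates[outputs x inputs] +=
     * delta^T[outputs x batch] * input[batch x inputs] */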
    int m = l.outputs;
    int k = l.batch;
    int n = l.inputs;
    float *a = l.delta;
    float *b = state.input;
    float *c = l.weight_updates;
    gemm(1,0,m,n,k,1,a,m,b,n,1,c,n);

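    /* Input gradient GEMM (skipped when state.delta is NULL, i.e. at the
     * first layer): state.delta[batch x inputs] +=
     * delta[batch x outputs] * weights[outputs x inputs] */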
    m = l.batch;
    k = l.outputs;
    n = l.inputs;

    a = l.delta;
    b = l.weights;
    c = state.delta;

    if(c) gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
}


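/*
 * Folds the rolling batch norm statistics into the weights and biases so
 * the layer computes the same function without explicit normalization:
 * with scale = scales[i]/sqrt(variance + eps), y = scale*(Wx - mean) + b
 * becomes y = (scale*W)x + (b - scale*mean).
 */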
void denormalize_connected_layer(layer l)
{
    int i, j;
    for(i = 0; i < l.outputs; ++i){
        float scale = l.scales[i]/sqrt(l.rolling_variance[i] + .000001);
        for(j = 0; j < l.inputs; ++j){
            l.weights[i*l.inputs + j] *= scale;
        }
        l.biases[i] -= l.rolling_mean[i] * scale;
        l.scales[i] = 1;
        l.rolling_mean[i] = 0;
        l.rolling_variance[i] = 1;
    }
}


void statistics_connected_layer(layer l)
{
    if(l.batch_normalize){
        printf("Scales ");
        print_statistics(l.scales, l.outputs);
        /*
        printf("Rolling Mean ");
        print_statistics(l.rolling_mean, l.outputs);
        printf("Rolling Variance ");
        print_statistics(l.rolling_variance, l.outputs);
        */
    }
    printf("Biases ");
    print_statistics(l.biases, l.outputs);
    printf("Weights ");
    print_statistics(l.weights, l.outputs);
}

#ifdef GPU

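/* pull/push mirror the layer parameters between device and host memory,
 * e.g. around weight loading, saving, and CPU-side inspection. */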
void pull_connected_layer(connected_layer l)
{
    cuda_pull_array(l.weights_gpu, l.weights, l.inputs*l.outputs);
    cuda_pull_array(l.biases_gpu, l.biases, l.outputs);
    cuda_pull_array(l.weight_updates_gpu, l.weight_updates, l.inputs*l.outputs);
    cuda_pull_array(l.bias_updates_gpu, l.bias_updates, l.outputs);
    if (l.batch_normalize){
        cuda_pull_array(l.scales_gpu, l.scales, l.outputs);
        cuda_pull_array(l.rolling_mean_gpu, l.rolling_mean, l.outputs);
        cuda_pull_array(l.rolling_variance_gpu, l.rolling_variance, l.outputs);
    }
}

void push_connected_layer(connected_layer l)
{
    cuda_push_array(l.weights_gpu, l.weights, l.inputs*l.outputs);
    cuda_push_array(l.biases_gpu, l.biases, l.outputs);
    cuda_push_array(l.weight_updates_gpu, l.weight_updates, l.inputs*l.outputs);
    cuda_push_array(l.bias_updates_gpu, l.bias_updates, l.outputs);
    if (l.batch_normalize){
        cuda_push_array(l.scales_gpu, l.scales, l.outputs);
        cuda_push_array(l.rolling_mean_gpu, l.rolling_mean, l.outputs);
        cuda_push_array(l.rolling_variance_gpu, l.rolling_variance, l.outputs);
    }
}

void update_connected_layer_gpu(connected_layer l, int batch, float learning_rate, float momentum, float decay)
{
    axpy_ongpu(l.outputs, learning_rate/batch, l.bias_updates_gpu, 1, l.biases_gpu, 1);
    scal_ongpu(l.outputs, momentum, l.bias_updates_gpu, 1);

    if(l.batch_normalize){
        axpy_ongpu(l.outputs, learning_rate/batch, l.scale_updates_gpu, 1, l.scales_gpu, 1);
        scal_ongpu(l.outputs, momentum, l.scale_updates_gpu, 1);
    }

    axpy_ongpu(l.inputs*l.outputs, -decay*batch, l.weights_gpu, 1, l.weight_updates_gpu, 1);
    axpy_ongpu(l.inputs*l.outputs, learning_rate/batch, l.weight_updates_gpu, 1, l.weights_gpu, 1);
    scal_ongpu(l.inputs*l.outputs, momentum, l.weight_updates_gpu, 1);
}

void forward_connected_layer_gpu(connected_layer l, network_state state)
{
    int i;
    fill_ongpu(l.outputs*l.batch, 0, l.output_gpu, 1);

    int m = l.batch;
    int k = l.inputs;
    int n = l.outputs;
    float *a = state.input;
    float *b = l.weights_gpu;
    float *c = l.output_gpu;
    gemm_ongpu(0,1,m,n,k,1,a,k,b,k,1,c,n);
    if(l.batch_normalize){
        forward_batchnorm_layer_gpu(l, state);
    }
    for(i = 0; i < l.batch; ++i){
        axpy_ongpu(l.outputs, 1, l.biases_gpu, 1, l.output_gpu + i*l.outputs, 1);
    }
    activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation);
}

void backward_connected_layer_gpu(connected_layer l, network_state state)
{
    int i;
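    /* Clip the incoming gradient to [-1, 1] before backprop (GPU path only). */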
    constrain_ongpu(l.outputs*l.batch, 1, l.delta_gpu, 1);
    gradient_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu);
    for(i = 0; i < l.batch; ++i){
        axpy_ongpu(l.outputs, 1, l.delta_gpu + i*l.outputs, 1, l.bias_updates_gpu, 1);
    }

    if(l.batch_normalize){
        backward_batchnorm_layer_gpu(l, state);
    }

    int m = l.outputs;
    int k = l.batch;
    int n = l.inputs;
    float *a = l.delta_gpu;
    float *b = state.input;
    float *c = l.weight_updates_gpu;
    gemm_ongpu(1,0,m,n,k,1,a,m,b,n,1,c,n);

    m = l.batch;
    k = l.outputs;
    n = l.inputs;

    a = l.delta_gpu;
    b = l.weights_gpu;
    c = state.delta;

    if(c) gemm_ongpu(0,0,m,n,k,1,a,k,b,n,1,c,n);
}
#endif
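
/* A minimal usage sketch (hypothetical; the CONNECTED_LAYER_DEMO guard and
 * this main() are illustrations, not part of the library): build a 2 -> 3
 * fully connected layer without batch norm and run one CPU forward pass. */
#ifdef CONNECTED_LAYER_DEMO
int main(void)
{
    connected_layer l = make_connected_layer(1, 2, 3, LINEAR, 0);
    float input[2] = {1.0f, -0.5f};

    network_state state = {0};
    state.input = input;
    state.train = 0;

    forward_connected_layer(l, state);

    int i;
    for(i = 0; i < l.outputs; ++i){
        printf("output[%d] = %f\n", i, l.output[i]);
    }
    return 0;
}
#endif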

