#include <stdio.h>
#include <time.h>
#include <assert.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include "network.h"
#include "image.h"
#include "data.h"
#include "utils.h"
#include "blas.h"

00010 #include "crop_layer.h"
00011 #include "connected_layer.h"
00012 #include "gru_layer.h"
00013 #include "rnn_layer.h"
00014 #include "crnn_layer.h"
00015 #include "local_layer.h"
00016 #include "convolutional_layer.h"
00017 #include "activation_layer.h"
00018 #include "detection_layer.h"
00019 #include "region_layer.h"
00020 #include "normalization_layer.h"
00021 #include "batchnorm_layer.h"
00022 #include "maxpool_layer.h"
00023 #include "reorg_layer.h"
00024 #include "avgpool_layer.h"
00025 #include "cost_layer.h"
00026 #include "softmax_layer.h"
00027 #include "dropout_layer.h"
00028 #include "route_layer.h"
00029 #include "shortcut_layer.h"
00030
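/* Number of optimizer steps taken so far: *net.seen counts individual
 * images, and one step consumes net.batch*net.subdivisions of them. */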
int get_current_batch(network net)
{
    int batch_num = (*net.seen)/(net.batch*net.subdivisions);
    return batch_num;
}

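/* Zero out the learning rate, momentum, and decay so that subsequent
 * updates leave the weights unchanged; a no-op if momentum is already 0. */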
void reset_momentum(network net)
{
    if (net.momentum == 0) return;
    net.learning_rate = 0;
    net.momentum = 0;
    net.decay = 0;
}

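/* Learning-rate schedule: map the current batch number to a rate according
 * to net.policy (constant, step, multi-step, exponential, polynomial with
 * burn-in, random, or sigmoid). */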
float get_current_rate(network net)
{
    int batch_num = get_current_batch(net);
    int i;
    float rate;
    switch (net.policy) {
        case CONSTANT:
            return net.learning_rate;
        case STEP:
            return net.learning_rate * pow(net.scale, batch_num/net.step);
        case STEPS:
            rate = net.learning_rate;
            for(i = 0; i < net.num_steps; ++i){
                if(net.steps[i] > batch_num) return rate;
                rate *= net.scales[i];
            }
            return rate;
        case EXP:
            return net.learning_rate * pow(net.gamma, batch_num);
        case POLY:
            if (batch_num < net.burn_in) return net.learning_rate * pow((float)batch_num / net.burn_in, net.power);
            return net.learning_rate * pow(1 - (float)batch_num / net.max_batches, net.power);
        case RANDOM:
            return net.learning_rate * pow(rand_uniform(0,1), net.power);
        case SIG:
            return net.learning_rate * (1./(1.+exp(net.gamma*(batch_num - net.step))));
        default:
            fprintf(stderr, "Unknown learning rate policy, using base rate\n");
            return net.learning_rate;
    }
}

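/* Human-readable name for a layer type, for printing network structure. */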
char *get_layer_string(LAYER_TYPE a)
{
    switch(a){
        case CONVOLUTIONAL:
            return "convolutional";
        case ACTIVE:
            return "activation";
        case LOCAL:
            return "local";
        case DECONVOLUTIONAL:
            return "deconvolutional";
        case CONNECTED:
            return "connected";
        case RNN:
            return "rnn";
        case GRU:
            return "gru";
        case CRNN:
            return "crnn";
        case MAXPOOL:
            return "maxpool";
        case REORG:
            return "reorg";
        case AVGPOOL:
            return "avgpool";
        case SOFTMAX:
            return "softmax";
        case DETECTION:
            return "detection";
        case REGION:
            return "region";
        case DROPOUT:
            return "dropout";
        case CROP:
            return "crop";
        case COST:
            return "cost";
        case ROUTE:
            return "route";
        case SHORTCUT:
            return "shortcut";
        case NORMALIZATION:
            return "normalization";
        case BATCHNORM:
            return "batchnorm";
        default:
            break;
    }
    return "none";
}

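/* Allocate an empty network with n layers; the layers themselves are
 * filled in later, typically by the cfg parser. */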
network make_network(int n)
{
    network net = {0};
    net.n = n;
    net.layers = calloc(net.n, sizeof(layer));
    net.seen = calloc(1, sizeof(int));
#ifdef GPU
    net.input_gpu = calloc(1, sizeof(float *));
    net.truth_gpu = calloc(1, sizeof(float *));
#endif
    return net;
}

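/* Forward pass: each layer reads state.input, writes its own output, and
 * that output becomes the next layer's input. Delta buffers are zeroed on
 * the way forward so the backward pass accumulates into clean memory. */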
void forward_network(network net, network_state state)
{
    state.workspace = net.workspace;
    int i;
    for(i = 0; i < net.n; ++i){
        state.index = i;
        layer l = net.layers[i];
        if(l.delta){
            scal_cpu(l.outputs * l.batch, 0, l.delta, 1);
        }
        l.forward(l, state);
        state.input = l.output;
    }
}

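/* Apply accumulated gradients: every layer with an update rule steps its
 * weights using the scheduled learning rate, momentum, and weight decay. */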
void update_network(network net)
{
    int i;
    int update_batch = net.batch*net.subdivisions;
    float rate = get_current_rate(net);
    for(i = 0; i < net.n; ++i){
        layer l = net.layers[i];
        if(l.update){
            l.update(l, update_batch, rate, net.momentum, net.decay);
        }
    }
}

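/* Output of the last non-COST layer (fetched from the device on GPU builds). */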
float *get_network_output(network net)
{
#ifdef GPU
    if (gpu_index >= 0) return get_network_output_gpu(net);
#endif
    int i;
    for(i = net.n-1; i > 0; --i) if(net.layers[i].type != COST) break;
    return net.layers[i].output;
}

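/* Mean of the scalar costs reported by all cost-producing layers. */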
float get_network_cost(network net)
{
    int i;
    float sum = 0;
    int count = 0;
    for(i = 0; i < net.n; ++i){
        if(net.layers[i].cost){
            sum += net.layers[i].cost[0];
            ++count;
        }
    }
    return sum/count;
}

int get_predicted_class_network(network net)
{
    float *out = get_network_output(net);
    int k = get_network_output_size(net);
    return max_index(out, k);
}

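/* Backward pass: walk the layers in reverse, pointing each at its
 * predecessor's output and delta so gradients propagate back to the input. */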
void backward_network(network net, network_state state)
{
    int i;
    float *original_input = state.input;
    float *original_delta = state.delta;
    state.workspace = net.workspace;
    for(i = net.n-1; i >= 0; --i){
        state.index = i;
        if(i == 0){
            state.input = original_input;
            state.delta = original_delta;
        }else{
            layer prev = net.layers[i-1];
            state.input = prev.output;
            state.delta = prev.delta;
        }
        layer l = net.layers[i];
        l.backward(l, state);
    }
}

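/* One forward/backward pass on a single batch (x, y). Weights are updated
 * only every net.subdivisions batches, so a large effective batch can be
 * accumulated in smaller pieces. Returns the cost for this batch. */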
float train_network_datum(network net, float *x, float *y)
{
#ifdef GPU
    if(gpu_index >= 0) return train_network_datum_gpu(net, x, y);
#endif
    network_state state;
    *net.seen += net.batch;
    state.index = 0;
    state.net = net;
    state.input = x;
    state.delta = 0;
    state.truth = y;
    state.train = 1;
    forward_network(net, state);
    backward_network(net, state);
    float error = get_network_cost(net);
    if(((*net.seen)/net.batch)%net.subdivisions == 0) update_network(net);
    return error;
}

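/* Train on n randomly sampled batches from d; returns the average cost
 * per example. */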
float train_network_sgd(network net, data d, int n)
{
    int batch = net.batch;
    float *X = calloc(batch*d.X.cols, sizeof(float));
    float *y = calloc(batch*d.y.cols, sizeof(float));

    int i;
    float sum = 0;
    for(i = 0; i < n; ++i){
        get_random_batch(d, batch, X, y);
        float err = train_network_datum(net, X, y);
        sum += err;
    }
    free(X);
    free(y);
    return sum/(n*batch);
}

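/* Train on all of d in order, one batch at a time; d.X.rows must be a
 * multiple of net.batch. Returns the average cost per example. */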
float train_network(network net, data d)
{
    assert(d.X.rows % net.batch == 0);
    int batch = net.batch;
    int n = d.X.rows / batch;
    float *X = calloc(batch*d.X.cols, sizeof(float));
    float *y = calloc(batch*d.y.cols, sizeof(float));

    int i;
    float sum = 0;
    for(i = 0; i < n; ++i){
        get_next_batch(d, batch, i*batch, X, y);
        float err = train_network_datum(net, X, y);
        sum += err;
    }
    free(X);
    free(y);
    return sum/(n*batch);
}

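/* Train for n update steps, each built from randomly drawn single examples
 * (the inner batch size is hardcoded to 2 here). Returns the average cost. */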
float train_network_batch(network net, data d, int n)
{
    int i,j;
    network_state state;
    state.index = 0;
    state.net = net;
    state.train = 1;
    state.delta = 0;
    float sum = 0;
    int batch = 2;
    for(i = 0; i < n; ++i){
        for(j = 0; j < batch; ++j){
            int index = rand()%d.X.rows;
            state.input = d.X.vals[index];
            state.truth = d.y.vals[index];
            forward_network(net, state);
            backward_network(net, state);
            sum += get_network_cost(net);
        }
        update_network(net);
    }
    return sum/(n*batch);
}

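/* Change the batch size of an already-built network, e.g. to 1 for
 * single-image inference; on CUDNN builds the convolution descriptors are
 * rebuilt to match. */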
void set_batch_network(network *net, int b)
{
    net->batch = b;
    int i;
    for(i = 0; i < net->n; ++i){
        net->layers[i].batch = b;
#ifdef CUDNN
        if(net->layers[i].type == CONVOLUTIONAL){
            cudnn_convolutional_setup(net->layers + i);
        }
#endif
    }
}

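/* Resize a fully convolutional network to a new input width and height,
 * reallocating per-layer buffers and the shared workspace. Layer types
 * without a resize rule (e.g. connected layers) cause an error. */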
int resize_network(network *net, int w, int h)
{
#ifdef GPU
    cuda_set_device(net->gpu_index);
    if(gpu_index >= 0){
        cuda_free(net->workspace);
    }
#endif
    int i;

    net->w = w;
    net->h = h;
    int inputs = 0;
    size_t workspace_size = 0;

    for (i = 0; i < net->n; ++i){
        layer l = net->layers[i];
        if(l.type == CONVOLUTIONAL){
            resize_convolutional_layer(&l, w, h);
        }else if(l.type == CROP){
            resize_crop_layer(&l, w, h);
        }else if(l.type == MAXPOOL){
            resize_maxpool_layer(&l, w, h);
        }else if(l.type == REGION){
            resize_region_layer(&l, w, h);
        }else if(l.type == ROUTE){
            resize_route_layer(&l, net);
        }else if(l.type == REORG){
            resize_reorg_layer(&l, w, h);
        }else if(l.type == AVGPOOL){
            resize_avgpool_layer(&l, w, h);
        }else if(l.type == NORMALIZATION){
            resize_normalization_layer(&l, w, h);
        }else if(l.type == COST){
            resize_cost_layer(&l, inputs);
        }else{
            error("Cannot resize this type of layer");
        }
        if(l.workspace_size > workspace_size) workspace_size = l.workspace_size;
        inputs = l.outputs;
        net->layers[i] = l;
        w = l.out_w;
        h = l.out_h;
        /* a global average pool fixes the spatial size, so layers after it
         * need no resizing */
        if(l.type == AVGPOOL) break;
    }
#ifdef GPU
    if(gpu_index >= 0){
        if(net->input_gpu) {
            cuda_free(*net->input_gpu);
            *net->input_gpu = 0;
            cuda_free(*net->truth_gpu);
            *net->truth_gpu = 0;
        }
        net->workspace = cuda_make_array(0, (workspace_size-1)/sizeof(float)+1);
    }else {
        free(net->workspace);
        net->workspace = calloc(1, workspace_size);
    }
#else
    free(net->workspace);
    net->workspace = calloc(1, workspace_size);
#endif

    return 0;
}

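/* Sizes of the final output (last non-COST layer) and of the input layer. */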
int get_network_output_size(network net)
{
    int i;
    for(i = net.n-1; i > 0; --i) if(net.layers[i].type != COST) break;
    return net.layers[i].outputs;
}

int get_network_input_size(network net)
{
    return net.layers[0].inputs;
}

detection_layer get_network_detection_layer(network net)
{
    int i;
    for(i = 0; i < net.n; ++i){
        if(net.layers[i].type == DETECTION){
            return net.layers[i];
        }
    }
    fprintf(stderr, "Detection layer not found!!\n");
    detection_layer l = {0};
    return l;
}

image get_network_image_layer(network net, int i)
{
    layer l = net.layers[i];
    if (l.out_w && l.out_h && l.out_c){
        return float_to_image(l.out_w, l.out_h, l.out_c, l.output);
    }
    image def = {0};
    return def;
}

image get_network_image(network net)
{
    int i;
    for(i = net.n-1; i >= 0; --i){
        image m = get_network_image_layer(net, i);
        if(m.h != 0) return m;
    }
    image def = {0};
    return def;
}

void visualize_network(network net)
{
    image *prev = 0;
    int i;
    char buff[256];
    for(i = 0; i < net.n; ++i){
        sprintf(buff, "Layer %d", i);
        layer l = net.layers[i];
        if(l.type == CONVOLUTIONAL){
            prev = visualize_convolutional_layer(l, buff, prev);
        }
    }
}

void top_predictions(network net, int k, int *index)
{
    int size = get_network_output_size(net);
    float *out = get_network_output(net);
    top_k(out, size, k, index);
}

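/* Inference entry point: run one batch worth of input floats through the
 * network and return a pointer to the final layer's output; no gradients
 * are computed. Typical use (a sketch; paths are illustrative, and
 * parse_network_cfg/load_weights come from parser.h):
 *
 *     network net = parse_network_cfg("cfg/yolo.cfg");
 *     load_weights(&net, "yolo.weights");
 *     set_batch_network(&net, 1);
 *     image im = load_image_color("dog.jpg", net.w, net.h);
 *     float *predictions = network_predict(net, im.data);
 */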
float *network_predict(network net, float *input)
{
#ifdef GPU
    if(gpu_index >= 0) return network_predict_gpu(net, input);
#endif

    network_state state;
    state.net = net;
    state.index = 0;
    state.input = input;
    state.truth = 0;
    state.train = 0;
    state.delta = 0;
    forward_network(net, state);
    float *out = get_network_output(net);
    return out;
}

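/* Like network_predict_data, but runs each batch through the network n
 * times and averages the outputs; useful when the forward pass is
 * stochastic (e.g. a random crop layer). */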
matrix network_predict_data_multi(network net, data test, int n)
{
    int i,j,b,m;
    int k = get_network_output_size(net);
    matrix pred = make_matrix(test.X.rows, k);
    float *X = calloc(net.batch*test.X.cols, sizeof(float));
    for(i = 0; i < test.X.rows; i += net.batch){
        for(b = 0; b < net.batch; ++b){
            if(i+b == test.X.rows) break;
            memcpy(X+b*test.X.cols, test.X.vals[i+b], test.X.cols*sizeof(float));
        }
        for(m = 0; m < n; ++m){
            float *out = network_predict(net, X);
            for(b = 0; b < net.batch; ++b){
                if(i+b == test.X.rows) break;
                for(j = 0; j < k; ++j){
                    pred.vals[i+b][j] += out[j+b*k]/n;
                }
            }
        }
    }
    free(X);
    return pred;
}

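/* Predict every row of test.X in batches; returns a test.X.rows x k matrix
 * whose rows line up with the rows of test.X. */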
matrix network_predict_data(network net, data test)
{
    int i,j,b;
    int k = get_network_output_size(net);
    matrix pred = make_matrix(test.X.rows, k);
    float *X = calloc(net.batch*test.X.cols, sizeof(float));
    for(i = 0; i < test.X.rows; i += net.batch){
        for(b = 0; b < net.batch; ++b){
            if(i+b == test.X.rows) break;
            memcpy(X+b*test.X.cols, test.X.vals[i+b], test.X.cols*sizeof(float));
        }
        float *out = network_predict(net, X);
        for(b = 0; b < net.batch; ++b){
            if(i+b == test.X.rows) break;
            for(j = 0; j < k; ++j){
                pred.vals[i+b][j] = out[j+b*k];
            }
        }
    }
    free(X);
    return pred;
}

void print_network(network net)
{
    int i,j;
    for(i = 0; i < net.n; ++i){
        layer l = net.layers[i];
        float *output = l.output;
        int n = l.outputs;
        float mean = mean_array(output, n);
        float vari = variance_array(output, n);
        fprintf(stderr, "Layer %d - Mean: %f, Variance: %f\n", i, mean, vari);
        if(n > 100) n = 100;
        for(j = 0; j < n; ++j) fprintf(stderr, "%f, ", output[j]);
        if(n == 100) fprintf(stderr, ".....\n");
        fprintf(stderr, "\n");
    }
}

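/* Compare two networks on the same test set with McNemar's test. The 2x2
 * table counts examples both get wrong (a), only n2 gets right (b), only
 * n1 gets right (c), and both get right (d); the printed statistic is the
 * continuity-corrected chi-square (|b-c|-1)^2/(b+c). */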
void compare_networks(network n1, network n2, data test)
{
    matrix g1 = network_predict_data(n1, test);
    matrix g2 = network_predict_data(n2, test);
    int i;
    int a,b,c,d;
    a = b = c = d = 0;
    for(i = 0; i < g1.rows; ++i){
        int truth = max_index(test.y.vals[i], test.y.cols);
        int p1 = max_index(g1.vals[i], g1.cols);
        int p2 = max_index(g2.vals[i], g2.cols);
        if(p1 == truth){
            if(p2 == truth) ++d;
            else ++c;
        }else{
            if(p2 == truth) ++b;
            else ++a;
        }
    }
    printf("%5d %5d\n%5d %5d\n", a, b, c, d);
    float num = pow((abs(b - c) - 1.), 2.);
    float den = b + c;
    printf("%f\n", num/den);
    free_matrix(g1);
    free_matrix(g2);
}

float network_accuracy(network net, data d)
{
    matrix guess = network_predict_data(net, d);
    float acc = matrix_topk_accuracy(d.y, guess, 1);
    free_matrix(guess);
    return acc;
}

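/* Top-1 and top-n accuracy in one pass. The result lives in a static
 * buffer, so it is overwritten by the next call and is not thread-safe. */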
float *network_accuracies(network net, data d, int n)
{
    static float acc[2];
    matrix guess = network_predict_data(net, d);
    acc[0] = matrix_topk_accuracy(d.y, guess, 1);
    acc[1] = matrix_topk_accuracy(d.y, guess, n);
    free_matrix(guess);
    return acc;
}

float network_accuracy_multi(network net, data d, int n)
{
    matrix guess = network_predict_data_multi(net, d, n);
    float acc = matrix_topk_accuracy(d.y, guess, 1);
    free_matrix(guess);
    return acc;
}

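/* Release the layers, bookkeeping, and (on GPU builds) device buffers
 * allocated for this network. */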
void free_network(network net)
{
    int i;
    for(i = 0; i < net.n; ++i){
        free_layer(net.layers[i]);
    }
    free(net.layers);
    free(net.seen);
#ifdef GPU
    /* check the holder pointers before dereferencing them */
    if(net.input_gpu){
        if(*net.input_gpu) cuda_free(*net.input_gpu);
        free(net.input_gpu);
    }
    if(net.truth_gpu){
        if(*net.truth_gpu) cuda_free(*net.truth_gpu);
        free(net.truth_gpu);
    }
#endif
}