rail_object_detector: cost_layer.c Source File

Go to the documentation of this file.
00001 #include "cost_layer.h"
00002 #include "utils.h"
00003 #include "cuda.h"
00004 #include "blas.h"
00005 #include <math.h>
00006 #include <string.h>
00007 #include <stdlib.h>
00008 #include <stdio.h>
00009 
00010 COST_TYPE get_cost_type(char *s)
00011 {
00012     if (strcmp(s, "sse")==0) return SSE;
00013     if (strcmp(s, "masked")==0) return MASKED;
00014     if (strcmp(s, "smooth")==0) return SMOOTH;
00015     fprintf(stderr, "Couldn't find cost type %s, going with SSE\n", s);
00016     return SSE;
00017 }
00018 
00019 char *get_cost_string(COST_TYPE a)
00020 {
00021     switch(a){
00022         case SSE:
00023             return "sse";
00024         case MASKED:
00025             return "masked";
00026         case SMOOTH:
00027             return "smooth";
00028     }
00029     return "sse";
00030 }
00031 
00032 cost_layer make_cost_layer(int batch, int inputs, COST_TYPE cost_type, float scale)
00033 {
00034     fprintf(stderr, "cost                                           %4d\n",  inputs);
00035     cost_layer l = {0};
00036     l.type = COST;
00037 
00038     l.scale = scale;
00039     l.batch = batch;
00040     l.inputs = inputs;
00041     l.outputs = inputs;
00042     l.cost_type = cost_type;
00043     l.delta = calloc(inputs*batch, sizeof(float));
00044     l.output = calloc(inputs*batch, sizeof(float));
00045     l.cost = calloc(1, sizeof(float));
00046 
00047     l.forward = forward_cost_layer;
00048     l.backward = backward_cost_layer;
00049     #ifdef GPU
00050     l.forward_gpu = forward_cost_layer_gpu;
00051     l.backward_gpu = backward_cost_layer_gpu;
00052 
00053     l.delta_gpu = cuda_make_array(l.output, inputs*batch);
00054     l.output_gpu = cuda_make_array(l.delta, inputs*batch);
00055     #endif
00056     return l;
00057 }
00058 
00059 void resize_cost_layer(cost_layer *l, int inputs)
00060 {
00061     l->inputs = inputs;
00062     l->outputs = inputs;
00063     l->delta = realloc(l->delta, inputs*l->batch*sizeof(float));
00064     l->output = realloc(l->output, inputs*l->batch*sizeof(float));
00065 #ifdef GPU
00066     cuda_free(l->delta_gpu);
00067     cuda_free(l->output_gpu);
00068     l->delta_gpu = cuda_make_array(l->delta, inputs*l->batch);
00069     l->output_gpu = cuda_make_array(l->output, inputs*l->batch);
00070 #endif
00071 }
00072 
00073 void forward_cost_layer(cost_layer l, network_state state)
00074 {
00075     if (!state.truth) return;
00076     if(l.cost_type == MASKED){
00077         int i;
00078         for(i = 0; i < l.batch*l.inputs; ++i){
00079             if(state.truth[i] == SECRET_NUM) state.input[i] = SECRET_NUM;
00080         }
00081     }
00082     if(l.cost_type == SMOOTH){
00083         smooth_l1_cpu(l.batch*l.inputs, state.input, state.truth, l.delta, l.output);
00084     } else {
00085         l2_cpu(l.batch*l.inputs, state.input, state.truth, l.delta, l.output);
00086     }
00087     l.cost[0] = sum_array(l.output, l.batch*l.inputs);
00088 }
00089 
00090 void backward_cost_layer(const cost_layer l, network_state state)
00091 {
00092     axpy_cpu(l.batch*l.inputs, l.scale, l.delta, 1, state.delta, 1);
00093 }
00094 
00095 #ifdef GPU
00096 
00097 void pull_cost_layer(cost_layer l)
00098 {
00099     cuda_pull_array(l.delta_gpu, l.delta, l.batch*l.inputs);
00100 }
00101 
00102 void push_cost_layer(cost_layer l)
00103 {
00104     cuda_push_array(l.delta_gpu, l.delta, l.batch*l.inputs);
00105 }
00106 
/*
 * qsort-style comparator ordering floats by ascending absolute value.
 *
 * a, b: pointers to the two floats being compared.
 *
 * Returns -1 if |*a| < |*b|, 1 if |*a| > |*b|, and 0 otherwise.
 */
int float_abs_compare (const void * a, const void * b)
{
    const float lhs_raw = *(const float*) a;
    const float rhs_raw = *(const float*) b;
    const float lhs = (lhs_raw < 0) ? -lhs_raw : lhs_raw;
    const float rhs = (rhs_raw < 0) ? -rhs_raw : rhs_raw;

    if (lhs < rhs) return -1;
    if (lhs > rhs) return 1;
    return 0;
}
00115 
00116 void forward_cost_layer_gpu(cost_layer l, network_state state)
00117 {
00118     if (!state.truth) return;
00119     if (l.cost_type == MASKED) {
00120         mask_ongpu(l.batch*l.inputs, state.input, SECRET_NUM, state.truth);
00121     }
00122 
00123     if(l.cost_type == SMOOTH){
00124         smooth_l1_gpu(l.batch*l.inputs, state.input, state.truth, l.delta_gpu, l.output_gpu);
00125     } else {
00126         l2_gpu(l.batch*l.inputs, state.input, state.truth, l.delta_gpu, l.output_gpu);
00127     }
00128 
00129     if(l.ratio){
00130         cuda_pull_array(l.delta_gpu, l.delta, l.batch*l.inputs);
00131         qsort(l.delta, l.batch*l.inputs, sizeof(float), float_abs_compare);
00132         int n = (1-l.ratio) * l.batch*l.inputs;
00133         float thresh = l.delta[n];
00134         thresh = 0;
00135         printf("%f\n", thresh);
00136         supp_ongpu(l.batch*l.inputs, thresh, l.delta_gpu, 1);
00137     }
00138 
00139     cuda_pull_array(l.output_gpu, l.output, l.batch*l.inputs);
00140     l.cost[0] = sum_array(l.output, l.batch*l.inputs);
00141 }
00142 
00143 void backward_cost_layer_gpu(const cost_layer l, network_state state)
00144 {
00145     axpy_ongpu(l.batch*l.inputs, l.scale, l.delta_gpu, 1, state.delta, 1);
00146 }
00147 #endif
00148