00001 #include "cost_layer.h" 00002 #include "utils.h" 00003 #include "cuda.h" 00004 #include "blas.h" 00005 #include <math.h> 00006 #include <string.h> 00007 #include <stdlib.h> 00008 #include <stdio.h> 00009 00010 COST_TYPE get_cost_type(char *s) 00011 { 00012 if (strcmp(s, "sse")==0) return SSE; 00013 if (strcmp(s, "masked")==0) return MASKED; 00014 if (strcmp(s, "smooth")==0) return SMOOTH; 00015 fprintf(stderr, "Couldn't find cost type %s, going with SSE\n", s); 00016 return SSE; 00017 } 00018 00019 char *get_cost_string(COST_TYPE a) 00020 { 00021 switch(a){ 00022 case SSE: 00023 return "sse"; 00024 case MASKED: 00025 return "masked"; 00026 case SMOOTH: 00027 return "smooth"; 00028 } 00029 return "sse"; 00030 } 00031 00032 cost_layer make_cost_layer(int batch, int inputs, COST_TYPE cost_type, float scale) 00033 { 00034 fprintf(stderr, "cost %4d\n", inputs); 00035 cost_layer l = {0}; 00036 l.type = COST; 00037 00038 l.scale = scale; 00039 l.batch = batch; 00040 l.inputs = inputs; 00041 l.outputs = inputs; 00042 l.cost_type = cost_type; 00043 l.delta = calloc(inputs*batch, sizeof(float)); 00044 l.output = calloc(inputs*batch, sizeof(float)); 00045 l.cost = calloc(1, sizeof(float)); 00046 00047 l.forward = forward_cost_layer; 00048 l.backward = backward_cost_layer; 00049 #ifdef GPU 00050 l.forward_gpu = forward_cost_layer_gpu; 00051 l.backward_gpu = backward_cost_layer_gpu; 00052 00053 l.delta_gpu = cuda_make_array(l.output, inputs*batch); 00054 l.output_gpu = cuda_make_array(l.delta, inputs*batch); 00055 #endif 00056 return l; 00057 } 00058 00059 void resize_cost_layer(cost_layer *l, int inputs) 00060 { 00061 l->inputs = inputs; 00062 l->outputs = inputs; 00063 l->delta = realloc(l->delta, inputs*l->batch*sizeof(float)); 00064 l->output = realloc(l->output, inputs*l->batch*sizeof(float)); 00065 #ifdef GPU 00066 cuda_free(l->delta_gpu); 00067 cuda_free(l->output_gpu); 00068 l->delta_gpu = cuda_make_array(l->delta, inputs*l->batch); 00069 l->output_gpu = cuda_make_array(l->output, inputs*l->batch); 00070 #endif 00071 } 00072 00073 void forward_cost_layer(cost_layer l, network_state state) 00074 { 00075 if (!state.truth) return; 00076 if(l.cost_type == MASKED){ 00077 int i; 00078 for(i = 0; i < l.batch*l.inputs; ++i){ 00079 if(state.truth[i] == SECRET_NUM) state.input[i] = SECRET_NUM; 00080 } 00081 } 00082 if(l.cost_type == SMOOTH){ 00083 smooth_l1_cpu(l.batch*l.inputs, state.input, state.truth, l.delta, l.output); 00084 } else { 00085 l2_cpu(l.batch*l.inputs, state.input, state.truth, l.delta, l.output); 00086 } 00087 l.cost[0] = sum_array(l.output, l.batch*l.inputs); 00088 } 00089 00090 void backward_cost_layer(const cost_layer l, network_state state) 00091 { 00092 axpy_cpu(l.batch*l.inputs, l.scale, l.delta, 1, state.delta, 1); 00093 } 00094 00095 #ifdef GPU 00096 00097 void pull_cost_layer(cost_layer l) 00098 { 00099 cuda_pull_array(l.delta_gpu, l.delta, l.batch*l.inputs); 00100 } 00101 00102 void push_cost_layer(cost_layer l) 00103 { 00104 cuda_push_array(l.delta_gpu, l.delta, l.batch*l.inputs); 00105 } 00106 00107 int float_abs_compare (const void * a, const void * b) 00108 { 00109 float fa = *(const float*) a; 00110 if(fa < 0) fa = -fa; 00111 float fb = *(const float*) b; 00112 if(fb < 0) fb = -fb; 00113 return (fa > fb) - (fa < fb); 00114 } 00115 00116 void forward_cost_layer_gpu(cost_layer l, network_state state) 00117 { 00118 if (!state.truth) return; 00119 if (l.cost_type == MASKED) { 00120 mask_ongpu(l.batch*l.inputs, state.input, SECRET_NUM, state.truth); 00121 } 00122 00123 if(l.cost_type == SMOOTH){ 00124 smooth_l1_gpu(l.batch*l.inputs, state.input, state.truth, l.delta_gpu, l.output_gpu); 00125 } else { 00126 l2_gpu(l.batch*l.inputs, state.input, state.truth, l.delta_gpu, l.output_gpu); 00127 } 00128 00129 if(l.ratio){ 00130 cuda_pull_array(l.delta_gpu, l.delta, l.batch*l.inputs); 00131 qsort(l.delta, l.batch*l.inputs, sizeof(float), float_abs_compare); 00132 int n = (1-l.ratio) * l.batch*l.inputs; 00133 float thresh = l.delta[n]; 00134 thresh = 0; 00135 printf("%f\n", thresh); 00136 supp_ongpu(l.batch*l.inputs, thresh, l.delta_gpu, 1); 00137 } 00138 00139 cuda_pull_array(l.output_gpu, l.output, l.batch*l.inputs); 00140 l.cost[0] = sum_array(l.output, l.batch*l.inputs); 00141 } 00142 00143 void backward_cost_layer_gpu(const cost_layer l, network_state state) 00144 { 00145 axpy_ongpu(l.batch*l.inputs, l.scale, l.delta_gpu, 1, state.delta, 1); 00146 } 00147 #endif 00148