#include "route_layer.h"
#include "cuda.h"
#include "blas.h"

#include <stdio.h>
#include <stdlib.h>

route_layer make_route_layer(int batch, int n, int *input_layers, int *input_sizes)
{
    fprintf(stderr, "route ");
    route_layer l = {0};
    l.type = ROUTE;
    l.batch = batch;
    l.n = n;
    /* The layer keeps the caller's arrays; it does not copy them. */
    l.input_layers = input_layers;
    l.input_sizes = input_sizes;
    int i;
    int outputs = 0;
    for(i = 0; i < n; ++i){
        fprintf(stderr, " %d", input_layers[i]);
        outputs += input_sizes[i];
    }
    fprintf(stderr, "\n");
    l.outputs = outputs;
    l.inputs = outputs;
    l.delta = calloc(outputs*batch, sizeof(float));
    l.output = calloc(outputs*batch, sizeof(float));

    l.forward = forward_route_layer;
    l.backward = backward_route_layer;
#ifdef GPU
    l.forward_gpu = forward_route_layer_gpu;
    l.backward_gpu = backward_route_layer_gpu;

    l.delta_gpu = cuda_make_array(l.delta, outputs*batch);
    l.output_gpu = cuda_make_array(l.output, outputs*batch);
#endif
    return l;
}

void resize_route_layer(route_layer *l, network *net)
{
    int i;
    layer first = net->layers[l->input_layers[0]];
    l->out_w = first.out_w;
    l->out_h = first.out_h;
    l->out_c = first.out_c;
    l->outputs = first.outputs;
    l->input_sizes[0] = first.outputs;
    for(i = 1; i < l->n; ++i){
        int index = l->input_layers[i];
        layer next = net->layers[index];
        l->outputs += next.outputs;
        l->input_sizes[i] = next.outputs;
        if(next.out_w == first.out_w && next.out_h == first.out_h){
            l->out_c += next.out_c;
        }else{
            /* Spatial dimensions must match to concatenate along channels. */
            printf("%d %d, %d %d\n", next.out_w, next.out_h, first.out_w, first.out_h);
            l->out_h = l->out_w = l->out_c = 0;
        }
    }
    l->inputs = l->outputs;
    l->delta = realloc(l->delta, l->outputs*l->batch*sizeof(float));
    l->output = realloc(l->output, l->outputs*l->batch*sizeof(float));

#ifdef GPU
    cuda_free(l->output_gpu);
    cuda_free(l->delta_gpu);
    l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch);
    l->delta_gpu = cuda_make_array(l->delta, l->outputs*l->batch);
#endif
}

/* Concatenate the source layers' outputs into this layer's output,
 * one batch element at a time. */
void forward_route_layer(const route_layer l, network_state state)
{
    int i, j;
    int offset = 0;
    for(i = 0; i < l.n; ++i){
        int index = l.input_layers[i];
        float *input = state.net.layers[index].output;
        int input_size = l.input_sizes[i];
        for(j = 0; j < l.batch; ++j){
            copy_cpu(input_size, input + j*input_size, 1, l.output + offset + j*l.outputs, 1);
        }
        offset += input_size;
    }
}

/* Scatter this layer's delta back into each source layer's delta,
 * accumulating with axpy. */
void backward_route_layer(const route_layer l, network_state state)
{
    int i, j;
    int offset = 0;
    for(i = 0; i < l.n; ++i){
        int index = l.input_layers[i];
        float *delta = state.net.layers[index].delta;
        int input_size = l.input_sizes[i];
        for(j = 0; j < l.batch; ++j){
            axpy_cpu(input_size, 1, l.delta + offset + j*l.outputs, 1, delta + j*input_size, 1);
        }
        offset += input_size;
    }
}

#ifdef GPU
void forward_route_layer_gpu(const route_layer l, network_state state)
{
    int i, j;
    int offset = 0;
    for(i = 0; i < l.n; ++i){
        int index = l.input_layers[i];
        float *input = state.net.layers[index].output_gpu;
        int input_size = l.input_sizes[i];
        for(j = 0; j < l.batch; ++j){
            copy_ongpu(input_size, input + j*input_size, 1, l.output_gpu + offset + j*l.outputs, 1);
        }
        offset += input_size;
    }
}

void backward_route_layer_gpu(const route_layer l, network_state state)
{
    int i, j;
    int offset = 0;
    for(i = 0; i < l.n; ++i){
        int index = l.input_layers[i];
        float *delta = state.net.layers[index].delta_gpu;
        int input_size = l.input_sizes[i];
        for(j = 0; j < l.batch; ++j){
            axpy_ongpu(input_size, 1, l.delta_gpu + offset + j*l.outputs, 1, delta + j*input_size, 1);
        }
        offset += input_size;
    }
}
#endif
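
/*
 * Usage sketch (not part of the library): how a route layer might be
 * constructed by hand. In darknet the indices and sizes normally come
 * from the .cfg parser; the values below are made up, and the
 * ROUTE_LAYER_EXAMPLE guard is a hypothetical flag so this stub is not
 * compiled into the library by default.
 */
#ifdef ROUTE_LAYER_EXAMPLE
int main(void)
{
    int n = 2;
    int *layers = calloc(n, sizeof(int));
    int *sizes  = calloc(n, sizeof(int));
    layers[0] = 1;  layers[1] = 3;   /* hypothetical source layer indices */
    sizes[0] = 13*13*256;            /* hypothetical flattened output sizes */
    sizes[1] = 13*13*128;

    /* The layer stores the layers/sizes pointers; do not free them
     * while the layer is in use. */
    route_layer l = make_route_layer(1, n, layers, sizes);
    /* l.outputs == sizes[0] + sizes[1]; forward/backward copy per-batch
     * slices of each source layer into/out of the concatenated buffer. */

    free(l.output);
    free(l.delta);
    free(layers);
    free(sizes);
    return 0;
}
#endif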