crnn_layer.c
Go to the documentation of this file.
00001 #include "crnn_layer.h"
00002 #include "convolutional_layer.h"
00003 #include "utils.h"
00004 #include "cuda.h"
00005 #include "blas.h"
00006 #include "gemm.h"
00007 
00008 #include <math.h>
00009 #include <stdio.h>
00010 #include <stdlib.h>
00011 #include <string.h>
00012 
00013 static void increment_layer(layer *l, int steps)
00014 {
00015     int num = l->outputs*l->batch*steps;
00016     l->output += num;
00017     l->delta += num;
00018     l->x += num;
00019     l->x_norm += num;
00020 
00021 #ifdef GPU
00022     l->output_gpu += num;
00023     l->delta_gpu += num;
00024     l->x_gpu += num;
00025     l->x_norm_gpu += num;
00026 #endif
00027 }
00028 
00029 layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int steps, ACTIVATION activation, int batch_normalize)
00030 {
00031     fprintf(stderr, "CRNN Layer: %d x %d x %d image, %d filters\n", h,w,c,output_filters);
00032     batch = batch / steps;
00033     layer l = {0};
00034     l.batch = batch;
00035     l.type = CRNN;
00036     l.steps = steps;
00037     l.h = h;
00038     l.w = w;
00039     l.c = c;
00040     l.out_h = h;
00041     l.out_w = w;
00042     l.out_c = output_filters;
00043     l.inputs = h*w*c;
00044     l.hidden = h * w * hidden_filters;
00045     l.outputs = l.out_h * l.out_w * l.out_c;
00046 
00047     l.state = calloc(l.hidden*batch*(steps+1), sizeof(float));
00048 
00049     l.input_layer = malloc(sizeof(layer));
00050     fprintf(stderr, "\t\t");
00051     *(l.input_layer) = make_convolutional_layer(batch*steps, h, w, c, hidden_filters, 3, 1, 1,  activation, batch_normalize, 0, 0, 0);
00052     l.input_layer->batch = batch;
00053 
00054     l.self_layer = malloc(sizeof(layer));
00055     fprintf(stderr, "\t\t");
00056     *(l.self_layer) = make_convolutional_layer(batch*steps, h, w, hidden_filters, hidden_filters, 3, 1, 1,  activation, batch_normalize, 0, 0, 0);
00057     l.self_layer->batch = batch;
00058 
00059     l.output_layer = malloc(sizeof(layer));
00060     fprintf(stderr, "\t\t");
00061     *(l.output_layer) = make_convolutional_layer(batch*steps, h, w, hidden_filters, output_filters, 3, 1, 1,  activation, batch_normalize, 0, 0, 0);
00062     l.output_layer->batch = batch;
00063 
00064     l.output = l.output_layer->output;
00065     l.delta = l.output_layer->delta;
00066 
00067     l.forward = forward_crnn_layer;
00068     l.backward = backward_crnn_layer;
00069     l.update = update_crnn_layer;
00070 
00071 #ifdef GPU
00072     l.forward_gpu = forward_crnn_layer_gpu;
00073     l.backward_gpu = backward_crnn_layer_gpu;
00074     l.update_gpu = update_crnn_layer_gpu;
00075 
00076     l.state_gpu = cuda_make_array(l.state, l.hidden*batch*(steps+1));
00077     l.output_gpu = l.output_layer->output_gpu;
00078     l.delta_gpu = l.output_layer->delta_gpu;
00079 #endif
00080 
00081     return l;
00082 }
00083 
00084 void update_crnn_layer(layer l, int batch, float learning_rate, float momentum, float decay)
00085 {
00086     update_convolutional_layer(*(l.input_layer), batch, learning_rate, momentum, decay);
00087     update_convolutional_layer(*(l.self_layer), batch, learning_rate, momentum, decay);
00088     update_convolutional_layer(*(l.output_layer), batch, learning_rate, momentum, decay);
00089 }
00090 
00091 void forward_crnn_layer(layer l, network_state state)
00092 {
00093     network_state s = {0};
00094     s.train = state.train;
00095     int i;
00096     layer input_layer = *(l.input_layer);
00097     layer self_layer = *(l.self_layer);
00098     layer output_layer = *(l.output_layer);
00099 
00100     fill_cpu(l.outputs * l.batch * l.steps, 0, output_layer.delta, 1);
00101     fill_cpu(l.hidden * l.batch * l.steps, 0, self_layer.delta, 1);
00102     fill_cpu(l.hidden * l.batch * l.steps, 0, input_layer.delta, 1);
00103     if(state.train) fill_cpu(l.hidden * l.batch, 0, l.state, 1);
00104 
00105     for (i = 0; i < l.steps; ++i) {
00106         s.input = state.input;
00107         forward_convolutional_layer(input_layer, s);
00108 
00109         s.input = l.state;
00110         forward_convolutional_layer(self_layer, s);
00111 
00112         float *old_state = l.state;
00113         if(state.train) l.state += l.hidden*l.batch;
00114         if(l.shortcut){
00115             copy_cpu(l.hidden * l.batch, old_state, 1, l.state, 1);
00116         }else{
00117             fill_cpu(l.hidden * l.batch, 0, l.state, 1);
00118         }
00119         axpy_cpu(l.hidden * l.batch, 1, input_layer.output, 1, l.state, 1);
00120         axpy_cpu(l.hidden * l.batch, 1, self_layer.output, 1, l.state, 1);
00121 
00122         s.input = l.state;
00123         forward_convolutional_layer(output_layer, s);
00124 
00125         state.input += l.inputs*l.batch;
00126         increment_layer(&input_layer, 1);
00127         increment_layer(&self_layer, 1);
00128         increment_layer(&output_layer, 1);
00129     }
00130 }
00131 
00132 void backward_crnn_layer(layer l, network_state state)
00133 {
00134     network_state s = {0};
00135     s.train = state.train;
00136     int i;
00137     layer input_layer = *(l.input_layer);
00138     layer self_layer = *(l.self_layer);
00139     layer output_layer = *(l.output_layer);
00140 
00141     increment_layer(&input_layer, l.steps-1);
00142     increment_layer(&self_layer, l.steps-1);
00143     increment_layer(&output_layer, l.steps-1);
00144 
00145     l.state += l.hidden*l.batch*l.steps;
00146     for (i = l.steps-1; i >= 0; --i) {
00147         copy_cpu(l.hidden * l.batch, input_layer.output, 1, l.state, 1);
00148         axpy_cpu(l.hidden * l.batch, 1, self_layer.output, 1, l.state, 1);
00149 
00150         s.input = l.state;
00151         s.delta = self_layer.delta;
00152         backward_convolutional_layer(output_layer, s);
00153 
00154         l.state -= l.hidden*l.batch;
00155         /*
00156            if(i > 0){
00157            copy_cpu(l.hidden * l.batch, input_layer.output - l.hidden*l.batch, 1, l.state, 1);
00158            axpy_cpu(l.hidden * l.batch, 1, self_layer.output - l.hidden*l.batch, 1, l.state, 1);
00159            }else{
00160            fill_cpu(l.hidden * l.batch, 0, l.state, 1);
00161            }
00162          */
00163 
00164         s.input = l.state;
00165         s.delta = self_layer.delta - l.hidden*l.batch;
00166         if (i == 0) s.delta = 0;
00167         backward_convolutional_layer(self_layer, s);
00168 
00169         copy_cpu(l.hidden*l.batch, self_layer.delta, 1, input_layer.delta, 1);
00170         if (i > 0 && l.shortcut) axpy_cpu(l.hidden*l.batch, 1, self_layer.delta, 1, self_layer.delta - l.hidden*l.batch, 1);
00171         s.input = state.input + i*l.inputs*l.batch;
00172         if(state.delta) s.delta = state.delta + i*l.inputs*l.batch;
00173         else s.delta = 0;
00174         backward_convolutional_layer(input_layer, s);
00175 
00176         increment_layer(&input_layer, -1);
00177         increment_layer(&self_layer, -1);
00178         increment_layer(&output_layer, -1);
00179     }
00180 }
00181 
00182 #ifdef GPU
00183 
00184 void pull_crnn_layer(layer l)
00185 {
00186     pull_convolutional_layer(*(l.input_layer));
00187     pull_convolutional_layer(*(l.self_layer));
00188     pull_convolutional_layer(*(l.output_layer));
00189 }
00190 
00191 void push_crnn_layer(layer l)
00192 {
00193     push_convolutional_layer(*(l.input_layer));
00194     push_convolutional_layer(*(l.self_layer));
00195     push_convolutional_layer(*(l.output_layer));
00196 }
00197 
00198 void update_crnn_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay)
00199 {
00200     update_convolutional_layer_gpu(*(l.input_layer), batch, learning_rate, momentum, decay);
00201     update_convolutional_layer_gpu(*(l.self_layer), batch, learning_rate, momentum, decay);
00202     update_convolutional_layer_gpu(*(l.output_layer), batch, learning_rate, momentum, decay);
00203 }
00204 
00205 void forward_crnn_layer_gpu(layer l, network_state state)
00206 {
00207     network_state s = {0};
00208     s.train = state.train;
00209     int i;
00210     layer input_layer = *(l.input_layer);
00211     layer self_layer = *(l.self_layer);
00212     layer output_layer = *(l.output_layer);
00213 
00214     fill_ongpu(l.outputs * l.batch * l.steps, 0, output_layer.delta_gpu, 1);
00215     fill_ongpu(l.hidden * l.batch * l.steps, 0, self_layer.delta_gpu, 1);
00216     fill_ongpu(l.hidden * l.batch * l.steps, 0, input_layer.delta_gpu, 1);
00217     if(state.train) fill_ongpu(l.hidden * l.batch, 0, l.state_gpu, 1);
00218 
00219     for (i = 0; i < l.steps; ++i) {
00220         s.input = state.input;
00221         forward_convolutional_layer_gpu(input_layer, s);
00222 
00223         s.input = l.state_gpu;
00224         forward_convolutional_layer_gpu(self_layer, s);
00225 
00226         float *old_state = l.state_gpu;
00227         if(state.train) l.state_gpu += l.hidden*l.batch;
00228         if(l.shortcut){
00229             copy_ongpu(l.hidden * l.batch, old_state, 1, l.state_gpu, 1);
00230         }else{
00231             fill_ongpu(l.hidden * l.batch, 0, l.state_gpu, 1);
00232         }
00233         axpy_ongpu(l.hidden * l.batch, 1, input_layer.output_gpu, 1, l.state_gpu, 1);
00234         axpy_ongpu(l.hidden * l.batch, 1, self_layer.output_gpu, 1, l.state_gpu, 1);
00235 
00236         s.input = l.state_gpu;
00237         forward_convolutional_layer_gpu(output_layer, s);
00238 
00239         state.input += l.inputs*l.batch;
00240         increment_layer(&input_layer, 1);
00241         increment_layer(&self_layer, 1);
00242         increment_layer(&output_layer, 1);
00243     }
00244 }
00245 
00246 void backward_crnn_layer_gpu(layer l, network_state state)
00247 {
00248     network_state s = {0};
00249     s.train = state.train;
00250     int i;
00251     layer input_layer = *(l.input_layer);
00252     layer self_layer = *(l.self_layer);
00253     layer output_layer = *(l.output_layer);
00254     increment_layer(&input_layer,  l.steps - 1);
00255     increment_layer(&self_layer,   l.steps - 1);
00256     increment_layer(&output_layer, l.steps - 1);
00257     l.state_gpu += l.hidden*l.batch*l.steps;
00258     for (i = l.steps-1; i >= 0; --i) {
00259         copy_ongpu(l.hidden * l.batch, input_layer.output_gpu, 1, l.state_gpu, 1);
00260         axpy_ongpu(l.hidden * l.batch, 1, self_layer.output_gpu, 1, l.state_gpu, 1);
00261 
00262         s.input = l.state_gpu;
00263         s.delta = self_layer.delta_gpu;
00264         backward_convolutional_layer_gpu(output_layer, s);
00265 
00266         l.state_gpu -= l.hidden*l.batch;
00267 
00268         s.input = l.state_gpu;
00269         s.delta = self_layer.delta_gpu - l.hidden*l.batch;
00270         if (i == 0) s.delta = 0;
00271         backward_convolutional_layer_gpu(self_layer, s);
00272 
00273         copy_ongpu(l.hidden*l.batch, self_layer.delta_gpu, 1, input_layer.delta_gpu, 1);
00274         if (i > 0 && l.shortcut) axpy_ongpu(l.hidden*l.batch, 1, self_layer.delta_gpu, 1, self_layer.delta_gpu - l.hidden*l.batch, 1);
00275         s.input = state.input + i*l.inputs*l.batch;
00276         if(state.delta) s.delta = state.delta + i*l.inputs*l.batch;
00277         else s.delta = 0;
00278         backward_convolutional_layer_gpu(input_layer, s);
00279 
00280         increment_layer(&input_layer,  -1);
00281         increment_layer(&self_layer,   -1);
00282         increment_layer(&output_layer, -1);
00283     }
00284 }
00285 #endif


rail_object_detector
Author(s):
autogenerated on Sat Jun 8 2019 20:26:29