00001 #include "crnn_layer.h"
00002 #include "convolutional_layer.h"
00003 #include "utils.h"
00004 #include "cuda.h"
00005 #include "blas.h"
00006 #include "gemm.h"
00007
00008 #include <math.h>
00009 #include <stdio.h>
00010 #include <stdlib.h>
00011 #include <string.h>
00012
00013 static void increment_layer(layer *l, int steps)
00014 {
00015 int num = l->outputs*l->batch*steps;
00016 l->output += num;
00017 l->delta += num;
00018 l->x += num;
00019 l->x_norm += num;
00020
00021 #ifdef GPU
00022 l->output_gpu += num;
00023 l->delta_gpu += num;
00024 l->x_gpu += num;
00025 l->x_norm_gpu += num;
00026 #endif
00027 }
00028
00029 layer make_crnn_layer(int batch, int h, int w, int c, int hidden_filters, int output_filters, int steps, ACTIVATION activation, int batch_normalize)
00030 {
00031 fprintf(stderr, "CRNN Layer: %d x %d x %d image, %d filters\n", h,w,c,output_filters);
00032 batch = batch / steps;
00033 layer l = {0};
00034 l.batch = batch;
00035 l.type = CRNN;
00036 l.steps = steps;
00037 l.h = h;
00038 l.w = w;
00039 l.c = c;
00040 l.out_h = h;
00041 l.out_w = w;
00042 l.out_c = output_filters;
00043 l.inputs = h*w*c;
00044 l.hidden = h * w * hidden_filters;
00045 l.outputs = l.out_h * l.out_w * l.out_c;
00046
00047 l.state = calloc(l.hidden*batch*(steps+1), sizeof(float));
00048
00049 l.input_layer = malloc(sizeof(layer));
00050 fprintf(stderr, "\t\t");
00051 *(l.input_layer) = make_convolutional_layer(batch*steps, h, w, c, hidden_filters, 3, 1, 1, activation, batch_normalize, 0, 0, 0);
00052 l.input_layer->batch = batch;
00053
00054 l.self_layer = malloc(sizeof(layer));
00055 fprintf(stderr, "\t\t");
00056 *(l.self_layer) = make_convolutional_layer(batch*steps, h, w, hidden_filters, hidden_filters, 3, 1, 1, activation, batch_normalize, 0, 0, 0);
00057 l.self_layer->batch = batch;
00058
00059 l.output_layer = malloc(sizeof(layer));
00060 fprintf(stderr, "\t\t");
00061 *(l.output_layer) = make_convolutional_layer(batch*steps, h, w, hidden_filters, output_filters, 3, 1, 1, activation, batch_normalize, 0, 0, 0);
00062 l.output_layer->batch = batch;
00063
00064 l.output = l.output_layer->output;
00065 l.delta = l.output_layer->delta;
00066
00067 l.forward = forward_crnn_layer;
00068 l.backward = backward_crnn_layer;
00069 l.update = update_crnn_layer;
00070
00071 #ifdef GPU
00072 l.forward_gpu = forward_crnn_layer_gpu;
00073 l.backward_gpu = backward_crnn_layer_gpu;
00074 l.update_gpu = update_crnn_layer_gpu;
00075
00076 l.state_gpu = cuda_make_array(l.state, l.hidden*batch*(steps+1));
00077 l.output_gpu = l.output_layer->output_gpu;
00078 l.delta_gpu = l.output_layer->delta_gpu;
00079 #endif
00080
00081 return l;
00082 }
00083
00084 void update_crnn_layer(layer l, int batch, float learning_rate, float momentum, float decay)
00085 {
00086 update_convolutional_layer(*(l.input_layer), batch, learning_rate, momentum, decay);
00087 update_convolutional_layer(*(l.self_layer), batch, learning_rate, momentum, decay);
00088 update_convolutional_layer(*(l.output_layer), batch, learning_rate, momentum, decay);
00089 }
00090
00091 void forward_crnn_layer(layer l, network_state state)
00092 {
00093 network_state s = {0};
00094 s.train = state.train;
00095 int i;
00096 layer input_layer = *(l.input_layer);
00097 layer self_layer = *(l.self_layer);
00098 layer output_layer = *(l.output_layer);
00099
00100 fill_cpu(l.outputs * l.batch * l.steps, 0, output_layer.delta, 1);
00101 fill_cpu(l.hidden * l.batch * l.steps, 0, self_layer.delta, 1);
00102 fill_cpu(l.hidden * l.batch * l.steps, 0, input_layer.delta, 1);
00103 if(state.train) fill_cpu(l.hidden * l.batch, 0, l.state, 1);
00104
00105 for (i = 0; i < l.steps; ++i) {
00106 s.input = state.input;
00107 forward_convolutional_layer(input_layer, s);
00108
00109 s.input = l.state;
00110 forward_convolutional_layer(self_layer, s);
00111
00112 float *old_state = l.state;
00113 if(state.train) l.state += l.hidden*l.batch;
00114 if(l.shortcut){
00115 copy_cpu(l.hidden * l.batch, old_state, 1, l.state, 1);
00116 }else{
00117 fill_cpu(l.hidden * l.batch, 0, l.state, 1);
00118 }
00119 axpy_cpu(l.hidden * l.batch, 1, input_layer.output, 1, l.state, 1);
00120 axpy_cpu(l.hidden * l.batch, 1, self_layer.output, 1, l.state, 1);
00121
00122 s.input = l.state;
00123 forward_convolutional_layer(output_layer, s);
00124
00125 state.input += l.inputs*l.batch;
00126 increment_layer(&input_layer, 1);
00127 increment_layer(&self_layer, 1);
00128 increment_layer(&output_layer, 1);
00129 }
00130 }
00131
00132 void backward_crnn_layer(layer l, network_state state)
00133 {
00134 network_state s = {0};
00135 s.train = state.train;
00136 int i;
00137 layer input_layer = *(l.input_layer);
00138 layer self_layer = *(l.self_layer);
00139 layer output_layer = *(l.output_layer);
00140
00141 increment_layer(&input_layer, l.steps-1);
00142 increment_layer(&self_layer, l.steps-1);
00143 increment_layer(&output_layer, l.steps-1);
00144
00145 l.state += l.hidden*l.batch*l.steps;
00146 for (i = l.steps-1; i >= 0; --i) {
00147 copy_cpu(l.hidden * l.batch, input_layer.output, 1, l.state, 1);
00148 axpy_cpu(l.hidden * l.batch, 1, self_layer.output, 1, l.state, 1);
00149
00150 s.input = l.state;
00151 s.delta = self_layer.delta;
00152 backward_convolutional_layer(output_layer, s);
00153
00154 l.state -= l.hidden*l.batch;
00155
00156
00157
00158
00159
00160
00161
00162
00163
00164 s.input = l.state;
00165 s.delta = self_layer.delta - l.hidden*l.batch;
00166 if (i == 0) s.delta = 0;
00167 backward_convolutional_layer(self_layer, s);
00168
00169 copy_cpu(l.hidden*l.batch, self_layer.delta, 1, input_layer.delta, 1);
00170 if (i > 0 && l.shortcut) axpy_cpu(l.hidden*l.batch, 1, self_layer.delta, 1, self_layer.delta - l.hidden*l.batch, 1);
00171 s.input = state.input + i*l.inputs*l.batch;
00172 if(state.delta) s.delta = state.delta + i*l.inputs*l.batch;
00173 else s.delta = 0;
00174 backward_convolutional_layer(input_layer, s);
00175
00176 increment_layer(&input_layer, -1);
00177 increment_layer(&self_layer, -1);
00178 increment_layer(&output_layer, -1);
00179 }
00180 }
00181
00182 #ifdef GPU
00183
00184 void pull_crnn_layer(layer l)
00185 {
00186 pull_convolutional_layer(*(l.input_layer));
00187 pull_convolutional_layer(*(l.self_layer));
00188 pull_convolutional_layer(*(l.output_layer));
00189 }
00190
00191 void push_crnn_layer(layer l)
00192 {
00193 push_convolutional_layer(*(l.input_layer));
00194 push_convolutional_layer(*(l.self_layer));
00195 push_convolutional_layer(*(l.output_layer));
00196 }
00197
00198 void update_crnn_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay)
00199 {
00200 update_convolutional_layer_gpu(*(l.input_layer), batch, learning_rate, momentum, decay);
00201 update_convolutional_layer_gpu(*(l.self_layer), batch, learning_rate, momentum, decay);
00202 update_convolutional_layer_gpu(*(l.output_layer), batch, learning_rate, momentum, decay);
00203 }
00204
00205 void forward_crnn_layer_gpu(layer l, network_state state)
00206 {
00207 network_state s = {0};
00208 s.train = state.train;
00209 int i;
00210 layer input_layer = *(l.input_layer);
00211 layer self_layer = *(l.self_layer);
00212 layer output_layer = *(l.output_layer);
00213
00214 fill_ongpu(l.outputs * l.batch * l.steps, 0, output_layer.delta_gpu, 1);
00215 fill_ongpu(l.hidden * l.batch * l.steps, 0, self_layer.delta_gpu, 1);
00216 fill_ongpu(l.hidden * l.batch * l.steps, 0, input_layer.delta_gpu, 1);
00217 if(state.train) fill_ongpu(l.hidden * l.batch, 0, l.state_gpu, 1);
00218
00219 for (i = 0; i < l.steps; ++i) {
00220 s.input = state.input;
00221 forward_convolutional_layer_gpu(input_layer, s);
00222
00223 s.input = l.state_gpu;
00224 forward_convolutional_layer_gpu(self_layer, s);
00225
00226 float *old_state = l.state_gpu;
00227 if(state.train) l.state_gpu += l.hidden*l.batch;
00228 if(l.shortcut){
00229 copy_ongpu(l.hidden * l.batch, old_state, 1, l.state_gpu, 1);
00230 }else{
00231 fill_ongpu(l.hidden * l.batch, 0, l.state_gpu, 1);
00232 }
00233 axpy_ongpu(l.hidden * l.batch, 1, input_layer.output_gpu, 1, l.state_gpu, 1);
00234 axpy_ongpu(l.hidden * l.batch, 1, self_layer.output_gpu, 1, l.state_gpu, 1);
00235
00236 s.input = l.state_gpu;
00237 forward_convolutional_layer_gpu(output_layer, s);
00238
00239 state.input += l.inputs*l.batch;
00240 increment_layer(&input_layer, 1);
00241 increment_layer(&self_layer, 1);
00242 increment_layer(&output_layer, 1);
00243 }
00244 }
00245
00246 void backward_crnn_layer_gpu(layer l, network_state state)
00247 {
00248 network_state s = {0};
00249 s.train = state.train;
00250 int i;
00251 layer input_layer = *(l.input_layer);
00252 layer self_layer = *(l.self_layer);
00253 layer output_layer = *(l.output_layer);
00254 increment_layer(&input_layer, l.steps - 1);
00255 increment_layer(&self_layer, l.steps - 1);
00256 increment_layer(&output_layer, l.steps - 1);
00257 l.state_gpu += l.hidden*l.batch*l.steps;
00258 for (i = l.steps-1; i >= 0; --i) {
00259 copy_ongpu(l.hidden * l.batch, input_layer.output_gpu, 1, l.state_gpu, 1);
00260 axpy_ongpu(l.hidden * l.batch, 1, self_layer.output_gpu, 1, l.state_gpu, 1);
00261
00262 s.input = l.state_gpu;
00263 s.delta = self_layer.delta_gpu;
00264 backward_convolutional_layer_gpu(output_layer, s);
00265
00266 l.state_gpu -= l.hidden*l.batch;
00267
00268 s.input = l.state_gpu;
00269 s.delta = self_layer.delta_gpu - l.hidden*l.batch;
00270 if (i == 0) s.delta = 0;
00271 backward_convolutional_layer_gpu(self_layer, s);
00272
00273 copy_ongpu(l.hidden*l.batch, self_layer.delta_gpu, 1, input_layer.delta_gpu, 1);
00274 if (i > 0 && l.shortcut) axpy_ongpu(l.hidden*l.batch, 1, self_layer.delta_gpu, 1, self_layer.delta_gpu - l.hidden*l.batch, 1);
00275 s.input = state.input + i*l.inputs*l.batch;
00276 if(state.delta) s.delta = state.delta + i*l.inputs*l.batch;
00277 else s.delta = 0;
00278 backward_convolutional_layer_gpu(input_layer, s);
00279
00280 increment_layer(&input_layer, -1);
00281 increment_layer(&self_layer, -1);
00282 increment_layer(&output_layer, -1);
00283 }
00284 }
00285 #endif