gru_layer.c
#include "gru_layer.h"
#include "connected_layer.h"
#include "utils.h"
#include "cuda.h"
#include "blas.h"
#include "gemm.h"

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

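/*
 * Advance a sublayer's per-time-step pointers (output, delta, batch-norm
 * buffers) by `steps` time steps, so one connected-layer struct can walk the
 * unrolled sequence.
 */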
static void increment_layer(layer *l, int steps)
{
    int num = l->outputs*l->batch*steps;
    l->output += num;
    l->delta += num;
    l->x += num;
    l->x_norm += num;

#ifdef GPU
    l->output_gpu += num;
    l->delta_gpu += num;
    l->x_gpu += num;
    l->x_norm_gpu += num;
#endif
}

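/*
 * A GRU layer is assembled from six connected sublayers: one input-side (W)
 * and one state-side (U) projection for each of the update gate z, the reset
 * gate r and the candidate state. Per time step the forward pass computes
 *
 *   z_t  = logistic(W_z x_t + U_z h_{t-1})
 *   r_t  = logistic(W_r x_t + U_r h_{t-1})
 *   h~_t = act(W_h x_t + U_h (r_t .* h_{t-1}))
 *   h_t  = z_t .* h_{t-1} + (1 - z_t) .* h~_t
 *
 * where act is tanh if USET is defined and logistic otherwise, and the last
 * line follows darknet's weighted_sum convention (the old state is kept where
 * the update gate saturates at 1).
 */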
layer make_gru_layer(int batch, int inputs, int outputs, int steps, int batch_normalize)
{
    fprintf(stderr, "GRU Layer: %d inputs, %d outputs\n", inputs, outputs);
    batch = batch / steps;   /* the caller passes batch*steps; keep the per-step batch */
    layer l = {0};
    l.batch = batch;
    l.type = GRU;
    l.steps = steps;
    l.inputs = inputs;

    /* Update gate: W_z (input side) and U_z (state side). */
    l.input_z_layer = malloc(sizeof(layer));
    fprintf(stderr, "\t\t");
    *(l.input_z_layer) = make_connected_layer(batch*steps, inputs, outputs, LINEAR, batch_normalize);
    l.input_z_layer->batch = batch;

    l.state_z_layer = malloc(sizeof(layer));
    fprintf(stderr, "\t\t");
    *(l.state_z_layer) = make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize);
    l.state_z_layer->batch = batch;

    /* Reset gate: W_r and U_r. */
    l.input_r_layer = malloc(sizeof(layer));
    fprintf(stderr, "\t\t");
    *(l.input_r_layer) = make_connected_layer(batch*steps, inputs, outputs, LINEAR, batch_normalize);
    l.input_r_layer->batch = batch;

    l.state_r_layer = malloc(sizeof(layer));
    fprintf(stderr, "\t\t");
    *(l.state_r_layer) = make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize);
    l.state_r_layer->batch = batch;

    /* Candidate state: W_h and U_h. */
    l.input_h_layer = malloc(sizeof(layer));
    fprintf(stderr, "\t\t");
    *(l.input_h_layer) = make_connected_layer(batch*steps, inputs, outputs, LINEAR, batch_normalize);
    l.input_h_layer->batch = batch;

    l.state_h_layer = malloc(sizeof(layer));
    fprintf(stderr, "\t\t");
    *(l.state_h_layer) = make_connected_layer(batch*steps, outputs, outputs, LINEAR, batch_normalize);
    l.state_h_layer->batch = batch;

    l.batch_normalize = batch_normalize;

    l.outputs = outputs;
    l.output = calloc(outputs*batch*steps, sizeof(float));
    l.delta = calloc(outputs*batch*steps, sizeof(float));
    l.state = calloc(outputs*batch, sizeof(float));
    l.prev_state = calloc(outputs*batch, sizeof(float));
    l.forgot_state = calloc(outputs*batch, sizeof(float));
    l.forgot_delta = calloc(outputs*batch, sizeof(float));

    l.r_cpu = calloc(outputs*batch, sizeof(float));
    l.z_cpu = calloc(outputs*batch, sizeof(float));
    l.h_cpu = calloc(outputs*batch, sizeof(float));

    l.forward = forward_gru_layer;
    l.backward = backward_gru_layer;
    l.update = update_gru_layer;

#ifdef GPU
    l.forward_gpu = forward_gru_layer_gpu;
    l.backward_gpu = backward_gru_layer_gpu;
    l.update_gpu = update_gru_layer_gpu;

    /* Device buffers, seeded from the zero-initialized host buffers. */
    l.forgot_state_gpu = cuda_make_array(l.output, batch*outputs);
    l.forgot_delta_gpu = cuda_make_array(l.output, batch*outputs);
    l.prev_state_gpu = cuda_make_array(l.output, batch*outputs);
    l.state_gpu = cuda_make_array(l.output, batch*outputs);
    l.output_gpu = cuda_make_array(l.output, batch*outputs*steps);
    l.delta_gpu = cuda_make_array(l.delta, batch*outputs*steps);
    l.r_gpu = cuda_make_array(l.output, batch*outputs);
    l.z_gpu = cuda_make_array(l.output, batch*outputs);
    l.h_gpu = cuda_make_array(l.output, batch*outputs);
#endif

    return l;
}
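/*
 * Rough usage sketch (not part of the original file): how a caller might
 * construct and run the layer directly. In darknet this wiring is normally
 * done by parser.c / network.c; only the `input` and `train` fields of
 * network_state are relied on here, everything else stays zeroed.
 *
 *   int batch = 1, steps = 4, inputs = 64, outputs = 128;
 *   layer l = make_gru_layer(batch*steps, inputs, outputs, steps, 0);
 *
 *   float *X = calloc(inputs*batch*steps, sizeof(float)); // time-major input
 *   network_state s = {0};
 *   s.train = 0;
 *   s.input = X;
 *   forward_gru_layer(l, s);   // l.output now holds outputs*batch*steps values
 */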

void update_gru_layer(layer l, int batch, float learning_rate, float momentum, float decay)
{
    /* Apply weight updates to all six gate sublayers. */
    update_connected_layer(*(l.input_z_layer), batch, learning_rate, momentum, decay);
    update_connected_layer(*(l.input_r_layer), batch, learning_rate, momentum, decay);
    update_connected_layer(*(l.input_h_layer), batch, learning_rate, momentum, decay);
    update_connected_layer(*(l.state_z_layer), batch, learning_rate, momentum, decay);
    update_connected_layer(*(l.state_r_layer), batch, learning_rate, momentum, decay);
    update_connected_layer(*(l.state_h_layer), batch, learning_rate, momentum, decay);
}

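/*
 * Unrolled CPU forward pass over l.steps time steps. Each iteration forms the
 * gate pre-activations from the current input and hidden state, applies the
 * reset gate to the hidden state before the candidate projection, blends the
 * old state and the candidate with the update gate, then advances the input,
 * output and sublayer pointers to the next time step.
 */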
void forward_gru_layer(layer l, network_state state)
{
    network_state s = {0};
    s.train = state.train;
    int i;
    layer input_z_layer = *(l.input_z_layer);
    layer input_r_layer = *(l.input_r_layer);
    layer input_h_layer = *(l.input_h_layer);

    layer state_z_layer = *(l.state_z_layer);
    layer state_r_layer = *(l.state_r_layer);
    layer state_h_layer = *(l.state_h_layer);

    fill_cpu(l.outputs * l.batch * l.steps, 0, input_z_layer.delta, 1);
    fill_cpu(l.outputs * l.batch * l.steps, 0, input_r_layer.delta, 1);
    fill_cpu(l.outputs * l.batch * l.steps, 0, input_h_layer.delta, 1);

    fill_cpu(l.outputs * l.batch * l.steps, 0, state_z_layer.delta, 1);
    fill_cpu(l.outputs * l.batch * l.steps, 0, state_r_layer.delta, 1);
    fill_cpu(l.outputs * l.batch * l.steps, 0, state_h_layer.delta, 1);
    if(state.train) {
        fill_cpu(l.outputs * l.batch * l.steps, 0, l.delta, 1);
        copy_cpu(l.outputs*l.batch, l.state, 1, l.prev_state, 1);
    }

    for (i = 0; i < l.steps; ++i) {
        /* Recurrent contributions: U_z h_{t-1} and U_r h_{t-1}. */
        s.input = l.state;
        forward_connected_layer(state_z_layer, s);
        forward_connected_layer(state_r_layer, s);

        /* Input contributions: W_z x_t, W_r x_t, W_h x_t. */
        s.input = state.input;
        forward_connected_layer(input_z_layer, s);
        forward_connected_layer(input_r_layer, s);
        forward_connected_layer(input_h_layer, s);

        /* Gate pre-activations, then logistic squashing. */
        copy_cpu(l.outputs*l.batch, input_z_layer.output, 1, l.z_cpu, 1);
        axpy_cpu(l.outputs*l.batch, 1, state_z_layer.output, 1, l.z_cpu, 1);

        copy_cpu(l.outputs*l.batch, input_r_layer.output, 1, l.r_cpu, 1);
        axpy_cpu(l.outputs*l.batch, 1, state_r_layer.output, 1, l.r_cpu, 1);

        activate_array(l.z_cpu, l.outputs*l.batch, LOGISTIC);
        activate_array(l.r_cpu, l.outputs*l.batch, LOGISTIC);

        /* Reset-gated state r_t .* h_{t-1}, fed through U_h. */
        copy_cpu(l.outputs*l.batch, l.state, 1, l.forgot_state, 1);
        mul_cpu(l.outputs*l.batch, l.r_cpu, 1, l.forgot_state, 1);

        s.input = l.forgot_state;
        forward_connected_layer(state_h_layer, s);

        /* Candidate state h~_t. */
        copy_cpu(l.outputs*l.batch, input_h_layer.output, 1, l.h_cpu, 1);
        axpy_cpu(l.outputs*l.batch, 1, state_h_layer.output, 1, l.h_cpu, 1);

        #ifdef USET
        activate_array(l.h_cpu, l.outputs*l.batch, TANH);
        #else
        activate_array(l.h_cpu, l.outputs*l.batch, LOGISTIC);
        #endif

        /* h_t = z_t .* h_{t-1} + (1 - z_t) .* h~_t */
        weighted_sum_cpu(l.state, l.h_cpu, l.z_cpu, l.outputs*l.batch, l.output);

        copy_cpu(l.outputs*l.batch, l.output, 1, l.state, 1);

        /* Step to the next time slice. */
        state.input += l.inputs*l.batch;
        l.output += l.outputs*l.batch;
        increment_layer(&input_z_layer, 1);
        increment_layer(&input_r_layer, 1);
        increment_layer(&input_h_layer, 1);

        increment_layer(&state_z_layer, 1);
        increment_layer(&state_r_layer, 1);
        increment_layer(&state_h_layer, 1);
    }
}

void backward_gru_layer(layer l, network_state state)
{
    /* CPU backward pass is not implemented in this version. */
}

#ifdef GPU

void pull_gru_layer(layer l)
{
    /* Not implemented. */
}

void push_gru_layer(layer l)
{
    /* Not implemented. */
}

void update_gru_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay)
{
    update_connected_layer_gpu(*(l.input_r_layer), batch, learning_rate, momentum, decay);
    update_connected_layer_gpu(*(l.input_z_layer), batch, learning_rate, momentum, decay);
    update_connected_layer_gpu(*(l.input_h_layer), batch, learning_rate, momentum, decay);
    update_connected_layer_gpu(*(l.state_r_layer), batch, learning_rate, momentum, decay);
    update_connected_layer_gpu(*(l.state_z_layer), batch, learning_rate, momentum, decay);
    update_connected_layer_gpu(*(l.state_h_layer), batch, learning_rate, momentum, decay);
}

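/* GPU forward pass: mirrors forward_gru_layer step for step, operating on the
 * *_gpu buffers with the *_ongpu BLAS kernels. */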
void forward_gru_layer_gpu(layer l, network_state state)
{
    network_state s = {0};
    s.train = state.train;
    int i;
    layer input_z_layer = *(l.input_z_layer);
    layer input_r_layer = *(l.input_r_layer);
    layer input_h_layer = *(l.input_h_layer);

    layer state_z_layer = *(l.state_z_layer);
    layer state_r_layer = *(l.state_r_layer);
    layer state_h_layer = *(l.state_h_layer);

    fill_ongpu(l.outputs * l.batch * l.steps, 0, input_z_layer.delta_gpu, 1);
    fill_ongpu(l.outputs * l.batch * l.steps, 0, input_r_layer.delta_gpu, 1);
    fill_ongpu(l.outputs * l.batch * l.steps, 0, input_h_layer.delta_gpu, 1);

    fill_ongpu(l.outputs * l.batch * l.steps, 0, state_z_layer.delta_gpu, 1);
    fill_ongpu(l.outputs * l.batch * l.steps, 0, state_r_layer.delta_gpu, 1);
    fill_ongpu(l.outputs * l.batch * l.steps, 0, state_h_layer.delta_gpu, 1);
    if(state.train) {
        fill_ongpu(l.outputs * l.batch * l.steps, 0, l.delta_gpu, 1);
        copy_ongpu(l.outputs*l.batch, l.state_gpu, 1, l.prev_state_gpu, 1);
    }

    for (i = 0; i < l.steps; ++i) {
        s.input = l.state_gpu;
        forward_connected_layer_gpu(state_z_layer, s);
        forward_connected_layer_gpu(state_r_layer, s);

        s.input = state.input;
        forward_connected_layer_gpu(input_z_layer, s);
        forward_connected_layer_gpu(input_r_layer, s);
        forward_connected_layer_gpu(input_h_layer, s);

        copy_ongpu(l.outputs*l.batch, input_z_layer.output_gpu, 1, l.z_gpu, 1);
        axpy_ongpu(l.outputs*l.batch, 1, state_z_layer.output_gpu, 1, l.z_gpu, 1);

        copy_ongpu(l.outputs*l.batch, input_r_layer.output_gpu, 1, l.r_gpu, 1);
        axpy_ongpu(l.outputs*l.batch, 1, state_r_layer.output_gpu, 1, l.r_gpu, 1);

        activate_array_ongpu(l.z_gpu, l.outputs*l.batch, LOGISTIC);
        activate_array_ongpu(l.r_gpu, l.outputs*l.batch, LOGISTIC);

        copy_ongpu(l.outputs*l.batch, l.state_gpu, 1, l.forgot_state_gpu, 1);
        mul_ongpu(l.outputs*l.batch, l.r_gpu, 1, l.forgot_state_gpu, 1);

        s.input = l.forgot_state_gpu;
        forward_connected_layer_gpu(state_h_layer, s);

        copy_ongpu(l.outputs*l.batch, input_h_layer.output_gpu, 1, l.h_gpu, 1);
        axpy_ongpu(l.outputs*l.batch, 1, state_h_layer.output_gpu, 1, l.h_gpu, 1);

        #ifdef USET
        activate_array_ongpu(l.h_gpu, l.outputs*l.batch, TANH);
        #else
        activate_array_ongpu(l.h_gpu, l.outputs*l.batch, LOGISTIC);
        #endif

        weighted_sum_gpu(l.state_gpu, l.h_gpu, l.z_gpu, l.outputs*l.batch, l.output_gpu);

        copy_ongpu(l.outputs*l.batch, l.output_gpu, 1, l.state_gpu, 1);

        state.input += l.inputs*l.batch;
        l.output_gpu += l.outputs*l.batch;
        increment_layer(&input_z_layer, 1);
        increment_layer(&input_r_layer, 1);
        increment_layer(&input_h_layer, 1);

        increment_layer(&state_z_layer, 1);
        increment_layer(&state_r_layer, 1);
        increment_layer(&state_h_layer, 1);
    }
}

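/*
 * GPU backward pass (backpropagation through time): starts at the last time
 * step and walks backward. At each step the output gradient in l.delta_gpu is
 * split across the previous state, the candidate h~ and the update gate z
 * (matching the forward weighted_sum), the candidate gradient is pushed back
 * through the reset-gated state to produce gradients for r and for h_{t-1},
 * and each of the six connected sublayers back-propagates into its own
 * weights and into the layer inputs.
 */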
void backward_gru_layer_gpu(layer l, network_state state)
{
    network_state s = {0};
    s.train = state.train;
    int i;
    layer input_z_layer = *(l.input_z_layer);
    layer input_r_layer = *(l.input_r_layer);
    layer input_h_layer = *(l.input_h_layer);

    layer state_z_layer = *(l.state_z_layer);
    layer state_r_layer = *(l.state_r_layer);
    layer state_h_layer = *(l.state_h_layer);

    /* Move every pointer to the last time step before walking backward. */
    increment_layer(&input_z_layer, l.steps - 1);
    increment_layer(&input_r_layer, l.steps - 1);
    increment_layer(&input_h_layer, l.steps - 1);

    increment_layer(&state_z_layer, l.steps - 1);
    increment_layer(&state_r_layer, l.steps - 1);
    increment_layer(&state_h_layer, l.steps - 1);

    state.input += l.inputs*l.batch*(l.steps-1);
    if(state.delta) state.delta += l.inputs*l.batch*(l.steps-1);
    l.output_gpu += l.outputs*l.batch*(l.steps-1);
    l.delta_gpu += l.outputs*l.batch*(l.steps-1);
    for (i = l.steps-1; i >= 0; --i) {
        if(i != 0) copy_ongpu(l.outputs*l.batch, l.output_gpu - l.outputs*l.batch, 1, l.prev_state_gpu, 1);
        float *prev_delta_gpu = (i == 0) ? 0 : l.delta_gpu - l.outputs*l.batch;

        /* Recompute the gate activations for this time step. */
        copy_ongpu(l.outputs*l.batch, input_z_layer.output_gpu, 1, l.z_gpu, 1);
        axpy_ongpu(l.outputs*l.batch, 1, state_z_layer.output_gpu, 1, l.z_gpu, 1);

        copy_ongpu(l.outputs*l.batch, input_r_layer.output_gpu, 1, l.r_gpu, 1);
        axpy_ongpu(l.outputs*l.batch, 1, state_r_layer.output_gpu, 1, l.r_gpu, 1);

        activate_array_ongpu(l.z_gpu, l.outputs*l.batch, LOGISTIC);
        activate_array_ongpu(l.r_gpu, l.outputs*l.batch, LOGISTIC);

        copy_ongpu(l.outputs*l.batch, input_h_layer.output_gpu, 1, l.h_gpu, 1);
        axpy_ongpu(l.outputs*l.batch, 1, state_h_layer.output_gpu, 1, l.h_gpu, 1);

        #ifdef USET
        activate_array_ongpu(l.h_gpu, l.outputs*l.batch, TANH);
        #else
        activate_array_ongpu(l.h_gpu, l.outputs*l.batch, LOGISTIC);
        #endif

        /* Split d(h_t) across h_{t-1}, the candidate and the update gate. */
        weighted_delta_gpu(l.prev_state_gpu, l.h_gpu, l.z_gpu, prev_delta_gpu, input_h_layer.delta_gpu, input_z_layer.delta_gpu, l.outputs*l.batch, l.delta_gpu);

        #ifdef USET
        gradient_array_ongpu(l.h_gpu, l.outputs*l.batch, TANH, input_h_layer.delta_gpu);
        #else
        gradient_array_ongpu(l.h_gpu, l.outputs*l.batch, LOGISTIC, input_h_layer.delta_gpu);
        #endif

        copy_ongpu(l.outputs*l.batch, input_h_layer.delta_gpu, 1, state_h_layer.delta_gpu, 1);

        /* Back-propagate the candidate through U_h and the reset-gated state. */
        copy_ongpu(l.outputs*l.batch, l.prev_state_gpu, 1, l.forgot_state_gpu, 1);
        mul_ongpu(l.outputs*l.batch, l.r_gpu, 1, l.forgot_state_gpu, 1);
        fill_ongpu(l.outputs*l.batch, 0, l.forgot_delta_gpu, 1);

        s.input = l.forgot_state_gpu;
        s.delta = l.forgot_delta_gpu;

        backward_connected_layer_gpu(state_h_layer, s);
        if(prev_delta_gpu) mult_add_into_gpu(l.outputs*l.batch, l.forgot_delta_gpu, l.r_gpu, prev_delta_gpu);
        mult_add_into_gpu(l.outputs*l.batch, l.forgot_delta_gpu, l.prev_state_gpu, input_r_layer.delta_gpu);

        gradient_array_ongpu(l.r_gpu, l.outputs*l.batch, LOGISTIC, input_r_layer.delta_gpu);
        copy_ongpu(l.outputs*l.batch, input_r_layer.delta_gpu, 1, state_r_layer.delta_gpu, 1);

        gradient_array_ongpu(l.z_gpu, l.outputs*l.batch, LOGISTIC, input_z_layer.delta_gpu);
        copy_ongpu(l.outputs*l.batch, input_z_layer.delta_gpu, 1, state_z_layer.delta_gpu, 1);

        /* Gradients through the recurrent (state-side) projections. */
        s.input = l.prev_state_gpu;
        s.delta = prev_delta_gpu;

        backward_connected_layer_gpu(state_r_layer, s);
        backward_connected_layer_gpu(state_z_layer, s);

        /* Gradients through the input-side projections into state.delta. */
        s.input = state.input;
        s.delta = state.delta;

        backward_connected_layer_gpu(input_h_layer, s);
        backward_connected_layer_gpu(input_r_layer, s);
        backward_connected_layer_gpu(input_z_layer, s);

        /* Step back one time slice. */
        state.input -= l.inputs*l.batch;
        if(state.delta) state.delta -= l.inputs*l.batch;
        l.output_gpu -= l.outputs*l.batch;
        l.delta_gpu -= l.outputs*l.batch;
        increment_layer(&input_z_layer, -1);
        increment_layer(&input_r_layer, -1);
        increment_layer(&input_h_layer, -1);

        increment_layer(&state_z_layer, -1);
        increment_layer(&state_r_layer, -1);
        increment_layer(&state_h_layer, -1);
    }
}
#endif

