#include "normalization_layer.h"
#include "blas.h"
#include <stdio.h>
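
/*
** Local Response Normalization (LRN) across channels. Restating what the
** forward pass below computes, for each spatial position and batch item:
**
**     norms[k]  = kappa + alpha * sum over window W(k) of input[j]^2
**     output[k] = input[k] * norms[k]^(-beta)
**
** where W(k) covers the `size` channels from k-(size-1)/2 to k+size/2,
** clipped to the valid channel range [0, c-1].
*/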
layer make_normalization_layer(int batch, int w, int h, int c, int size, float alpha, float beta, float kappa)
{
    fprintf(stderr, "Local Response Normalization Layer: %d x %d x %d image, %d size\n", w, h, c, size);
    layer layer = {0};
    layer.type = NORMALIZATION;
    layer.batch = batch;
    layer.h = layer.out_h = h;
    layer.w = layer.out_w = w;
    layer.c = layer.out_c = c;
    layer.kappa = kappa;
    layer.size = size;
    layer.alpha = alpha;
    layer.beta = beta;
    layer.output = calloc(h * w * c * batch, sizeof(float));
    layer.delta = calloc(h * w * c * batch, sizeof(float));
    layer.squared = calloc(h * w * c * batch, sizeof(float));
    layer.norms = calloc(h * w * c * batch, sizeof(float));
    layer.inputs = w*h*c;
    layer.outputs = layer.inputs;

    layer.forward = forward_normalization_layer;
    layer.backward = backward_normalization_layer;
#ifdef GPU
    layer.forward_gpu = forward_normalization_layer_gpu;
    layer.backward_gpu = backward_normalization_layer_gpu;

    layer.output_gpu = cuda_make_array(layer.output, h * w * c * batch);
    layer.delta_gpu = cuda_make_array(layer.delta, h * w * c * batch);
    layer.squared_gpu = cuda_make_array(layer.squared, h * w * c * batch);
    layer.norms_gpu = cuda_make_array(layer.norms, h * w * c * batch);
#endif
    return layer;
}
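
/*
** Usage sketch (illustrative only; the hyperparameter values below are
** assumptions in the style of AlexNet-like LRN settings, not values taken
** from this file):
**
**     layer l = make_normalization_layer(batch, 224, 224, 96, 5, 0.0001f, 0.75f, 1.0f);
**     forward_normalization_layer(l, state);
*/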

/*
** Resize the layer for a new input width/height: CPU buffers are grown with
** realloc, and the GPU copies are freed and re-created from the CPU arrays.
*/
void resize_normalization_layer(layer *layer, int w, int h)
{
    int c = layer->c;
    int batch = layer->batch;
    layer->h = h;
    layer->w = w;
    layer->out_h = h;
    layer->out_w = w;
    layer->inputs = w*h*c;
    layer->outputs = layer->inputs;
    layer->output = realloc(layer->output, h * w * c * batch * sizeof(float));
    layer->delta = realloc(layer->delta, h * w * c * batch * sizeof(float));
    layer->squared = realloc(layer->squared, h * w * c * batch * sizeof(float));
    layer->norms = realloc(layer->norms, h * w * c * batch * sizeof(float));
#ifdef GPU
    cuda_free(layer->output_gpu);
    cuda_free(layer->delta_gpu);
    cuda_free(layer->squared_gpu);
    cuda_free(layer->norms_gpu);
    layer->output_gpu = cuda_make_array(layer->output, h * w * c * batch);
    layer->delta_gpu = cuda_make_array(layer->delta, h * w * c * batch);
    layer->squared_gpu = cuda_make_array(layer->squared, h * w * c * batch);
    layer->norms_gpu = cuda_make_array(layer->norms, h * w * c * batch);
#endif
}

void forward_normalization_layer(const layer layer, network_state state)
{
    int k,b;
    int w = layer.w;
    int h = layer.h;
    int c = layer.c;
    scal_cpu(w*h*c*layer.batch, 0, layer.squared, 1);

    for(b = 0; b < layer.batch; ++b){
        float *squared = layer.squared + w*h*c*b;
        float *norms = layer.norms + w*h*c*b;
        float *input = state.input + w*h*c*b;
        pow_cpu(w*h*c, 2, input, 1, squared, 1);

        // Channel 0: start from kappa and accumulate the squared channels
        // 0..size/2 that fall inside its window.
        const_cpu(w*h, layer.kappa, norms, 1);
        for(k = 0; k < layer.size/2 + 1; ++k){
            axpy_cpu(w*h, layer.alpha, squared + w*h*k, 1, norms, 1);
        }

        // Remaining channels: slide the window by one channel, subtracting the
        // squared channel that leaves and adding the one that enters.
        for(k = 1; k < layer.c; ++k){
            copy_cpu(w*h, norms + w*h*(k-1), 1, norms + w*h*k, 1);
            int prev = k - ((layer.size-1)/2) - 1;
            int next = k + (layer.size/2);
            if(prev >= 0) axpy_cpu(w*h, -layer.alpha, squared + w*h*prev, 1, norms + w*h*k, 1);
            if(next < layer.c) axpy_cpu(w*h, layer.alpha, squared + w*h*next, 1, norms + w*h*k, 1);
        }
    }
    pow_cpu(w*h*c*layer.batch, -layer.beta, layer.norms, 1, layer.output, 1);
    mul_cpu(w*h*c*layer.batch, state.input, 1, layer.output, 1);
}
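
/*
** Note on the channel loop above: rather than re-summing `size` squared
** channels for every output channel, it maintains a running window sum,
**
**     norms[k] = norms[k-1] + alpha * (squared[k + size/2] - squared[k - (size-1)/2 - 1]),
**
** dropping the out-of-range terms at the channel borders. Each channel then
** costs O(w*h) work instead of O(w*h*size).
*/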

void backward_normalization_layer(const layer layer, network_state state)
{
    // Approximate gradient: only the direct term delta * norms^(-beta) is
    // propagated, and state.delta is overwritten rather than accumulated into.
    int w = layer.w;
    int h = layer.h;
    int c = layer.c;
    pow_cpu(w*h*c*layer.batch, -layer.beta, layer.norms, 1, state.delta, 1);
    mul_cpu(w*h*c*layer.batch, layer.delta, 1, state.delta, 1);
}
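
/*
** For reference (not implemented above): writing N_k = kappa + alpha * sum_{j in W(k)} x_j^2
** and y_k = x_k * N_k^(-beta), the full gradient is
**
**     dL/dx_k = delta_k * N_k^(-beta)
**               - 2*alpha*beta * x_k * sum_{i : k in W(i)} delta_i * x_i * N_i^(-beta-1),
**
** of which backward_normalization_layer keeps only the first (diagonal) term.
*/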

#ifdef GPU
void forward_normalization_layer_gpu(const layer layer, network_state state)
{
    int k,b;
    int w = layer.w;
    int h = layer.h;
    int c = layer.c;
    scal_ongpu(w*h*c*layer.batch, 0, layer.squared_gpu, 1);

    for(b = 0; b < layer.batch; ++b){
        float *squared = layer.squared_gpu + w*h*c*b;
        float *norms = layer.norms_gpu + w*h*c*b;
        float *input = state.input + w*h*c*b;
        pow_ongpu(w*h*c, 2, input, 1, squared, 1);

        // Same windowing scheme as the CPU path, using the *_ongpu BLAS wrappers.
        const_ongpu(w*h, layer.kappa, norms, 1);
        for(k = 0; k < layer.size/2 + 1; ++k){
            axpy_ongpu(w*h, layer.alpha, squared + w*h*k, 1, norms, 1);
        }

        for(k = 1; k < layer.c; ++k){
            copy_ongpu(w*h, norms + w*h*(k-1), 1, norms + w*h*k, 1);
            int prev = k - ((layer.size-1)/2) - 1;
            int next = k + (layer.size/2);
            if(prev >= 0) axpy_ongpu(w*h, -layer.alpha, squared + w*h*prev, 1, norms + w*h*k, 1);
            if(next < layer.c) axpy_ongpu(w*h, layer.alpha, squared + w*h*next, 1, norms + w*h*k, 1);
        }
    }
    pow_ongpu(w*h*c*layer.batch, -layer.beta, layer.norms_gpu, 1, layer.output_gpu, 1);
    mul_ongpu(w*h*c*layer.batch, state.input, 1, layer.output_gpu, 1);
}

void backward_normalization_layer_gpu(const layer layer, network_state state)
{
    // Same approximation as the CPU backward pass.
    int w = layer.w;
    int h = layer.h;
    int c = layer.c;
    pow_ongpu(w*h*c*layer.batch, -layer.beta, layer.norms_gpu, 1, state.delta, 1);
    mul_ongpu(w*h*c*layer.batch, layer.delta_gpu, 1, state.delta, 1);
}
#endif