#include "softmax_layer.h"
#include "blas.h"
#include "cuda.h"
#include <float.h>
#include <math.h>
#include <stdlib.h>
#include <stdio.h>
#include <assert.h>

/* Build a softmax layer. "groups" splits each input vector into equal-sized
 * chunks that are softmaxed independently, so inputs must divide evenly. */
softmax_layer make_softmax_layer(int batch, int inputs, int groups)
{
    assert(inputs%groups == 0);
    fprintf(stderr, "softmax %4d\n", inputs);
    softmax_layer l = {0};
    l.type = SOFTMAX;
    l.batch = batch;
    l.groups = groups;
    l.inputs = inputs;
    l.outputs = inputs;
    l.output = calloc(inputs*batch, sizeof(float));
    l.delta = calloc(inputs*batch, sizeof(float));

    l.forward = forward_softmax_layer;
    l.backward = backward_softmax_layer;
#ifdef GPU
    l.forward_gpu = forward_softmax_layer_gpu;
    l.backward_gpu = backward_softmax_layer_gpu;

    l.output_gpu = cuda_make_array(l.output, inputs*batch);
    l.delta_gpu = cuda_make_array(l.delta, inputs*batch);
#endif
    return l;
}

/* Hierarchical softmax: normalize each node group of the label tree
 * independently, walking the groups in order within every batch element. */
void softmax_tree(float *input, int batch, int inputs, float temp, tree *hierarchy, float *output)
{
    int b;
    for(b = 0; b < batch; ++b){
        int i;
        int count = 0;
        for(i = 0; i < hierarchy->groups; ++i){
            int group_size = hierarchy->group_size[i];
            softmax(input + b*inputs + count, group_size, temp, output + b*inputs + count);
            count += group_size;
        }
    }
}

void forward_softmax_layer(const softmax_layer l, network_state state)
{
    int b;
    /* Fold the groups into the batch dimension: each group is softmaxed
     * as if it were its own batch element. */
    int inputs = l.inputs / l.groups;
    int batch = l.batch * l.groups;
    if(l.softmax_tree){
        softmax_tree(state.input, batch, inputs, l.temperature, l.softmax_tree, l.output);
    } else {
        for(b = 0; b < batch; ++b){
            softmax(state.input + b*inputs, inputs, l.temperature, l.output + b*inputs);
        }
    }
}

/* The loss derivative is computed elsewhere and stored in l.delta,
 * so backward just accumulates it into the upstream delta. */
void backward_softmax_layer(const softmax_layer l, network_state state)
{
    int i;
    for(i = 0; i < l.inputs*l.batch; ++i){
        state.delta[i] += l.delta[i];
    }
}

#ifdef GPU

/* Copy the layer's output from device memory back to the host buffer. */
void pull_softmax_layer_output(const softmax_layer layer)
{
    cuda_pull_array(layer.output_gpu, layer.output, layer.inputs*layer.batch);
}

void forward_softmax_layer_gpu(const softmax_layer l, network_state state)
{
    int inputs = l.inputs / l.groups;
    int batch = l.batch * l.groups;
    if(l.softmax_tree){
        /* One softmax_gpu call per tree group; "inputs" is the stride
         * between consecutive batch vectors and "count" the offset of
         * the current group within each vector. */
        int i;
        int count = 0;
        for (i = 0; i < l.softmax_tree->groups; ++i) {
            int group_size = l.softmax_tree->group_size[i];
            softmax_gpu(state.input + count, group_size, inputs, batch, l.temperature, l.output_gpu + count);
            count += group_size;
        }
    } else {
        softmax_gpu(state.input, inputs, inputs, batch, l.temperature, l.output_gpu);
    }
}

/* GPU backward: state.delta += 1 * layer.delta_gpu via axpy. */
void backward_softmax_layer_gpu(const softmax_layer layer, network_state state)
{
    axpy_ongpu(layer.batch*layer.inputs, 1, layer.delta_gpu, 1, state.delta, 1);
}

#endif
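
/* Reference sketch (not part of the original file): the temperature softmax
 * that this layer calls is defined elsewhere (in blas.c in darknet). The
 * static helper below is a minimal re-derivation assumed from the call sites
 * above, under a different name (softmax_reference) so it cannot clash with
 * the real symbol at link time. It subtracts the running max before
 * exponentiating for numerical stability. */
static void softmax_reference(float *input, int n, float temp, float *output)
{
    int i;
    float sum = 0;
    float largest = -FLT_MAX;
    /* Find the largest logit so expf() never overflows. */
    for(i = 0; i < n; ++i){
        if(input[i] > largest) largest = input[i];
    }
    /* Exponentiate shifted, temperature-scaled logits and accumulate the normalizer. */
    for(i = 0; i < n; ++i){
        float e = expf((input[i] - largest)/temp);
        sum += e;
        output[i] = e;
    }
    /* Normalize so the outputs sum to 1. */
    for(i = 0; i < n; ++i){
        output[i] /= sum;
    }
}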