00001 #include "blas.h" 00002 #include "math.h" 00003 #include <assert.h> 00004 #include <float.h> 00005 #include <stdio.h> 00006 #include <stdlib.h> 00007 #include <string.h> 00008 void reorg_cpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out) 00009 { 00010 int b,i,j,k; 00011 int out_c = c/(stride*stride); 00012 00013 for(b = 0; b < batch; ++b){ 00014 for(k = 0; k < c; ++k){ 00015 for(j = 0; j < h; ++j){ 00016 for(i = 0; i < w; ++i){ 00017 int in_index = i + w*(j + h*(k + c*b)); 00018 int c2 = k % out_c; 00019 int offset = k / out_c; 00020 int w2 = i*stride + offset % stride; 00021 int h2 = j*stride + offset / stride; 00022 int out_index = w2 + w*stride*(h2 + h*stride*(c2 + out_c*b)); 00023 if(forward) out[out_index] = x[in_index]; 00024 else out[in_index] = x[out_index]; 00025 } 00026 } 00027 } 00028 } 00029 } 00030 00031 void flatten(float *x, int size, int layers, int batch, int forward) 00032 { 00033 float *swap = calloc(size*layers*batch, sizeof(float)); 00034 int i,c,b; 00035 for(b = 0; b < batch; ++b){ 00036 for(c = 0; c < layers; ++c){ 00037 for(i = 0; i < size; ++i){ 00038 int i1 = b*layers*size + c*size + i; 00039 int i2 = b*layers*size + i*layers + c; 00040 if (forward) swap[i2] = x[i1]; 00041 else swap[i1] = x[i2]; 00042 } 00043 } 00044 } 00045 memcpy(x, swap, size*layers*batch*sizeof(float)); 00046 free(swap); 00047 } 00048 00049 void weighted_sum_cpu(float *a, float *b, float *s, int n, float *c) 00050 { 00051 int i; 00052 for(i = 0; i < n; ++i){ 00053 c[i] = s[i]*a[i] + (1-s[i])*(b ? b[i] : 0); 00054 } 00055 } 00056 00057 void shortcut_cpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float *out) 00058 { 00059 int stride = w1/w2; 00060 int sample = w2/w1; 00061 assert(stride == h1/h2); 00062 assert(sample == h2/h1); 00063 if(stride < 1) stride = 1; 00064 if(sample < 1) sample = 1; 00065 int minw = (w1 < w2) ? w1 : w2; 00066 int minh = (h1 < h2) ? h1 : h2; 00067 int minc = (c1 < c2) ? c1 : c2; 00068 00069 int i,j,k,b; 00070 for(b = 0; b < batch; ++b){ 00071 for(k = 0; k < minc; ++k){ 00072 for(j = 0; j < minh; ++j){ 00073 for(i = 0; i < minw; ++i){ 00074 int out_index = i*sample + w2*(j*sample + h2*(k + c2*b)); 00075 int add_index = i*stride + w1*(j*stride + h1*(k + c1*b)); 00076 out[out_index] += add[add_index]; 00077 } 00078 } 00079 } 00080 } 00081 } 00082 00083 void mean_cpu(float *x, int batch, int filters, int spatial, float *mean) 00084 { 00085 float scale = 1./(batch * spatial); 00086 int i,j,k; 00087 for(i = 0; i < filters; ++i){ 00088 mean[i] = 0; 00089 for(j = 0; j < batch; ++j){ 00090 for(k = 0; k < spatial; ++k){ 00091 int index = j*filters*spatial + i*spatial + k; 00092 mean[i] += x[index]; 00093 } 00094 } 00095 mean[i] *= scale; 00096 } 00097 } 00098 00099 void variance_cpu(float *x, float *mean, int batch, int filters, int spatial, float *variance) 00100 { 00101 float scale = 1./(batch * spatial - 1); 00102 int i,j,k; 00103 for(i = 0; i < filters; ++i){ 00104 variance[i] = 0; 00105 for(j = 0; j < batch; ++j){ 00106 for(k = 0; k < spatial; ++k){ 00107 int index = j*filters*spatial + i*spatial + k; 00108 variance[i] += pow((x[index] - mean[i]), 2); 00109 } 00110 } 00111 variance[i] *= scale; 00112 } 00113 } 00114 00115 void normalize_cpu(float *x, float *mean, float *variance, int batch, int filters, int spatial) 00116 { 00117 int b, f, i; 00118 for(b = 0; b < batch; ++b){ 00119 for(f = 0; f < filters; ++f){ 00120 for(i = 0; i < spatial; ++i){ 00121 int index = b*filters*spatial + f*spatial + i; 00122 x[index] = (x[index] - mean[f])/(sqrt(variance[f]) + .000001f); 00123 } 00124 } 00125 } 00126 } 00127 00128 void const_cpu(int N, float ALPHA, float *X, int INCX) 00129 { 00130 int i; 00131 for(i = 0; i < N; ++i) X[i*INCX] = ALPHA; 00132 } 00133 00134 void mul_cpu(int N, float *X, int INCX, float *Y, int INCY) 00135 { 00136 int i; 00137 for(i = 0; i < N; ++i) Y[i*INCY] *= X[i*INCX]; 00138 } 00139 00140 void pow_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY) 00141 { 00142 int i; 00143 for(i = 0; i < N; ++i) Y[i*INCY] = pow(X[i*INCX], ALPHA); 00144 } 00145 00146 void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY) 00147 { 00148 int i; 00149 for(i = 0; i < N; ++i) Y[i*INCY] += ALPHA*X[i*INCX]; 00150 } 00151 00152 void scal_cpu(int N, float ALPHA, float *X, int INCX) 00153 { 00154 int i; 00155 for(i = 0; i < N; ++i) X[i*INCX] *= ALPHA; 00156 } 00157 00158 void fill_cpu(int N, float ALPHA, float *X, int INCX) 00159 { 00160 int i; 00161 for(i = 0; i < N; ++i) X[i*INCX] = ALPHA; 00162 } 00163 00164 void copy_cpu(int N, float *X, int INCX, float *Y, int INCY) 00165 { 00166 int i; 00167 for(i = 0; i < N; ++i) Y[i*INCY] = X[i*INCX]; 00168 } 00169 00170 void smooth_l1_cpu(int n, float *pred, float *truth, float *delta, float *error) 00171 { 00172 int i; 00173 for(i = 0; i < n; ++i){ 00174 float diff = truth[i] - pred[i]; 00175 float abs_val = fabs(diff); 00176 if(abs_val < 1) { 00177 error[i] = diff * diff; 00178 delta[i] = diff; 00179 } 00180 else { 00181 error[i] = 2*abs_val - 1; 00182 delta[i] = (diff < 0) ? -1 : 1; 00183 } 00184 } 00185 } 00186 00187 void l2_cpu(int n, float *pred, float *truth, float *delta, float *error) 00188 { 00189 int i; 00190 for(i = 0; i < n; ++i){ 00191 float diff = truth[i] - pred[i]; 00192 error[i] = diff * diff; 00193 delta[i] = diff; 00194 } 00195 } 00196 00197 float dot_cpu(int N, float *X, int INCX, float *Y, int INCY) 00198 { 00199 int i; 00200 float dot = 0; 00201 for(i = 0; i < N; ++i) dot += X[i*INCX] * Y[i*INCY]; 00202 return dot; 00203 } 00204 00205 void softmax(float *input, int n, float temp, float *output) 00206 { 00207 int i; 00208 float sum = 0; 00209 float largest = -FLT_MAX; 00210 for(i = 0; i < n; ++i){ 00211 if(input[i] > largest) largest = input[i]; 00212 } 00213 for(i = 0; i < n; ++i){ 00214 float e = exp(input[i]/temp - largest/temp); 00215 sum += e; 00216 output[i] = e; 00217 } 00218 for(i = 0; i < n; ++i){ 00219 output[i] /= sum; 00220 } 00221 } 00222