blas.c
Go to the documentation of this file.
00001 #include "blas.h"
00002 #include "math.h"
00003 #include <assert.h>
00004 #include <float.h>
00005 #include <stdio.h>
00006 #include <stdlib.h>
00007 #include <string.h>
/*
 * Copies elements between two index layouts of the same buffer size.
 *
 * Every (b, k, j, i) position of a batch x c x h x w layout is paired with a
 * position in a batch x c/(stride*stride) x h*stride x w*stride layout: the
 * channel k is split into a target channel (k % out_c) and an offset
 * (k / out_c) that selects the sub-position inside a stride x stride cell.
 *
 * x       - source buffer
 * w, h, c - width, height and channel count used for the first layout
 * batch   - number of batch entries
 * stride  - cell size used to build the second layout
 * forward - non-zero: out[second] = x[first]; zero: out[first] = x[second]
 * out     - destination buffer
 */
void reorg_cpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out)
{
    const int channels_out = c/(stride*stride);
    int n, ch, row, col;

    for(n = 0; n < batch; ++n){
        for(ch = 0; ch < c; ++ch){
            for(row = 0; row < h; ++row){
                for(col = 0; col < w; ++col){
                    /* Position in the batch x c x h x w layout. */
                    const int packed = col + w*(row + h*(ch + c*n));

                    /* Split the channel into target channel and cell offset. */
                    const int target_c = ch % channels_out;
                    const int cell = ch / channels_out;
                    const int col2 = col*stride + cell % stride;
                    const int row2 = row*stride + cell / stride;

                    /* Position in the expanded layout. */
                    const int spread = col2 + w*stride*(row2 + h*stride*(target_c + channels_out*n));

                    if(forward){
                        out[spread] = x[packed];
                    } else {
                        out[packed] = x[spread];
                    }
                }
            }
        }
    }
}
00030 
/*
 * Transposes, in place, the (layers, size) plane of every batch entry of x.
 *
 * x       - buffer of batch*layers*size floats, rewritten in place
 * size    - number of positions per layer
 * layers  - number of layers per batch entry
 * batch   - number of batch entries
 * forward - non-zero: element [b][c][i] moves to [b][i][c];
 *           zero:     element [b][i][c] moves to [b][c][i]
 *
 * A non-positive dimension leaves x untouched. If the temporary buffer
 * cannot be allocated the function reports the failure on stderr and
 * terminates the process, since it has no way to return an error.
 */
void flatten(float *x, int size, int layers, int batch, int forward)
{
    size_t total;
    float *swap;
    int i,c,b;

    /* Nothing to move; also keeps negative counts away from the allocator. */
    if(size <= 0 || layers <= 0 || batch <= 0) return;

    /* Count elements in size_t so the product is not truncated to int. */
    total = (size_t)size*(size_t)layers*(size_t)batch;

    /* calloc also rejects a total*sizeof(float) that would overflow. */
    swap = calloc(total, sizeof(float));
    if(!swap){
        fprintf(stderr, "flatten: failed to allocate %zu floats\n", total);
        exit(EXIT_FAILURE);
    }

    for(b = 0; b < batch; ++b){
        for(c = 0; c < layers; ++c){
            for(i = 0; i < size; ++i){
                int i1 = b*layers*size + c*size + i;   /* [b][c][i] */
                int i2 = b*layers*size + i*layers + c; /* [b][i][c] */
                if (forward) swap[i2] = x[i1];
                else swap[i1] = x[i2];
            }
        }
    }
    memcpy(x, swap, total*sizeof(float));
    free(swap);
}
00048 
/*
 * Element-wise blend of two arrays: c[i] = s[i]*a[i] + (1 - s[i])*b[i].
 *
 * a - first input array
 * b - second input array; when NULL its elements are taken as 0
 * s - per-element weight applied to a
 * n - number of elements
 * c - output array
 */
void weighted_sum_cpu(float *a, float *b, float *s, int n, float *c)
{
    int idx = 0;
    while(idx < n){
        const float other = b ? b[idx] : 0;
        c[idx] = s[idx]*a[idx] + (1-s[idx])*other;
        ++idx;
    }
}
00056 
/*
 * Accumulates the `add` buffer (w1 x h1 x c1 per batch entry) into the `out`
 * buffer (w2 x h2 x c2 per batch entry).
 *
 * When the two buffers differ in width/height, the larger one is read or
 * written with an integer step (w1/w2 for `add`, w2/w1 for `out`); the
 * height ratio is asserted to match the width ratio. Only the overlapping
 * min(w), min(h), min(c) range is visited.
 */
void shortcut_cpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float *out)
{
    int add_step = w1/w2;
    int out_step = w2/w1;
    int span_w, span_h, span_c;
    int n, ch, row, col;

    assert(add_step == h1/h2);
    assert(out_step == h2/h1);

    /* Integer division yields 0 on the smaller side; treat that as step 1. */
    if(add_step < 1){
        add_step = 1;
    }
    if(out_step < 1){
        out_step = 1;
    }

    span_w = w2;
    if(w1 < w2) span_w = w1;
    span_h = h2;
    if(h1 < h2) span_h = h1;
    span_c = c2;
    if(c1 < c2) span_c = c1;

    for(n = 0; n < batch; ++n){
        for(ch = 0; ch < span_c; ++ch){
            for(row = 0; row < span_h; ++row){
                for(col = 0; col < span_w; ++col){
                    const int dst = col*out_step + w2*(row*out_step + h2*(ch + c2*n));
                    const int src = col*add_step + w1*(row*add_step + h1*(ch + c1*n));
                    out[dst] += add[src];
                }
            }
        }
    }
}
00082 
/*
 * Computes, for each filter, the average of its values over all batch
 * entries and spatial positions.
 *
 * x       - input indexed as [batch][filters][spatial]
 * batch   - number of batch entries
 * filters - number of filters
 * spatial - number of positions per filter
 * mean    - output array of `filters` averages
 */
void mean_cpu(float *x, int batch, int filters, int spatial, float *mean)
{
    const float inv_count = 1./(batch * spatial);
    int f, n, s;

    for(f = 0; f < filters; ++f){
        mean[f] = 0;
        n = 0;
        while(n < batch){
            for(s = 0; s < spatial; ++s){
                const int at = n*filters*spatial + f*spatial + s;
                mean[f] += x[at];
            }
            ++n;
        }
        mean[f] *= inv_count;
    }
}
00098 
00099 void variance_cpu(float *x, float *mean, int batch, int filters, int spatial, float *variance)
00100 {
00101     float scale = 1./(batch * spatial - 1);
00102     int i,j,k;
00103     for(i = 0; i < filters; ++i){
00104         variance[i] = 0;
00105         for(j = 0; j < batch; ++j){
00106             for(k = 0; k < spatial; ++k){
00107                 int index = j*filters*spatial + i*spatial + k;
00108                 variance[i] += pow((x[index] - mean[i]), 2);
00109             }
00110         }
00111         variance[i] *= scale;
00112     }
00113 }
00114 
00115 void normalize_cpu(float *x, float *mean, float *variance, int batch, int filters, int spatial)
00116 {
00117     int b, f, i;
00118     for(b = 0; b < batch; ++b){
00119         for(f = 0; f < filters; ++f){
00120             for(i = 0; i < spatial; ++i){
00121                 int index = b*filters*spatial + f*spatial + i;
00122                 x[index] = (x[index] - mean[f])/(sqrt(variance[f]) + .000001f);
00123             }
00124         }
00125     }
00126 }
00127 
/*
 * Stores ALPHA into N elements of X, visiting every INCX-th element
 * starting at X[0].
 */
void const_cpu(int N, float ALPHA, float *X, int INCX)
{
    int n = 0;
    while(n < N){
        X[n*INCX] = ALPHA;
        ++n;
    }
}
00133 
/*
 * Element-wise multiply: Y[i*INCY] *= X[i*INCX] for i in [0, N).
 */
void mul_cpu(int N, float *X, int INCX, float *Y, int INCY)
{
    int n = 0;
    while(n < N){
        const float factor = X[n*INCX];
        Y[n*INCY] *= factor;
        ++n;
    }
}
00139 
00140 void pow_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY)
00141 {
00142     int i;
00143     for(i = 0; i < N; ++i) Y[i*INCY] = pow(X[i*INCX], ALPHA);
00144 }
00145 
/*
 * Scaled accumulate: Y[i*INCY] += ALPHA * X[i*INCX] for i in [0, N).
 */
void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY)
{
    int n = 0;
    while(n < N){
        Y[n*INCY] += ALPHA*X[n*INCX];
        ++n;
    }
}
00151 
/*
 * Scales in place: X[i*INCX] *= ALPHA for i in [0, N).
 */
void scal_cpu(int N, float ALPHA, float *X, int INCX)
{
    int n = 0;
    while(n < N){
        X[n*INCX] *= ALPHA;
        ++n;
    }
}
00157 
/*
 * Fills N elements of X with ALPHA, visiting every INCX-th element
 * starting at X[0].
 */
void fill_cpu(int N, float ALPHA, float *X, int INCX)
{
    int n = 0;
    while(n < N){
        X[n*INCX] = ALPHA;
        ++n;
    }
}
00163 
/*
 * Strided copy: Y[i*INCY] = X[i*INCX] for i in [0, N).
 */
void copy_cpu(int N, float *X, int INCX, float *Y, int INCY)
{
    int n = 0;
    while(n < N){
        const float value = X[n*INCX];
        Y[n*INCY] = value;
        ++n;
    }
}
00169 
/*
 * Per-element piecewise loss on diff = truth[i] - pred[i]:
 *   |diff| <  1 : error = diff*diff,     delta = diff
 *   otherwise   : error = 2*|diff| - 1,  delta = -1 or 1 by the sign of diff
 *
 * n     - number of elements
 * pred  - predicted values
 * truth - target values
 * delta - output, per-element delta
 * error - output, per-element error
 */
void smooth_l1_cpu(int n, float *pred, float *truth, float *delta, float *error)
{
    int idx;
    for(idx = 0; idx < n; ++idx){
        const float gap = truth[idx] - pred[idx];
        const float magnitude = fabs(gap);

        if(magnitude < 1){
            /* Quadratic region. */
            error[idx] = gap * gap;
            delta[idx] = gap;
            continue;
        }

        /* Linear region: only the sign of the gap is passed on. */
        error[idx] = 2*magnitude - 1;
        if(gap < 0){
            delta[idx] = -1;
        } else {
            delta[idx] = 1;
        }
    }
}
00186 
/*
 * Per-element squared error: with diff = truth[i] - pred[i],
 * error[i] = diff*diff and delta[i] = diff.
 */
void l2_cpu(int n, float *pred, float *truth, float *delta, float *error)
{
    int idx = 0;
    while(idx < n){
        const float gap = truth[idx] - pred[idx];
        delta[idx] = gap;
        error[idx] = gap * gap;
        ++idx;
    }
}
00196 
/*
 * Returns the sum over i in [0, N) of X[i*INCX] * Y[i*INCY], accumulated in
 * a float in ascending index order. Returns 0 when N <= 0.
 */
float dot_cpu(int N, float *X, int INCX, float *Y, int INCY)
{
    float total = 0;
    int n = 0;
    while(n < N){
        total += X[n*INCX] * Y[n*INCY];
        ++n;
    }
    return total;
}
00204 
00205 void softmax(float *input, int n, float temp, float *output)
00206 {
00207     int i;
00208     float sum = 0;
00209     float largest = -FLT_MAX;
00210     for(i = 0; i < n; ++i){
00211         if(input[i] > largest) largest = input[i];
00212     }
00213     for(i = 0; i < n; ++i){
00214         float e = exp(input[i]/temp - largest/temp);
00215         sum += e;
00216         output[i] = e;
00217     }
00218     for(i = 0; i < n; ++i){
00219         output[i] /= sum;
00220     }
00221 }
00222 


rail_object_detector
Author(s):
autogenerated on Sat Jun 8 2019 20:26:29