bench_norm.cpp
Go to the documentation of this file.
1 #include <typeinfo>
2 #include <iostream>
3 #include <Eigen/Core>
4 #include "BenchTimer.h"
5 using namespace Eigen;
6 using namespace std;
7 
8 template<typename T>
10 {
11  return v.norm();
12 }
13 
14 template<typename T>
16 {
17  return v.stableNorm();
18 }
19 
20 template<typename T>
22 {
23  return v.hypotNorm();
24 }
25 
26 template<typename T>
28 {
29  return v.blueNorm();
30 }
31 
32 template<typename T>
34 {
35  typedef typename T::Scalar Scalar;
36  int n = v.size();
37  Scalar scale = 0;
38  Scalar ssq = 1;
39  for (int i=0;i<n;++i)
40  {
41  Scalar ax = std::abs(v.coeff(i));
42  if (scale >= ax)
43  {
44  ssq += numext::abs2(ax/scale);
45  }
46  else
47  {
48  ssq = Scalar(1) + ssq * numext::abs2(scale/ax);
49  scale = ax;
50  }
51  }
52  return scale * std::sqrt(ssq);
53 }
54 
55 template<typename T>
57 {
58  typedef typename T::Scalar Scalar;
59  Scalar s = v.array().abs().maxCoeff();
60  return s*(v/s).norm();
61 }
62 
63 template<typename T>
65 {
66  return v.stableNorm();
67 }
68 
69 template<typename T>
71 {
72  int n =v.size() / 2;
73  for (int i=0;i<n;++i)
74  v(i) = v(2*i)*v(2*i) + v(2*i+1)*v(2*i+1);
75  n = n/2;
76  while (n>0)
77  {
78  for (int i=0;i<n;++i)
79  v(i) = v(2*i) + v(2*i+1);
80  n = n/2;
81  }
82  return std::sqrt(v(0));
83 }
84 
85 namespace Eigen {
86 namespace internal {
87 #ifdef EIGEN_VECTORIZE
88 Packet4f plt(const Packet4f& a, Packet4f& b) { return _mm_cmplt_ps(a,b); }
89 Packet2d plt(const Packet2d& a, Packet2d& b) { return _mm_cmplt_pd(a,b); }
90 
91 Packet4f pandnot(const Packet4f& a, Packet4f& b) { return _mm_andnot_ps(a,b); }
92 Packet2d pandnot(const Packet2d& a, Packet2d& b) { return _mm_andnot_pd(a,b); }
93 #endif
94 }
95 }
96 
97 template<typename T>
99 {
100  #ifndef EIGEN_VECTORIZE
101  return v.blueNorm();
102  #else
103  typedef typename T::Scalar Scalar;
104 
105  static int nmax = 0;
106  static Scalar b1, b2, s1m, s2m, overfl, rbig, relerr;
107  int n;
108 
109  if(nmax <= 0)
110  {
111  int nbig, ibeta, it, iemin, iemax, iexp;
112  Scalar abig, eps;
113 
114  nbig = std::numeric_limits<int>::max(); // largest integer
115  ibeta = std::numeric_limits<Scalar>::radix; //NumTraits<Scalar>::Base; // base for floating-point numbers
116  it = std::numeric_limits<Scalar>::digits; //NumTraits<Scalar>::Mantissa; // number of base-beta digits in mantissa
117  iemin = std::numeric_limits<Scalar>::min_exponent; // minimum exponent
118  iemax = std::numeric_limits<Scalar>::max_exponent; // maximum exponent
119  rbig = std::numeric_limits<Scalar>::max(); // largest floating-point number
120 
121  // Check the basic machine-dependent constants.
122  if(iemin > 1 - 2*it || 1+it>iemax || (it==2 && ibeta<5)
123  || (it<=4 && ibeta <= 3 ) || it<2)
124  {
125  eigen_assert(false && "the algorithm cannot be guaranteed on this computer");
126  }
127  iexp = -((1-iemin)/2);
128  b1 = std::pow(ibeta, iexp); // lower boundary of midrange
129  iexp = (iemax + 1 - it)/2;
130  b2 = std::pow(ibeta,iexp); // upper boundary of midrange
131 
132  iexp = (2-iemin)/2;
133  s1m = std::pow(ibeta,iexp); // scaling factor for lower range
134  iexp = - ((iemax+it)/2);
135  s2m = std::pow(ibeta,iexp); // scaling factor for upper range
136 
137  overfl = rbig*s2m; // overfow boundary for abig
138  eps = std::pow(ibeta, 1-it);
139  relerr = std::sqrt(eps); // tolerance for neglecting asml
140  abig = 1.0/eps - 1.0;
141  if (Scalar(nbig)>abig) nmax = abig; // largest safe n
142  else nmax = nbig;
143  }
144 
147  Packet pasml = internal::pset1<Packet>(Scalar(0));
148  Packet pamed = internal::pset1<Packet>(Scalar(0));
149  Packet pabig = internal::pset1<Packet>(Scalar(0));
150  Packet ps2m = internal::pset1<Packet>(s2m);
151  Packet ps1m = internal::pset1<Packet>(s1m);
152  Packet pb2 = internal::pset1<Packet>(b2);
153  Packet pb1 = internal::pset1<Packet>(b1);
154  for(int j=0; j<v.size(); j+=ps)
155  {
156  Packet ax = internal::pabs(v.template packet<Aligned>(j));
157  Packet ax_s2m = internal::pmul(ax,ps2m);
158  Packet ax_s1m = internal::pmul(ax,ps1m);
159  Packet maskBig = internal::plt(pb2,ax);
160  Packet maskSml = internal::plt(ax,pb1);
161 
162 // Packet maskMed = internal::pand(maskSml,maskBig);
163 // Packet scale = internal::pset1(Scalar(0));
164 // scale = internal::por(scale, internal::pand(maskBig,ps2m));
165 // scale = internal::por(scale, internal::pand(maskSml,ps1m));
166 // scale = internal::por(scale, internal::pandnot(internal::pset1(Scalar(1)),maskMed));
167 // ax = internal::pmul(ax,scale);
168 // ax = internal::pmul(ax,ax);
169 // pabig = internal::padd(pabig, internal::pand(maskBig, ax));
170 // pasml = internal::padd(pasml, internal::pand(maskSml, ax));
171 // pamed = internal::padd(pamed, internal::pandnot(ax,maskMed));
172 
173 
174  pabig = internal::padd(pabig, internal::pand(maskBig, internal::pmul(ax_s2m,ax_s2m)));
175  pasml = internal::padd(pasml, internal::pand(maskSml, internal::pmul(ax_s1m,ax_s1m)));
176  pamed = internal::padd(pamed, internal::pandnot(internal::pmul(ax,ax),internal::pand(maskSml,maskBig)));
177  }
178  Scalar abig = internal::predux(pabig);
179  Scalar asml = internal::predux(pasml);
180  Scalar amed = internal::predux(pamed);
181  if(abig > Scalar(0))
182  {
183  abig = std::sqrt(abig);
184  if(abig > overfl)
185  {
186  eigen_assert(false && "overflow");
187  return rbig;
188  }
189  if(amed > Scalar(0))
190  {
191  abig = abig/s2m;
192  amed = std::sqrt(amed);
193  }
194  else
195  {
196  return abig/s2m;
197  }
198 
199  }
200  else if(asml > Scalar(0))
201  {
202  if (amed > Scalar(0))
203  {
204  abig = std::sqrt(amed);
205  amed = std::sqrt(asml) / s1m;
206  }
207  else
208  {
209  return std::sqrt(asml)/s1m;
210  }
211  }
212  else
213  {
214  return std::sqrt(amed);
215  }
216  asml = std::min(abig, amed);
217  abig = std::max(abig, amed);
218  if(asml <= abig*relerr)
219  return abig;
220  else
221  return abig * std::sqrt(Scalar(1) + numext::abs2(asml/abig));
222  #endif
223 }
224 
225 #define BENCH_PERF(NRM) { \
226  float af = 0; double ad = 0; std::complex<float> ac = 0; \
227  Eigen::BenchTimer tf, td, tcf; tf.reset(); td.reset(); tcf.reset();\
228  for (int k=0; k<tries; ++k) { \
229  tf.start(); \
230  for (int i=0; i<iters; ++i) { af += NRM(vf); } \
231  tf.stop(); \
232  } \
233  for (int k=0; k<tries; ++k) { \
234  td.start(); \
235  for (int i=0; i<iters; ++i) { ad += NRM(vd); } \
236  td.stop(); \
237  } \
238  /*for (int k=0; k<std::max(1,tries/3); ++k) { \
239  tcf.start(); \
240  for (int i=0; i<iters; ++i) { ac += NRM(vcf); } \
241  tcf.stop(); \
242  } */\
243  std::cout << #NRM << "\t" << tf.value() << " " << td.value() << " " << tcf.value() << "\n"; \
244 }
245 
246 void check_accuracy(double basef, double based, int s)
247 {
248  double yf = basef * std::abs(internal::random<double>());
249  double yd = based * std::abs(internal::random<double>());
250  VectorXf vf = VectorXf::Ones(s) * yf;
251  VectorXd vd = VectorXd::Ones(s) * yd;
252 
253  std::cout << "reference\t" << std::sqrt(double(s))*yf << "\t" << std::sqrt(double(s))*yd << "\n";
254  std::cout << "sqsumNorm\t" << sqsumNorm(vf) << "\t" << sqsumNorm(vd) << "\n";
255  std::cout << "hypotNorm\t" << hypotNorm(vf) << "\t" << hypotNorm(vd) << "\n";
256  std::cout << "blueNorm\t" << blueNorm(vf) << "\t" << blueNorm(vd) << "\n";
257  std::cout << "pblueNorm\t" << pblueNorm(vf) << "\t" << pblueNorm(vd) << "\n";
258  std::cout << "lapackNorm\t" << lapackNorm(vf) << "\t" << lapackNorm(vd) << "\n";
259  std::cout << "twopassNorm\t" << twopassNorm(vf) << "\t" << twopassNorm(vd) << "\n";
260  std::cout << "bl2passNorm\t" << bl2passNorm(vf) << "\t" << bl2passNorm(vd) << "\n";
261 }
262 
263 void check_accuracy_var(int ef0, int ef1, int ed0, int ed1, int s)
264 {
265  VectorXf vf(s);
266  VectorXd vd(s);
267  for (int i=0; i<s; ++i)
268  {
269  vf[i] = std::abs(internal::random<double>()) * std::pow(double(10), internal::random<int>(ef0,ef1));
270  vd[i] = std::abs(internal::random<double>()) * std::pow(double(10), internal::random<int>(ed0,ed1));
271  }
272 
273  //std::cout << "reference\t" << internal::sqrt(double(s))*yf << "\t" << internal::sqrt(double(s))*yd << "\n";
274  std::cout << "sqsumNorm\t" << sqsumNorm(vf) << "\t" << sqsumNorm(vd) << "\t" << sqsumNorm(vf.cast<long double>()) << "\t" << sqsumNorm(vd.cast<long double>()) << "\n";
275  std::cout << "hypotNorm\t" << hypotNorm(vf) << "\t" << hypotNorm(vd) << "\t" << hypotNorm(vf.cast<long double>()) << "\t" << hypotNorm(vd.cast<long double>()) << "\n";
276  std::cout << "blueNorm\t" << blueNorm(vf) << "\t" << blueNorm(vd) << "\t" << blueNorm(vf.cast<long double>()) << "\t" << blueNorm(vd.cast<long double>()) << "\n";
277  std::cout << "pblueNorm\t" << pblueNorm(vf) << "\t" << pblueNorm(vd) << "\t" << blueNorm(vf.cast<long double>()) << "\t" << blueNorm(vd.cast<long double>()) << "\n";
278  std::cout << "lapackNorm\t" << lapackNorm(vf) << "\t" << lapackNorm(vd) << "\t" << lapackNorm(vf.cast<long double>()) << "\t" << lapackNorm(vd.cast<long double>()) << "\n";
279  std::cout << "twopassNorm\t" << twopassNorm(vf) << "\t" << twopassNorm(vd) << "\t" << twopassNorm(vf.cast<long double>()) << "\t" << twopassNorm(vd.cast<long double>()) << "\n";
280 // std::cout << "bl2passNorm\t" << bl2passNorm(vf) << "\t" << bl2passNorm(vd) << "\t" << bl2passNorm(vf.cast<long double>()) << "\t" << bl2passNorm(vd.cast<long double>()) << "\n";
281 }
282 
283 int main(int argc, char** argv)
284 {
285  int tries = 10;
286  int iters = 100000;
287  double y = 1.1345743233455785456788e12 * internal::random<double>();
288  VectorXf v = VectorXf::Ones(1024) * y;
289 
290 // return 0;
291  int s = 10000;
292  double basef_ok = 1.1345743233455785456788e15;
293  double based_ok = 1.1345743233455785456788e95;
294 
295  double basef_under = 1.1345743233455785456788e-27;
296  double based_under = 1.1345743233455785456788e-303;
297 
298  double basef_over = 1.1345743233455785456788e+27;
299  double based_over = 1.1345743233455785456788e+302;
300 
301  std::cout.precision(20);
302 
303  std::cerr << "\nNo under/overflow:\n";
304  check_accuracy(basef_ok, based_ok, s);
305 
306  std::cerr << "\nUnderflow:\n";
307  check_accuracy(basef_under, based_under, s);
308 
309  std::cerr << "\nOverflow:\n";
310  check_accuracy(basef_over, based_over, s);
311 
312  std::cerr << "\nVarying (over):\n";
313  for (int k=0; k<1; ++k)
314  {
315  check_accuracy_var(20,27,190,302,s);
316  std::cout << "\n";
317  }
318 
319  std::cerr << "\nVarying (under):\n";
320  for (int k=0; k<1; ++k)
321  {
322  check_accuracy_var(-27,20,-302,-190,s);
323  std::cout << "\n";
324  }
325 
326  y = 1;
327  std::cout.precision(4);
328  int s1 = 1024*1024*32;
329  std::cerr << "Performance (out of cache, " << s1 << "):\n";
330  {
331  int iters = 1;
332  VectorXf vf = VectorXf::Random(s1) * y;
333  VectorXd vd = VectorXd::Random(s1) * y;
334  VectorXcf vcf = VectorXcf::Random(s1) * y;
343  }
344 
345  std::cerr << "\nPerformance (in cache, " << 512 << "):\n";
346  {
347  int iters = 100000;
348  VectorXf vf = VectorXf::Random(512) * y;
349  VectorXd vd = VectorXd::Random(512) * y;
350  VectorXcf vcf = VectorXcf::Random(512) * y;
359  }
360 }
internal::packet_traits< Scalar >::type Packet
SCALAR Scalar
Definition: bench_gemm.cpp:33
#define max(a, b)
Definition: datatypes.h:20
Scalar * b
Definition: benchVecAdd.cpp:17
#define BENCH_PERF(NRM)
Definition: bench_norm.cpp:225
void check_accuracy_var(int ef0, int ef1, int ed0, int ed1, int s)
Definition: bench_norm.cpp:263
EIGEN_DONT_INLINE T::Scalar twopassNorm(T &v)
Definition: bench_norm.cpp:56
#define min(a, b)
Definition: datatypes.h:19
EIGEN_DONT_INLINE T::Scalar pblueNorm(const T &v)
Definition: bench_norm.cpp:98
ArrayXcf v
Definition: Cwise_arg.cpp:1
int n
EIGEN_DONT_INLINE T::Scalar hypotNorm(T &v)
Definition: bench_norm.cpp:21
EIGEN_DEVICE_FUNC const SqrtReturnType sqrt() const
Namespace containing all symbols from the Eigen library.
Definition: jet.h:637
Definition: Half.h:150
EIGEN_DEVICE_FUNC unpacket_traits< Packet >::type predux(const Packet &a)
Derived::RealScalar relerr(const MatrixBase< Derived > &A, const MatrixBase< OtherDerived > &B)
int RealScalar int RealScalar int RealScalar RealScalar * ps
#define EIGEN_DONT_INLINE
Definition: Macros.h:517
Array33i a
EIGEN_DEVICE_FUNC Packet padd(const Packet &a, const Packet &b)
EIGEN_DONT_INLINE T::Scalar divacNorm(T &v)
Definition: bench_norm.cpp:70
EIGEN_DONT_INLINE T::Scalar bl2passNorm(T &v)
Definition: bench_norm.cpp:64
EIGEN_DONT_INLINE T::Scalar lapackNorm(T &v)
Definition: bench_norm.cpp:33
Vector2 b2(4,-5)
#define eigen_assert(x)
Definition: Macros.h:579
RealScalar s
EIGEN_DONT_INLINE T::Scalar sqsumNorm(T &v)
Definition: bench_norm.cpp:9
EIGEN_DONT_INLINE T::Scalar stableNorm(T &v)
Definition: bench_norm.cpp:15
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Abs2ReturnType abs2() const
Vector2 b1(2,-1)
int main(int argc, char **argv)
Definition: bench_norm.cpp:283
EIGEN_DEVICE_FUNC Packet pandnot(const Packet &a, const Packet &b)
set noclip points set clip one set noclip two set bar set border lt lw set xdata set ydata set zdata set x2data set y2data set boxwidth set dummy y set format x g set format y g set format x2 g set format y2 g set format z g set angles radians set nogrid set key title set key left top Right noreverse box linetype linewidth samplen spacing width set nolabel set noarrow set nologscale set logscale x set set pointsize set encoding default set nopolar set noparametric set set set set surface set nocontour set clabel set mapping cartesian set nohidden3d set cntrparam order set cntrparam linear set cntrparam levels auto set cntrparam points set size set set xzeroaxis lt lw set x2zeroaxis lt lw set yzeroaxis lt lw set y2zeroaxis lt lw set tics in set ticslevel set tics scale
void check_accuracy(double basef, double based, int s)
Definition: bench_norm.cpp:246
EIGEN_DONT_INLINE T::Scalar blueNorm(T &v)
Definition: bench_norm.cpp:27
Jet< T, N > pow(const Jet< T, N > &f, double g)
Definition: jet.h:570
#define abs(x)
Definition: datatypes.h:17
EIGEN_DEVICE_FUNC Packet pmul(const Packet &a, const Packet &b)
std::ptrdiff_t j
EIGEN_DEVICE_FUNC Packet pand(const Packet &a, const Packet &b)
const T & y
EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f &a)


gtsam
Author(s):
autogenerated on Sat May 8 2021 02:41:41