bench_norm.cpp
Go to the documentation of this file.
1 #include <typeinfo>
2 #include <iostream>
3 #include <Eigen/Core>
4 #include "BenchTimer.h"
5 using namespace Eigen;
6 using namespace std;
7 
8 template<typename T>
10 {
11  return v.norm();
12 }
13 
14 template<typename T>
16 {
17  return v.stableNorm();
18 }
19 
20 template<typename T>
22 {
23  return v.hypotNorm();
24 }
25 
26 template<typename T>
28 {
29  return v.blueNorm();
30 }
31 
32 template<typename T>
34 {
35  typedef typename T::Scalar Scalar;
36  int n = v.size();
37  Scalar scale = 0;
38  Scalar ssq = 1;
39  for (int i=0;i<n;++i)
40  {
41  Scalar ax = std::abs(v.coeff(i));
42  if (scale >= ax)
43  {
44  ssq += numext::abs2(ax/scale);
45  }
46  else
47  {
48  ssq = Scalar(1) + ssq * numext::abs2(scale/ax);
49  scale = ax;
50  }
51  }
52  return scale * std::sqrt(ssq);
53 }
54 
55 template<typename T>
57 {
58  typedef typename T::Scalar Scalar;
59  Scalar s = v.array().abs().maxCoeff();
60  return s*(v/s).norm();
61 }
62 
63 template<typename T>
65 {
66  return v.stableNorm();
67 }
68 
69 template<typename T>
71 {
72  int n =v.size() / 2;
73  for (int i=0;i<n;++i)
74  v(i) = v(2*i)*v(2*i) + v(2*i+1)*v(2*i+1);
75  n = n/2;
76  while (n>0)
77  {
78  for (int i=0;i<n;++i)
79  v(i) = v(2*i) + v(2*i+1);
80  n = n/2;
81  }
82  return std::sqrt(v(0));
83 }
84 
85 namespace Eigen {
86 namespace internal {
87 #ifdef EIGEN_VECTORIZE
88 Packet4f plt(const Packet4f& a, Packet4f& b) { return _mm_cmplt_ps(a,b); }
89 Packet2d plt(const Packet2d& a, Packet2d& b) { return _mm_cmplt_pd(a,b); }
90 
91 Packet4f pandnot(const Packet4f& a, Packet4f& b) { return _mm_andnot_ps(a,b); }
92 Packet2d pandnot(const Packet2d& a, Packet2d& b) { return _mm_andnot_pd(a,b); }
93 #endif
94 }
95 }
96 
97 template<typename T>
99 {
100  #ifndef EIGEN_VECTORIZE
101  return v.blueNorm();
102  #else
103  typedef typename T::Scalar Scalar;
104 
105  static int nmax = 0;
106  static Scalar b1, b2, s1m, s2m, overfl, rbig, relerr;
107  int n;
108 
109  if(nmax <= 0)
110  {
111  int nbig, ibeta, it, iemin, iemax, iexp;
112  Scalar abig, eps;
113 
114  nbig = NumTraits<int>::highest(); // largest integer
115  ibeta = std::numeric_limits<Scalar>::radix; // NumTraits<Scalar>::Base; // base for floating-point numbers
116  it = NumTraits<Scalar>::digits(); // NumTraits<Scalar>::Mantissa; // number of base-beta digits in mantissa
117  iemin = NumTraits<Scalar>::min_exponent(); // minimum exponent
118  iemax = NumTraits<Scalar>::max_exponent(); // maximum exponent
119  rbig = NumTraits<Scalar>::highest(); // largest floating-point number
120 
121  // Check the basic machine-dependent constants.
122  if(iemin > 1 - 2*it || 1+it>iemax || (it==2 && ibeta<5)
123  || (it<=4 && ibeta <= 3 ) || it<2)
124  {
125  eigen_assert(false && "the algorithm cannot be guaranteed on this computer");
126  }
127  iexp = -((1-iemin)/2);
128  b1 = std::pow(ibeta, iexp); // lower boundary of midrange
129  iexp = (iemax + 1 - it)/2;
130  b2 = std::pow(ibeta,iexp); // upper boundary of midrange
131 
132  iexp = (2-iemin)/2;
133  s1m = std::pow(ibeta,iexp); // scaling factor for lower range
134  iexp = - ((iemax+it)/2);
135  s2m = std::pow(ibeta,iexp); // scaling factor for upper range
136 
137  overfl = rbig*s2m; // overflow boundary for abig
138  eps = std::pow(ibeta, 1-it);
139  relerr = std::sqrt(eps); // tolerance for neglecting asml
140  abig = 1.0/eps - 1.0;
141  if (Scalar(nbig)>abig) nmax = abig; // largest safe n
142  else nmax = nbig;
143  }
144 
147  Packet pasml = internal::pset1<Packet>(Scalar(0));
148  Packet pamed = internal::pset1<Packet>(Scalar(0));
149  Packet pabig = internal::pset1<Packet>(Scalar(0));
150  Packet ps2m = internal::pset1<Packet>(s2m);
151  Packet ps1m = internal::pset1<Packet>(s1m);
152  Packet pb2 = internal::pset1<Packet>(b2);
153  Packet pb1 = internal::pset1<Packet>(b1);
154  for(int j=0; j<v.size(); j+=ps)
155  {
156  Packet ax = internal::pabs(v.template packet<Aligned>(j));
157  Packet ax_s2m = internal::pmul(ax,ps2m);
158  Packet ax_s1m = internal::pmul(ax,ps1m);
159  Packet maskBig = internal::plt(pb2,ax);
160  Packet maskSml = internal::plt(ax,pb1);
161 
162 // Packet maskMed = internal::pand(maskSml,maskBig);
163 // Packet scale = internal::pset1(Scalar(0));
164 // scale = internal::por(scale, internal::pand(maskBig,ps2m));
165 // scale = internal::por(scale, internal::pand(maskSml,ps1m));
166 // scale = internal::por(scale, internal::pandnot(internal::pset1(Scalar(1)),maskMed));
167 // ax = internal::pmul(ax,scale);
168 // ax = internal::pmul(ax,ax);
169 // pabig = internal::padd(pabig, internal::pand(maskBig, ax));
170 // pasml = internal::padd(pasml, internal::pand(maskSml, ax));
171 // pamed = internal::padd(pamed, internal::pandnot(ax,maskMed));
172 
173 
174  pabig = internal::padd(pabig, internal::pand(maskBig, internal::pmul(ax_s2m,ax_s2m)));
175  pasml = internal::padd(pasml, internal::pand(maskSml, internal::pmul(ax_s1m,ax_s1m)));
176  pamed = internal::padd(pamed, internal::pandnot(internal::pmul(ax,ax),internal::pand(maskSml,maskBig)));
177  }
178  Scalar abig = internal::predux(pabig);
179  Scalar asml = internal::predux(pasml);
180  Scalar amed = internal::predux(pamed);
181  if(abig > Scalar(0))
182  {
183  abig = std::sqrt(abig);
184  if(abig > overfl)
185  {
186  eigen_assert(false && "overflow");
187  return rbig;
188  }
189  if(amed > Scalar(0))
190  {
191  abig = abig/s2m;
192  amed = std::sqrt(amed);
193  }
194  else
195  {
196  return abig/s2m;
197  }
198 
199  }
200  else if(asml > Scalar(0))
201  {
202  if (amed > Scalar(0))
203  {
204  abig = std::sqrt(amed);
205  amed = std::sqrt(asml) / s1m;
206  }
207  else
208  {
209  return std::sqrt(asml)/s1m;
210  }
211  }
212  else
213  {
214  return std::sqrt(amed);
215  }
216  asml = std::min(abig, amed);
217  abig = std::max(abig, amed);
218  if(asml <= abig*relerr)
219  return abig;
220  else
221  return abig * std::sqrt(Scalar(1) + numext::abs2(asml/abig));
222  #endif
223 }
224 
225 #define BENCH_PERF(NRM) { \
226  float af = 0; double ad = 0; std::complex<float> ac = 0; \
227  Eigen::BenchTimer tf, td, tcf; tf.reset(); td.reset(); tcf.reset();\
228  for (int k=0; k<tries; ++k) { \
229  tf.start(); \
230  for (int i=0; i<iters; ++i) { af += NRM(vf); } \
231  tf.stop(); \
232  } \
233  for (int k=0; k<tries; ++k) { \
234  td.start(); \
235  for (int i=0; i<iters; ++i) { ad += NRM(vd); } \
236  td.stop(); \
237  } \
238  /*for (int k=0; k<std::max(1,tries/3); ++k) { \
239  tcf.start(); \
240  for (int i=0; i<iters; ++i) { ac += NRM(vcf); } \
241  tcf.stop(); \
242  } */\
243  std::cout << #NRM << "\t" << tf.value() << " " << td.value() << " " << tcf.value() << "\n"; \
244 }
245 
246 void check_accuracy(double basef, double based, int s)
247 {
248  double yf = basef * std::abs(internal::random<double>());
249  double yd = based * std::abs(internal::random<double>());
250  VectorXf vf = VectorXf::Ones(s) * yf;
251  VectorXd vd = VectorXd::Ones(s) * yd;
252 
253  std::cout << "reference\t" << std::sqrt(double(s))*yf << "\t" << std::sqrt(double(s))*yd << "\n";
254  std::cout << "sqsumNorm\t" << sqsumNorm(vf) << "\t" << sqsumNorm(vd) << "\n";
255  std::cout << "hypotNorm\t" << hypotNorm(vf) << "\t" << hypotNorm(vd) << "\n";
256  std::cout << "blueNorm\t" << blueNorm(vf) << "\t" << blueNorm(vd) << "\n";
257  std::cout << "pblueNorm\t" << pblueNorm(vf) << "\t" << pblueNorm(vd) << "\n";
258  std::cout << "lapackNorm\t" << lapackNorm(vf) << "\t" << lapackNorm(vd) << "\n";
259  std::cout << "twopassNorm\t" << twopassNorm(vf) << "\t" << twopassNorm(vd) << "\n";
260  std::cout << "bl2passNorm\t" << bl2passNorm(vf) << "\t" << bl2passNorm(vd) << "\n";
261 }
262 
263 void check_accuracy_var(int ef0, int ef1, int ed0, int ed1, int s)
264 {
265  VectorXf vf(s);
266  VectorXd vd(s);
267  for (int i=0; i<s; ++i)
268  {
269  vf[i] = std::abs(internal::random<double>()) * std::pow(double(10), internal::random<int>(ef0,ef1));
270  vd[i] = std::abs(internal::random<double>()) * std::pow(double(10), internal::random<int>(ed0,ed1));
271  }
272 
273  //std::cout << "reference\t" << internal::sqrt(double(s))*yf << "\t" << internal::sqrt(double(s))*yd << "\n";
274  std::cout << "sqsumNorm\t" << sqsumNorm(vf) << "\t" << sqsumNorm(vd) << "\t" << sqsumNorm(vf.cast<long double>()) << "\t" << sqsumNorm(vd.cast<long double>()) << "\n";
275  std::cout << "hypotNorm\t" << hypotNorm(vf) << "\t" << hypotNorm(vd) << "\t" << hypotNorm(vf.cast<long double>()) << "\t" << hypotNorm(vd.cast<long double>()) << "\n";
276  std::cout << "blueNorm\t" << blueNorm(vf) << "\t" << blueNorm(vd) << "\t" << blueNorm(vf.cast<long double>()) << "\t" << blueNorm(vd.cast<long double>()) << "\n";
277  std::cout << "pblueNorm\t" << pblueNorm(vf) << "\t" << pblueNorm(vd) << "\t" << blueNorm(vf.cast<long double>()) << "\t" << blueNorm(vd.cast<long double>()) << "\n";
278  std::cout << "lapackNorm\t" << lapackNorm(vf) << "\t" << lapackNorm(vd) << "\t" << lapackNorm(vf.cast<long double>()) << "\t" << lapackNorm(vd.cast<long double>()) << "\n";
279  std::cout << "twopassNorm\t" << twopassNorm(vf) << "\t" << twopassNorm(vd) << "\t" << twopassNorm(vf.cast<long double>()) << "\t" << twopassNorm(vd.cast<long double>()) << "\n";
280 // std::cout << "bl2passNorm\t" << bl2passNorm(vf) << "\t" << bl2passNorm(vd) << "\t" << bl2passNorm(vf.cast<long double>()) << "\t" << bl2passNorm(vd.cast<long double>()) << "\n";
281 }
282 
283 int main(int argc, char** argv)
284 {
285  int tries = 10;
286  int iters = 100000;
287  double y = 1.1345743233455785456788e12 * internal::random<double>();
288  VectorXf v = VectorXf::Ones(1024) * y;
289 
290 // return 0;
291  int s = 10000;
292  double basef_ok = 1.1345743233455785456788e15;
293  double based_ok = 1.1345743233455785456788e95;
294 
295  double basef_under = 1.1345743233455785456788e-27;
296  double based_under = 1.1345743233455785456788e-303;
297 
298  double basef_over = 1.1345743233455785456788e+27;
299  double based_over = 1.1345743233455785456788e+302;
300 
301  std::cout.precision(20);
302 
303  std::cerr << "\nNo under/overflow:\n";
304  check_accuracy(basef_ok, based_ok, s);
305 
306  std::cerr << "\nUnderflow:\n";
307  check_accuracy(basef_under, based_under, s);
308 
309  std::cerr << "\nOverflow:\n";
310  check_accuracy(basef_over, based_over, s);
311 
312  std::cerr << "\nVarying (over):\n";
313  for (int k=0; k<1; ++k)
314  {
315  check_accuracy_var(20,27,190,302,s);
316  std::cout << "\n";
317  }
318 
319  std::cerr << "\nVarying (under):\n";
320  for (int k=0; k<1; ++k)
321  {
322  check_accuracy_var(-27,20,-302,-190,s);
323  std::cout << "\n";
324  }
325 
326  y = 1;
327  std::cout.precision(4);
328  int s1 = 1024*1024*32;
329  std::cerr << "Performance (out of cache, " << s1 << "):\n";
330  {
331  int iters = 1;
332  VectorXf vf = VectorXf::Random(s1) * y;
333  VectorXd vd = VectorXd::Random(s1) * y;
334  VectorXcf vcf = VectorXcf::Random(s1) * y;
343  }
344 
345  std::cerr << "\nPerformance (in cache, " << 512 << "):\n";
346  {
347  int iters = 100000;
348  VectorXf vf = VectorXf::Random(512) * y;
349  VectorXd vd = VectorXd::Random(512) * y;
350  VectorXcf vcf = VectorXcf::Random(512) * y;
359  }
360 }
Eigen
Namespace containing all symbols from the Eigen library.
Definition: jet.h:637
check_accuracy_var
void check_accuracy_var(int ef0, int ef1, int ed0, int ed1, int s)
Definition: bench_norm.cpp:259
simple_graph::b1
Vector2 b1(2, -1)
BENCH_PERF
#define BENCH_PERF(NRM)
Definition: bench_norm.cpp:225
Eigen::internal::packet_traits< Scalar >::size
@ size
Definition: GenericPacketMath.h:112
Eigen::internal::Packet4f
__vector float Packet4f
Definition: AltiVec/PacketMath.h:30
s
RealScalar s
Definition: level1_cplx_impl.h:126
pblueNorm
EIGEN_DONT_INLINE T::Scalar pblueNorm(const T &v)
Definition: bench_norm.cpp:98
Packet
internal::packet_traits< Scalar >::type Packet
Definition: benchmark-blocking-sizes.cpp:54
b
Scalar * b
Definition: benchVecAdd.cpp:17
eigen_assert
#define eigen_assert(x)
Definition: Macros.h:1037
simple_graph::b2
Vector2 b2(4, -5)
twopassNorm
EIGEN_DONT_INLINE T::Scalar twopassNorm(T &v)
Definition: bench_norm.cpp:56
hypotNorm
EIGEN_DONT_INLINE T::Scalar hypotNorm(T &v)
Definition: bench_norm.cpp:21
n
int n
Definition: BiCGSTAB_simple.cpp:1
relerr
Derived::RealScalar relerr(const MatrixBase< Derived > &A, const MatrixBase< OtherDerived > &B)
Definition: matrix_functions.h:64
scale
set noclip points set clip one set noclip two set bar set border lt lw set xdata set ydata set zdata set x2data set y2data set boxwidth set dummy y set format x g set format y g set format x2 g set format y2 g set format z g set angles radians set nogrid set key title set key left top Right noreverse box linetype linewidth samplen spacing width set nolabel set noarrow set nologscale set logscale x set set pointsize set encoding default set nopolar set noparametric set set set set surface set nocontour set clabel set mapping cartesian set nohidden3d set cntrparam order set cntrparam linear set cntrparam levels auto set cntrparam points set size set set xzeroaxis lt lw set x2zeroaxis lt lw set yzeroaxis lt lw set y2zeroaxis lt lw set tics in set ticslevel set tics scale
Definition: gnuplot_common_settings.hh:54
j
std::ptrdiff_t j
Definition: tut_arithmetic_redux_minmax.cpp:2
divacNorm
EIGEN_DONT_INLINE T::Scalar divacNorm(T &v)
Definition: bench_norm.cpp:70
lapackNorm
EIGEN_DONT_INLINE T::Scalar lapackNorm(T &v)
Definition: bench_norm.cpp:33
stableNorm
EIGEN_DONT_INLINE T::Scalar stableNorm(T &v)
Definition: bench_norm.cpp:15
Eigen::internal::pand
EIGEN_STRONG_INLINE Packet8h pand(const Packet8h &a, const Packet8h &b)
Definition: AVX/PacketMath.h:1050
bl2passNorm
EIGEN_DONT_INLINE T::Scalar bl2passNorm(T &v)
Definition: bench_norm.cpp:64
Eigen::Triplet< double >
ceres::pow
Jet< T, N > pow(const Jet< T, N > &f, double g)
Definition: jet.h:570
sqsumNorm
EIGEN_DONT_INLINE T::Scalar sqsumNorm(T &v)
Definition: bench_norm.cpp:9
y
Scalar * y
Definition: level1_cplx_impl.h:124
main
int main(int argc, char **argv)
Definition: bench_norm.cpp:279
a
ArrayXXi a
Definition: Array_initializer_list_23_cxx11.cpp:1
Eigen::numext::abs2
EIGEN_DEVICE_FUNC bool abs2(bool x)
Definition: Eigen/src/Core/MathFunctions.h:1294
Eigen::internal::predux
EIGEN_DEVICE_FUNC unpacket_traits< Packet >::type predux(const Packet &a)
Definition: GenericPacketMath.h:875
Eigen::internal::pmul
EIGEN_DEVICE_FUNC Packet pmul(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:237
Eigen::internal::Packet2d
v2f64 Packet2d
Definition: MSA/PacketMath.h:820
std
Definition: BFloat16.h:88
BenchTimer.h
Eigen::internal::pabs
EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f &a)
Definition: AltiVec/PacketMath.h:1176
v
Array< int, Dynamic, 1 > v
Definition: Array_initializer_list_vector_cxx11.cpp:1
ps
int RealScalar int RealScalar int RealScalar RealScalar * ps
Definition: level1_cplx_impl.h:120
Eigen::internal::padd
EIGEN_DEVICE_FUNC Packet padd(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:215
min
#define min(a, b)
Definition: datatypes.h:19
blueNorm
EIGEN_DONT_INLINE T::Scalar blueNorm(T &v)
Definition: bench_norm.cpp:27
abs
#define abs(x)
Definition: datatypes.h:17
internal
Definition: BandTriangularSolver.h:13
check_accuracy
void check_accuracy(double basef, double based, int s)
Definition: bench_norm.cpp:242
Eigen::internal::pandnot
EIGEN_STRONG_INLINE Packet8h pandnot(const Packet8h &a, const Packet8h &b)
Definition: AVX/PacketMath.h:1053
max
#define max(a, b)
Definition: datatypes.h:20
Eigen::NumTraits
Holds information about the various numeric (i.e. scalar) types allowed by Eigen.
Definition: NumTraits.h:232
ceres::sqrt
Jet< T, N > sqrt(const Jet< T, N > &f)
Definition: jet.h:418
i
int i
Definition: BiCGSTAB_step_by_step.cpp:9
Scalar
SCALAR Scalar
Definition: bench_gemm.cpp:46
EIGEN_DONT_INLINE
#define EIGEN_DONT_INLINE
Definition: Macros.h:940


gtsam
Author(s):
autogenerated on Thu Jun 13 2024 03:01:47