GteSingularValueDecomposition.h
Go to the documentation of this file.
1 // David Eberly, Geometric Tools, Redmond WA 98052
2 // Copyright (c) 1998-2017
3 // Distributed under the Boost Software License, Version 1.0.
4 // http://www.boost.org/LICENSE_1_0.txt
5 // http://www.geometrictools.com/License/Boost/LICENSE_1_0.txt
6 // File Version: 3.0.0 (2016/06/19)
7 
8 #pragma once
9 
11 #include <LowLevel/GteWrapper.h>
12 #include <algorithm>
13 #include <cmath>
14 #include <cstring>
15 #include <vector>
16 
17 // The SingularValueDecomposition class is an implementation of Algorithm
18 // 8.3.2 (The SVD Algorithm) described in "Matrix Computations, 2nd
19 // edition" by G. H. Golub and Charles F. Van Loan, The Johns Hopkins
20 // Press, Baltimore MD, Fourth Printing 1993. Algorithm 5.4.2 (Householder
21 // Bidiagonalization) is used to reduce A to bidiagonal B. Algorithm 8.3.1
22 // (Golub-Kahan SVD Step) is used for the iterative reduction from bidiagonal
23 // to diagonal. If A is the original matrix, S is the matrix whose diagonal
24 // entries are the singular values, and U and V are corresponding matrices,
25 // then theoretically U^T*A*V = S. Numerically, we have errors
26 // E = U^T*A*V - S. Algorithm 8.3.2 mentions that one expects |E| is
27 // approximately u*|A|, where |M| denotes the Frobenius norm of M and where
28 // u is the unit roundoff for the floating-point arithmetic: 2^{-23} for
29 // 'float', which is FLT_EPSILON = 1.192092896e-7f, and 2^{-52} for'double',
30 // which is DBL_EPSILON = 2.2204460492503131e-16.
31 //
32 // The condition |a(i,i+1)| <= epsilon*(|a(i,i) + a(i+1,i+1)|) used to
33 // determine when the reduction decouples to smaller problems is implemented
34 // as: sum = |a(i,i)| + |a(i+1,i+1)|; sum + |a(i,i+1)| == sum. The idea is
35 // that the superdiagonal term is small relative to its diagonal neighbors,
36 // and so it is effectively zero. The unit tests have shown that this
37 // interpretation of decoupling is effective.
38 //
39 // The condition |a(i,i)| <= epsilon*|B| used to determine when the
40 // reduction decouples (with a zero singular value) is implemented using
41 // the Frobenius norm of B and epsilon = multiplier*u, where for now the
42 // multiplier is hard-coded in Solve(...) as 8.
43 //
44 // The authors suggest that once you have the bidiagonal matrix, a practical
45 // implementation will store the diagonal and superdiagonal entries in linear
46 // arrays, ignoring the theoretically zero values not in the 2-band. This is
47 // good for cache coherence, and we have used the suggestion. The essential
48 // parts of the Householder u-vectors are stored in the lower-triangular
49 // portion of the matrix and the essential parts of the Householder v-vectors
50 // are stored in the upper-triangular portion of the matrix. To avoid having
51 // to recompute 2/Dot(u,u) and 2/Dot(v,v) when constructing orthogonal U and
52 // V, we store these quantities in additional memory during bidiagonalization.
53 //
54 // For matrices with randomly generated values in [0,1], the unit tests
55 // produce the following information for N-by-N matrices.
56 //
57 // N |A| |E| |E|/|A| iterations
58 // -------------------------------------------
59 // 2 1.4831 4.1540e-16 2.8007e-16 1
60 // 3 2.1065 3.5024e-16 1.6626e-16 4
61 // 4 2.4979 7.4605e-16 2.9867e-16 6
62 // 5 3.6591 1.8305e-15 5.0025e-16 9
63 // 6 4.0572 2.0571e-15 5.0702e-16 10
64 // 7 4.7745 2.9057e-15 6.0859e-16 12
65 // 8 5.1964 2.7958e-15 5.3803e-16 13
66 // 9 5.7599 3.3128e-15 5.7514e-16 16
67 // 10 6.2700 3.7209e-15 5.9344e-16 16
68 // 11 6.8220 5.0580e-15 7.4142e-16 18
69 // 12 7.4540 5.2493e-15 7.0422e-16 21
70 // 13 8.1225 5.6043e-15 6.8997e-16 24
71 // 14 8.5883 5.8553e-15 6.8177e-16 26
72 // 15 9.1337 6.9663e-15 7.6270e-16 27
73 // 16 9.7884 9.1358e-15 9.3333e-16 29
74 // 17 10.2407 8.2715e-15 8.0771e-16 34
75 // 18 10.7147 8.9748e-15 8.3761e-16 33
76 // 19 11.1887 1.0094e-14 9.0220e-16 32
77 // 20 11.7739 9.7000e-15 8.2386e-16 35
78 // 21 12.2822 1.1217e-14 9.1329e-16 36
79 // 22 12.7649 1.1071e-14 8.6732e-16 37
80 // 23 13.3366 1.1271e-14 8.4513e-16 41
81 // 24 13.8505 1.2806e-14 9.2460e-16 43
82 // 25 14.4332 1.3081e-14 9.0637e-16 43
83 // 26 14.9301 1.4882e-14 9.9680e-16 46
84 // 27 15.5214 1.5571e-14 1.0032e-15 48
85 // 28 16.1029 1.7553e-14 1.0900e-15 49
86 // 29 16.6407 1.6219e-14 9.7467e-16 53
87 // 30 17.1891 1.8560e-14 1.0797e-15 55
88 // 31 17.7773 1.8522e-14 1.0419e-15 56
89 //
90 // The singularvalues and |E|/|A| values were compared to those generated by
91 // Mathematica Version 9.0; Wolfram Research, Inc., Champaign IL, 2012.
92 // The results were all comparable with singular values agreeing to a large
93 // number of decimal places.
94 //
95 // Timing on an Intel (R) Core (TM) i7-3930K CPU @ 3.20 GHZ (in seconds)
96 // for NxN matrices:
97 //
98 // N |E|/|A| iters bidiag QR U-and-V comperr
99 // -------------------------------------------------------
100 // 512 3.8632e-15 848 0.341 0.016 1.844 2.203
101 // 1024 5.6456e-15 1654 4.279 0.032 18.765 20.844
102 // 2048 7.5499e-15 3250 40.421 0.141 186.607 213.216
103 //
104 // where iters is the number of QR steps taken, bidiag is the computation
105 // of the Householder reflection vectors, U-and-V is the composition of
106 // Householder reflections and Givens rotations to obtain the orthogonal
107 // matrices of the decomposigion, and comperr is the computation E =
108 // U^T*A*V - S.
109 
110 namespace gte
111 {
112 
113 template <typename Real>
115 {
116 public:
117  // The solver processes MxN symmetric matrices, where M >= N > 1
118  // ('numRows' is M and 'numCols' is N) and the matrix is stored in
119  // row-major order. The maximum number of iterations ('maxIterations')
120  // must be specified for the reduction of a bidiagonal matrix to a
121  // diagonal matrix. The goal is to compute MxM orthogonal U, NxN
122  // orthogonal V, and MxN matrix S for which U^T*A*V = S. The only
123  // nonzero entries of S are on the diagonal; the diagonal entries are
124  // the singular values of the original matrix.
125  SingularValueDecomposition(int numRows, int numCols,
126  unsigned int maxIterations);
127 
128  // A copy of the MxN input is made internally. The order of the singular
129  // values is specified by sortType: -1 (decreasing), 0 (no sorting), or +1
130  // (increasing). When sorted, the columns of the orthogonal matrices
131  // are ordered accordingly. The return value is the number of iterations
132  // consumed when convergence occurred, 0xFFFFFFFF when convergence did not
133  // occur or 0 when N <= 1 or M < N was passed to the constructor.
134  unsigned int Solve(Real const* input, int sortType);
135 
136  // Get the singular values of the matrix passed to Solve(...). The input
137  // 'singularValues' must have N elements.
138  void GetSingularValues(Real* singularValues) const;
139 
140  // Accumulate the Householder reflections, the Givens rotations, and the
141  // diagonal fix-up matrix to compute the orthogonal matrices U and V for
142  // which U^T*A*V = S. The input uMatrix must be MxM and the input vMatrix
143  // must be NxN, both stored in row-major order.
144  void GetU(Real* uMatrix) const;
145  void GetV(Real* vMatrix) const;
146 
147  // Compute a single column of U or V. The reflections and rotations are
148  // applied incrementally. This is useful when you want only a small
149  // number of the singular values or vectors.
150  void GetUColumn(int index, Real* uColumn) const;
151  void GetVColumn(int index, Real* vColumn) const;
152  Real GetSingularValue(int index) const;
153 
154 private:
155  // Bidiagonalize using Householder reflections. On input, mMatrix is a
156  // copy of the input matrix and has one extra row. On output, the
157  // diagonal and superdiagonal contain the bidiagonalized results. The
158  // lower-triangular portion stores the essential parts of the Householder
159  // u vectors (the elements of u after the leading 1-valued component) and
160  // the upper-triangular portion stores the essential parts of the
161  // Householder v vectors. To avoid recomputing 2/Dot(u,u) and 2/Dot(v,v),
162  // these quantities are stored in mTwoInvUTU and mTwoInvVTV.
163  void Bidiagonalize();
164 
165  // A helper for generating Givens rotation sine and cosine robustly.
166  void GetSinCos(Real u, Real v, Real& cs, Real& sn);
167 
168  // Test for (effectively) zero-valued diagonal entries (through all but
169  // the last). For each such entry, the B matrix decouples. Perform
170  // that decoupling. If there are no zero-valued entries, then the
171  // Golub-Kahan step must be performed.
172  bool DiagonalEntriesNonzero(int imin, int imax, Real threshold);
173 
174  // This is Algorithm 8.3.1 in "Matrix Computations, 2nd edition" by
175  // G. H. Golub and C. F. Van Loan.
176  void DoGolubKahanStep(int imin, int imax);
177 
178  // The diagonal entries are not guaranteed to be nonnegative during the
179  // construction. After convergence to a diagonal matrix S, test for
180  // negative entries and build a diagonal matrix that reverses the sign
181  // on the S-entry.
183 
184  // Sort the singular values and compute the corresponding permutation of
185  // the indices of the array storing the singular values. The permutation
186  // is used for reordering the singular values and the corresponding
187  // columns of the orthogonal matrix in the calls to GetSingularValues(...)
188  // and GetOrthogonalMatrices(...).
189  void ComputePermutation(int sortType);
190 
191  // The number rows and columns of the matrices to be processed.
193 
194  // The maximum number of iterations for reducing the bidiagonal matrix
195  // to a diagonal matrix.
196  unsigned int mMaxIterations;
197 
198  // The internal copy of a matrix passed to the solver. See the comments
199  // about function Bidiagonalize() about what is stored in the matrix.
200  std::vector<Real> mMatrix; // MxN elements
201 
202  // After the initial bidiagonalization by Householder reflections, we no
203  // longer need the full mMatrix. Copy the diagonal and superdiagonal
204  // entries to linear arrays in order to be cache friendly.
205  std::vector<Real> mDiagonal; // N elements
206  std::vector<Real> mSuperdiagonal; // N-1 elements
207 
208  // The Givens rotations used to reduce the initial bidiagonal matrix to
209  // a diagonal matrix. A rotation is the identity with the following
210  // replacement entries: R(index0,index0) = cs, R(index0,index1) = sn,
211  // R(index1,index0) = -sn, and R(index1,index1) = cs. If N is the
212  // number of matrix columns and K is the maximum number of iterations, the
213  // maximum number of right or left Givens rotations is K*(N-1). The
214  // maximum amount of memory is allocated to store these. However, we also
215  // potentially need left rotations to decouple the matrix when a diagonal
216  // terms are zero. Worst case is a number of matrices quadratic in N, so
217  // for now we just use std::vector<Rotation> whose initial capacity is
218  // K*(N-1).
220  {
221  GivensRotation();
222  GivensRotation(int inIndex0, int inIndex1, Real inCs, Real inSn);
224  Real cs, sn;
225  };
226 
227  std::vector<GivensRotation> mRGivens;
228  std::vector<GivensRotation> mLGivens;
229 
230  // The diagonal matrix that is used to convert S-entries to nonnegative.
231  std::vector<Real> mFixupDiagonal; // N elements
232 
233  // When sorting is requested, the permutation associated with the sort is
234  // stored in mPermutation. When sorting is not requested, mPermutation[0]
235  // is set to -1. mVisited is used for finding cycles in the permutation.
236  std::vector<int> mPermutation; // N elements
237  mutable std::vector<int> mVisited; // N elements
238 
239  // Temporary storage to compute Householder reflections and to support
240  // sorting of columns of the orthogonal matrices.
241  std::vector<Real> mTwoInvUTU; // N elements
242  std::vector<Real> mTwoInvVTV; // N-2 elements
243  mutable std::vector<Real> mUVector; // M elements
244  mutable std::vector<Real> mVVector; // N elements
245  mutable std::vector<Real> mWVector; // max(M,N) elements
246 };
247 
248 
249 template <typename Real>
251  int numCols, unsigned int maxIterations)
252  :
253  mNumRows(0),
254  mNumCols(0),
255  mMaxIterations(0)
256 {
257  if (numCols > 1 && numRows >= numCols && maxIterations > 0)
258  {
259  mNumRows = numRows;
260  mNumCols = numCols;
261  mMaxIterations = maxIterations;
262  mMatrix.resize(numRows * numCols);
263  mDiagonal.resize(numCols);
264  mSuperdiagonal.resize(numCols - 1);
265  mRGivens.reserve(maxIterations*(numCols - 1));
266  mLGivens.reserve(maxIterations*(numCols - 1));
267  mFixupDiagonal.resize(numCols);
268  mPermutation.resize(numCols);
269  mVisited.resize(numCols);
270  mTwoInvUTU.resize(numCols);
271  mTwoInvVTV.resize(numCols - 2);
272  mUVector.resize(numRows);
273  mVVector.resize(numCols);
274  mWVector.resize(numRows);
275  }
276 }
277 
278 template <typename Real>
280  int sortType)
281 {
282  if (mNumRows > 0)
283  {
284  int numElements = mNumRows * mNumCols;
285  std::copy(input, input + numElements, mMatrix.begin());
286  Bidiagonalize();
287 
288  // Compute 'threshold = multiplier*epsilon*|B|' as the threshold for
289  // diagonal entries effectively zero; that is, |d| <= |threshold|
290  // implies that d is (effectively) zero. TODO: Allow the caller to
291  // pass 'multiplier' to the constructor.
292  //
293  // We will use the L2-norm |B|, which is the length of the elements
294  // of B treated as an NM-tuple. The following code avoids overflow
295  // when accumulating the squares of the elements when those elements
296  // are large.
297  Real maxAbsComp = std::abs(input[0]);
298  for (int i = 1; i < numElements; ++i)
299  {
300  Real absComp = std::abs(input[i]);
301  if (absComp > maxAbsComp)
302  {
303  maxAbsComp = absComp;
304  }
305  }
306 
307  Real norm = (Real)0;
308  if (maxAbsComp > (Real)0)
309  {
310  Real invMaxAbsComp = ((Real)1) / maxAbsComp;
311  for (int i = 0; i < numElements; ++i)
312  {
313  Real ratio = input[i] * invMaxAbsComp;
314  norm += ratio * ratio;
315  }
316  norm = maxAbsComp*sqrt(norm);
317  }
318 
319  Real const multiplier = (Real)8; // TODO: Expose to caller.
320  Real const epsilon = std::numeric_limits<Real>::epsilon();
321  Real const threshold = multiplier * epsilon * norm;
322 
323  mRGivens.clear();
324  mLGivens.clear();
325  for (unsigned int j = 0; j < mMaxIterations; ++j)
326  {
327  int imin = -1, imax = -1;
328  for (int i = mNumCols - 2; i >= 0; --i)
329  {
330  // When a01 is much smaller than its diagonal neighbors, it is
331  // effectively zero.
332  Real a00 = mDiagonal[i];
333  Real a01 = mSuperdiagonal[i];
334  Real a11 = mDiagonal[i + 1];
335  Real sum = std::abs(a00) + std::abs(a11);
336  if (sum + std::abs(a01) != sum)
337  {
338  if (imax == -1)
339  {
340  imax = i;
341  }
342  imin = i;
343  }
344  else
345  {
346  // The superdiagonal term is effectively zero compared to
347  // the neighboring diagonal terms.
348  if (imin >= 0)
349  {
350  break;
351  }
352  }
353  }
354 
355  if (imax == -1)
356  {
357  // The algorithm has converged.
359  ComputePermutation(sortType);
360  return j;
361  }
362 
363  // We need to test diagonal entries of B for zero. For each zero
364  // diagonal entry, zero the superdiagonal.
365  if (DiagonalEntriesNonzero(imin, imax, threshold))
366  {
367  // Process the lower-right-most unreduced bidiagonal block.
368  DoGolubKahanStep(imin, imax);
369  }
370  }
371  return 0xFFFFFFFF;
372  }
373  else
374  {
375  return 0;
376  }
377 }
378 
379 template <typename Real>
381  Real* singularValues) const
382 {
383  if (singularValues && mNumCols > 0)
384  {
385  if (mPermutation[0] >= 0)
386  {
387  // Sorting was requested.
388  for (int i = 0; i < mNumCols; ++i)
389  {
390  int p = mPermutation[i];
391  singularValues[i] = mDiagonal[p];
392  }
393  }
394  else
395  {
396  // Sorting was not requested.
397  size_t numBytes = mNumCols*sizeof(Real);
398  Memcpy(singularValues, &mDiagonal[0], numBytes);
399  }
400  }
401 }
402 
403 template <typename Real>
404 void SingularValueDecomposition<Real>::GetU(Real* uMatrix) const
405 {
406  if (!uMatrix || mNumCols == 0)
407  {
408  // Invalid input or the constructor failed.
409  return;
410  }
411 
412  // Start with the identity matrix.
413  std::fill(uMatrix, uMatrix + mNumRows*mNumRows, (Real)0);
414  for (int d = 0; d < mNumRows; ++d)
415  {
416  uMatrix[d + mNumRows*d] = (Real)1;
417  }
418 
419  // Multiply the Householder reflections using backward accumulation.
420  int r, c;
421  for (int i0 = mNumCols - 1, i1 = i0 + 1; i0 >= 0; --i0, --i1)
422  {
423  // Copy the u vector and 2/Dot(u,u) from the matrix.
424  Real twoinvudu = mTwoInvUTU[i0];
425  Real const* column = &mMatrix[i0];
426  mUVector[i0] = (Real)1;
427  for (r = i1; r < mNumRows; ++r)
428  {
429  mUVector[r] = column[mNumCols*r];
430  }
431 
432  // Compute the w vector.
433  mWVector[i0] = twoinvudu;
434  for (r = i1; r < mNumRows; ++r)
435  {
436  mWVector[r] = (Real)0;
437  for (c = i1; c < mNumRows; ++c)
438  {
439  mWVector[r] += mUVector[c] * uMatrix[r + mNumRows*c];
440  }
441  mWVector[r] *= twoinvudu;
442  }
443 
444  // Update the matrix, U <- U - u*w^T.
445  for (r = i0; r < mNumRows; ++r)
446  {
447  for (c = i0; c < mNumRows; ++c)
448  {
449  uMatrix[c + mNumRows*r] -= mUVector[r] * mWVector[c];
450  }
451  }
452  }
453 
454  // Multiply the Givens rotations.
455  for (auto const& givens : mLGivens)
456  {
457  int j0 = givens.index0;
458  int j1 = givens.index1;
459  for (r = 0; r < mNumRows; ++r, j0 += mNumRows, j1 += mNumRows)
460  {
461  Real& q0 = uMatrix[j0];
462  Real& q1 = uMatrix[j1];
463  Real prd0 = givens.cs * q0 - givens.sn * q1;
464  Real prd1 = givens.sn * q0 + givens.cs * q1;
465  q0 = prd0;
466  q1 = prd1;
467  }
468  }
469 
470  if (mPermutation[0] >= 0)
471  {
472  // Sorting was requested.
473  std::fill(mVisited.begin(), mVisited.end(), 0);
474  for (c = 0; c < mNumCols; ++c)
475  {
476  if (mVisited[c] == 0 && mPermutation[c] != c)
477  {
478  // The item starts a cycle with 2 or more elements.
479  int start = c, current = c, next;
480  for (r = 0; r < mNumRows; ++r)
481  {
482  mWVector[r] = uMatrix[c + mNumRows*r];
483  }
484  while ((next = mPermutation[current]) != start)
485  {
486  mVisited[current] = 1;
487  for (r = 0; r < mNumRows; ++r)
488  {
489  uMatrix[current + mNumRows*r] =
490  uMatrix[next + mNumRows*r];
491  }
492  current = next;
493  }
494  mVisited[current] = 1;
495  for (r = 0; r < mNumRows; ++r)
496  {
497  uMatrix[current + mNumRows*r] = mWVector[r];
498  }
499  }
500  }
501  }
502 }
503 
504 template <typename Real>
505 void SingularValueDecomposition<Real>::GetV(Real* vMatrix) const
506 {
507  if (!vMatrix || mNumCols == 0)
508  {
509  // Invalid input or the constructor failed.
510  return;
511  }
512 
513  // Start with the identity matrix.
514  std::fill(vMatrix, vMatrix + mNumCols*mNumCols, (Real)0);
515  for (int d = 0; d < mNumCols; ++d)
516  {
517  vMatrix[d + mNumCols*d] = (Real)1;
518  }
519 
520  // Multiply the Householder reflections using backward accumulation.
521  int i0 = mNumCols - 3;
522  int i1 = i0 + 1;
523  int i2 = i0 + 2;
524  int r, c;
525  for (; i0 >= 0; --i0, --i1, --i2)
526  {
527  // Copy the v vector and 2/Dot(v,v) from the matrix.
528  Real twoinvvdv = mTwoInvVTV[i0];
529  Real const* row = &mMatrix[mNumCols*i0];
530  mVVector[i1] = (Real)1;
531  for (r = i2; r < mNumCols; ++r)
532  {
533  mVVector[r] = row[r];
534  }
535 
536  // Compute the w vector.
537  mWVector[i1] = twoinvvdv;
538  for (r = i2; r < mNumCols; ++r)
539  {
540  mWVector[r] = (Real)0;
541  for (c = i2; c < mNumCols; ++c)
542  {
543  mWVector[r] += mVVector[c] * vMatrix[r + mNumCols*c];
544  }
545  mWVector[r] *= twoinvvdv;
546  }
547 
548  // Update the matrix, V <- V - v*w^T.
549  for (r = i1; r < mNumCols; ++r)
550  {
551  for (c = i1; c < mNumCols; ++c)
552  {
553  vMatrix[c + mNumCols*r] -= mVVector[r] * mWVector[c];
554  }
555  }
556  }
557 
558  // Multiply the Givens rotations.
559  for (auto const& givens : mRGivens)
560  {
561  int j0 = givens.index0;
562  int j1 = givens.index1;
563  for (c = 0; c < mNumCols; ++c, j0 += mNumCols, j1 += mNumCols)
564  {
565  Real& q0 = vMatrix[j0];
566  Real& q1 = vMatrix[j1];
567  Real prd0 = givens.cs * q0 - givens.sn * q1;
568  Real prd1 = givens.sn * q0 + givens.cs * q1;
569  q0 = prd0;
570  q1 = prd1;
571  }
572  }
573 
574  // Fix-up the diagonal.
575  for (r = 0; r < mNumCols; ++r)
576  {
577  for (c = 0; c < mNumCols; ++c)
578  {
579  vMatrix[c + mNumCols*r] *= mFixupDiagonal[c];
580  }
581  }
582 
583  if (mPermutation[0] >= 0)
584  {
585  // Sorting was requested.
586  std::fill(mVisited.begin(), mVisited.end(), 0);
587  for (c = 0; c < mNumCols; ++c)
588  {
589  if (mVisited[c] == 0 && mPermutation[c] != c)
590  {
591  // The item starts a cycle with 2 or more elements.
592  int start = c, current = c, next;
593  for (r = 0; r < mNumCols; ++r)
594  {
595  mWVector[r] = vMatrix[c + mNumCols*r];
596  }
597  while ((next = mPermutation[current]) != start)
598  {
599  mVisited[current] = 1;
600  for (r = 0; r < mNumCols; ++r)
601  {
602  vMatrix[current + mNumCols*r] =
603  vMatrix[next + mNumCols*r];
604  }
605  current = next;
606  }
607  mVisited[current] = 1;
608  for (r = 0; r < mNumCols; ++r)
609  {
610  vMatrix[current + mNumCols*r] = mWVector[r];
611  }
612  }
613  }
614  }
615 }
616 
617 template <typename Real>
619  Real* uColumn) const
620 {
621  if (0 <= index && index < mNumRows)
622  {
623  // y = H*x, then x and y are swapped for the next H
624  Real* x = uColumn;
625  Real* y = &mWVector[0];
626 
627  // Start with the Euclidean basis vector.
628  memset(x, 0, mNumRows * sizeof(Real));
629  if (index < mNumCols && mPermutation[0] >= 0)
630  {
631  // Sorting was requested.
632  x[mPermutation[index]] = (Real)1;
633  }
634  else
635  {
636  x[index] = (Real)1;
637  }
638 
639  // Apply the Givens rotations.
640  for (auto const& givens : gte::reverse(mLGivens))
641  {
642  Real& xr0 = x[givens.index0];
643  Real& xr1 = x[givens.index1];
644  Real tmp0 = givens.cs * xr0 + givens.sn * xr1;
645  Real tmp1 = -givens.sn * xr0 + givens.cs * xr1;
646  xr0 = tmp0;
647  xr1 = tmp1;
648  }
649 
650  // Apply the Householder reflections.
651  for (int c = mNumCols - 1; c >= 0; --c)
652  {
653  // Get the Householder vector u.
654  int r;
655  for (r = 0; r < c; ++r)
656  {
657  y[r] = x[r];
658  }
659 
660  // Compute s = Dot(x,u) * 2/u^T*u.
661  Real s = x[r]; // r = c, u[r] = 1
662  for (int j = r + 1; j < mNumRows; ++j)
663  {
664  s += x[j] * mMatrix[c + mNumCols * j];
665  }
666  s *= mTwoInvUTU[c];
667 
668  // r = c, y[r] = x[r]*u[r] - s = x[r] - s because u[r] = 1
669  y[r] = x[r] - s;
670 
671  // Compute the remaining components of y.
672  for (++r; r < mNumRows; ++r)
673  {
674  y[r] = x[r] - s * mMatrix[c + mNumCols * r];
675  }
676 
677  std::swap(x, y);
678  }
679 
680  // The final product is stored in x.
681  if (x != uColumn)
682  {
683  size_t numBytes = mNumRows*sizeof(Real);
684  Memcpy(uColumn, x, numBytes);
685  }
686  }
687 }
688 
689 template <typename Real>
691  Real* vColumn) const
692 {
693  if (0 <= index && index < mNumCols)
694  {
695  // y = H*x, then x and y are swapped for the next H
696  Real* x = vColumn;
697  Real* y = &mWVector[0];
698 
699  // Start with the Euclidean basis vector.
700  memset(x, 0, mNumCols * sizeof(Real));
701  if (mPermutation[0] >= 0)
702  {
703  // Sorting was requested.
704  int p = mPermutation[index];
705  x[p] = mFixupDiagonal[p];
706  }
707  else
708  {
709  x[index] = mFixupDiagonal[index];
710  }
711 
712  // Apply the Givens rotations.
713  for (auto const& givens : gte::reverse(mRGivens))
714  {
715  Real& xr0 = x[givens.index0];
716  Real& xr1 = x[givens.index1];
717  Real tmp0 = givens.cs * xr0 + givens.sn * xr1;
718  Real tmp1 = -givens.sn * xr0 + givens.cs * xr1;
719  xr0 = tmp0;
720  xr1 = tmp1;
721  }
722 
723  // Apply the Householder reflections.
724  for (int r = mNumCols - 3; r >= 0; --r)
725  {
726  // Get the Householder vector v.
727  int c;
728  for (c = 0; c < r + 1; ++c)
729  {
730  y[c] = x[c];
731  }
732 
733  // Compute s = Dot(x,v) * 2/v^T*v.
734  Real s = x[c]; // c = r+1, v[c] = 1
735  for (int j = c + 1; j < mNumCols; ++j)
736  {
737  s += x[j] * mMatrix[j + mNumCols * r];
738  }
739  s *= mTwoInvVTV[r];
740 
741  // c = r+1, y[c] = x[c]*v[c] - s = x[c] - s because v[c] = 1
742  y[c] = x[c] - s;
743 
744  // Compute the remaining components of y.
745  for (++c; c < mNumCols; ++c)
746  {
747  y[c] = x[c] - s * mMatrix[c + mNumCols * r];
748  }
749 
750  std::swap(x, y);
751  }
752 
753  // The final product is stored in x.
754  if (x != vColumn)
755  {
756  size_t numBytes = mNumCols*sizeof(Real);
757  Memcpy(vColumn, x, numBytes);
758  }
759  }
760 }
761 
762 template <typename Real>
764 {
765  if (0 <= index && index < mNumCols)
766  {
767  if (mPermutation[0] >= 0)
768  {
769  // Sorting was requested.
770  return mDiagonal[mPermutation[index]];
771  }
772  else
773  {
774  // Sorting was not requested.
775  return mDiagonal[index];
776  }
777  }
778  else
779  {
780  return (Real)0;
781  }
782 }
783 
784 template <typename Real>
786 {
787  int r, c;
788  for (int i = 0, ip1 = 1; i < mNumCols; ++i, ++ip1)
789  {
790  // Compute the U-Householder vector.
791  Real length = (Real)0;
792  for (r = i; r < mNumRows; ++r)
793  {
794  Real ur = mMatrix[i + mNumCols*r];
795  mUVector[r] = ur;
796  length += ur * ur;
797  }
798  Real udu = (Real)1;
799  length = sqrt(length);
800  if (length >(Real)0)
801  {
802  Real& u1 = mUVector[i];
803  Real sgn = (u1 >= (Real)0 ? (Real)1 : (Real)-1);
804  Real invDenom = ((Real)1) / (u1 + sgn * length);
805  u1 = (Real)1;
806  for (r = ip1; r < mNumRows; ++r)
807  {
808  Real& ur = mUVector[r];
809  ur *= invDenom;
810  udu += ur * ur;
811  }
812  }
813 
814  // Compute the rank-1 offset u*w^T.
815  Real invudu = (Real)1 / udu;
816  Real twoinvudu = invudu * (Real)2;
817  for (c = i; c < mNumCols; ++c)
818  {
819  mWVector[c] = (Real)0;
820  for (r = i; r < mNumRows; ++r)
821  {
822  mWVector[c] += mMatrix[c + mNumCols*r] * mUVector[r];
823  }
824  mWVector[c] *= twoinvudu;
825  }
826 
827  // Update the input matrix.
828  for (r = i; r < mNumRows; ++r)
829  {
830  for (c = i; c < mNumCols; ++c)
831  {
832  mMatrix[c + mNumCols*r] -= mUVector[r] * mWVector[c];
833  }
834  }
835 
836  if (i < mNumCols - 2)
837  {
838  // Compute the V-Householder vectors.
839  length = (Real)0;
840  for (c = ip1; c < mNumCols; ++c)
841  {
842  Real vc = mMatrix[c + mNumCols*i];
843  mVVector[c] = vc;
844  length += vc * vc;
845  }
846  Real vdv = (Real)1;
847  length = sqrt(length);
848  if (length >(Real)0)
849  {
850  Real& v1 = mVVector[ip1];
851  Real sgn = (v1 >= (Real)0 ? (Real)1 : (Real)-1);
852  Real invDenom = ((Real)1) / (v1 + sgn * length);
853  v1 = (Real)1;
854  for (c = ip1 + 1; c < mNumCols; ++c)
855  {
856  Real& vc = mVVector[c];
857  vc *= invDenom;
858  vdv += vc * vc;
859  }
860  }
861 
862  // Compute the rank-1 offset w*v^T.
863  Real invvdv = (Real)1 / vdv;
864  Real twoinvvdv = invvdv * (Real)2;
865  for (r = i; r < mNumRows; ++r)
866  {
867  mWVector[r] = (Real)0;
868  for (c = ip1; c < mNumCols; ++c)
869  {
870  mWVector[r] += mMatrix[c + mNumCols*r] * mVVector[c];
871  }
872  mWVector[r] *= twoinvvdv;
873  }
874 
875  // Update the input matrix.
876  for (r = i; r < mNumRows; ++r)
877  {
878  for (c = ip1; c < mNumCols; ++c)
879  {
880  mMatrix[c + mNumCols*r] -= mWVector[r] * mVVector[c];
881  }
882  }
883 
884  mTwoInvVTV[i] = twoinvvdv;
885  for (c = i + 2; c < mNumCols; ++c)
886  {
887  mMatrix[c + mNumCols*i] = mVVector[c];
888  }
889  }
890 
891  mTwoInvUTU[i] = twoinvudu;
892  for (r = ip1; r < mNumRows; ++r)
893  {
894  mMatrix[i + mNumCols*r] = mUVector[r];
895  }
896  }
897 
898  // Copy the diagonal and subdiagonal for cache coherence in the
899  // Golub-Kahan iterations.
900  int k, ksup = mNumCols - 1, index = 0, delta = mNumCols + 1;
901  for (k = 0; k < ksup; ++k, index += delta)
902  {
903  mDiagonal[k] = mMatrix[index];
904  mSuperdiagonal[k] = mMatrix[index + 1];
905  }
906  mDiagonal[k] = mMatrix[index];
907 }
908 
909 template <typename Real>
911  Real& sn)
912 {
913  // Solves sn*x + cs*y = 0 robustly.
914  Real tau;
915  if (y != (Real)0)
916  {
917  if (std::abs(y) > std::abs(x))
918  {
919  tau = -x / y;
920  sn = ((Real)1) / sqrt(((Real)1) + tau*tau);
921  cs = sn * tau;
922  }
923  else
924  {
925  tau = -y / x;
926  cs = ((Real)1) / sqrt(((Real)1) + tau*tau);
927  sn = cs * tau;
928  }
929  }
930  else
931  {
932  cs = (Real)1;
933  sn = (Real)0;
934  }
935 }
936 
937 template <typename Real>
939  int imax, Real threshold)
940 {
941  for (int i = imin; i <= imax; ++i)
942  {
943  if (std::abs(mDiagonal[i]) <= threshold)
944  {
945  // Use planar rotations to case the superdiagonal entry out of
946  // the matrix, thus producing a row of zeros.
947  Real x, z, cs, sn;
948  Real y = mSuperdiagonal[i];
949  mSuperdiagonal[i] = (Real)0;
950  for (int j = i + 1; j <= imax + 1; ++j)
951  {
952  x = mDiagonal[j];
953  GetSinCos(x, y, cs, sn);
954  mLGivens.push_back(GivensRotation(i, j, cs, sn));
955  mDiagonal[j] = cs*x - sn*y;
956  if (j <= imax)
957  {
958  z = mSuperdiagonal[j];
959  mSuperdiagonal[j] = cs*z;
960  y = sn*z;
961  }
962  }
963  return false;
964  }
965  }
966  return true;
967 }
968 
969 template <typename Real>
971 {
972  // The implicit shift. Compute the eigenvalue u of the lower-right 2x2
973  // block of A = B^T*B that is closer to b11.
974  Real f0 = (imax >= (Real)1 ? mSuperdiagonal[imax - 1] : (Real)0);
975  Real d1 = mDiagonal[imax];
976  Real f1 = mSuperdiagonal[imax];
977  Real d2 = mDiagonal[imax + 1];
978  Real a00 = d1*d1 + f0*f0;
979  Real a01 = d1*f1;
980  Real a11 = d2*d2 + f1*f1;
981  Real dif = (a00 - a11) * (Real)0.5;
982  Real sgn = (dif >= (Real)0 ? (Real)1 : (Real)-1);
983  Real a01sqr = a01 * a01;
984  Real u = a11 - a01sqr / (dif + sgn*sqrt(dif*dif + a01sqr));
985  Real x = mDiagonal[imin] * mDiagonal[imin] - u;
986  Real y = mDiagonal[imin] * mSuperdiagonal[imin];
987 
988  Real a12, a21, a22, a23, cs, sn;
989  Real a02 = (Real)0;
990  int i0 = imin - 1, i1 = imin, i2 = imin + 1;
991  for (; i1 <= imax; ++i0, ++i1, ++i2)
992  {
993  // Compute the Givens rotation G and save it for use in computing
994  // V in U^T*A*V = S.
995  GetSinCos(x, y, cs, sn);
996  mRGivens.push_back(GivensRotation(i1, i2, cs, sn));
997 
998  // Update B0 = B*G.
999  if (i1 > imin)
1000  {
1001  mSuperdiagonal[i0] = cs*mSuperdiagonal[i0] - sn*a02;
1002  }
1003 
1004  a11 = mDiagonal[i1];
1005  a12 = mSuperdiagonal[i1];
1006  a22 = mDiagonal[i2];
1007  mDiagonal[i1] = cs*a11 - sn*a12;
1008  mSuperdiagonal[i1] = sn*a11 + cs*a12;
1009  mDiagonal[i2] = cs*a22;
1010  a21 = -sn*a22;
1011 
1012  // Update the parameters for the next Givens rotations.
1013  x = mDiagonal[i1];
1014  y = a21;
1015 
1016  // Compute the Givens rotation G and save it for use in computing
1017  // U in U^T*A*V = S.
1018  GetSinCos(x, y, cs, sn);
1019  mLGivens.push_back(GivensRotation(i1, i2, cs, sn));
1020 
1021  // Update B1 = G^T*B0.
1022  a11 = mDiagonal[i1];
1023  a12 = mSuperdiagonal[i1];
1024  a22 = mDiagonal[i2];
1025  mDiagonal[i1] = cs*a11 - sn*a21;
1026  mSuperdiagonal[i1] = cs*a12 - sn*a22;
1027  mDiagonal[i2] = sn*a12 + cs*a22;
1028 
1029  if (i1 < imax)
1030  {
1031  a23 = mSuperdiagonal[i2];
1032  a02 = -sn*a23;
1033  mSuperdiagonal[i2] = cs*a23;
1034 
1035  // Update the parameters for the next Givens rotations.
1036  x = mSuperdiagonal[i1];
1037  y = a02;
1038  }
1039  }
1040 }
1041 
1042 template <typename Real>
1044 {
1045  for (int i = 0; i < mNumCols; ++i)
1046  {
1047  if (mDiagonal[i] >= (Real)0)
1048  {
1049  mFixupDiagonal[i] = (Real)1;
1050  }
1051  else
1052  {
1053  mDiagonal[i] = -mDiagonal[i];
1054  mFixupDiagonal[i] = (Real)-1;
1055  }
1056  }
1057 }
1058 
1059 template <typename Real>
1061 {
1062  if (sortType == 0)
1063  {
1064  // Set a flag for GetSingularValues() and GetOrthogonalMatrices() to
1065  // know that sorted output was not requested.
1066  mPermutation[0] = -1;
1067  return;
1068  }
1069 
1070  // Compute the permutation induced by sorting. Initially, we start with
1071  // the identity permutation I = (0,1,...,N-1).
1072  struct SortItem
1073  {
1074  Real singularValue;
1075  int index;
1076  };
1077 
1078  std::vector<SortItem> items(mNumCols);
1079  int i;
1080  for (i = 0; i < mNumCols; ++i)
1081  {
1082  items[i].singularValue = mDiagonal[i];
1083  items[i].index = i;
1084  }
1085 
1086  if (sortType > 0)
1087  {
1088  std::sort(items.begin(), items.end(),
1089  [](SortItem const& item0, SortItem const& item1)
1090  {
1091  return item0.singularValue < item1.singularValue;
1092  }
1093  );
1094  }
1095  else
1096  {
1097  std::sort(items.begin(), items.end(),
1098  [](SortItem const& item0, SortItem const& item1)
1099  {
1100  return item0.singularValue > item1.singularValue;
1101  }
1102  );
1103  }
1104 
1105  i = 0;
1106  for (auto const& item : items)
1107  {
1108  mPermutation[i++] = item.index;
1109  }
1110 
1111  // GetOrthogonalMatrices() has nontrivial code for computing the
1112  // orthogonal U and V from the reflections and rotations. To avoid
1113  // complicating the code further when sorting is requested, U and V are
1114  // computed as in the unsorted case. We then need to swap columns of
1115  // U and V to be consistent with the sorting of the singular values. To
1116  // minimize copying due to column swaps, we use permutation P. The
1117  // minimum number of transpositions to obtain P from I is N minus the
1118  // number of cycles of P. Each cycle is reordered with a minimum number
1119  // of transpositions; that is, the singular items are cyclically swapped,
1120  // leading to a minimum amount of copying. For example, if there is a
1121  // cycle i0 -> i1 -> i2 -> i3, then the copying is
1122  // save = singularitem[i0];
1123  // singularitem[i1] = singularitem[i2];
1124  // singularitem[i2] = singularitem[i3];
1125  // singularitem[i3] = save;
1126 }
1127 
1128 template <typename Real>
1130 {
1131  // No default initialization for fast creation of std::vector of objects
1132  // of this type.
1133 }
1134 
1135 template <typename Real>
1137  int inIndex1, Real inCs, Real inSn)
1138  :
1139  index0(inIndex0),
1140  index1(inIndex1),
1141  cs(inCs),
1142  sn(inSn)
1143 {
1144 }
1145 
1146 
1147 }
SingularValueDecomposition(int numRows, int numCols, unsigned int maxIterations)
ReversalObject< ReverseIterator > reverse(Iterable &&range)
GLfixed u1
Definition: glext.h:4927
gte::BSNumber< UIntegerType > abs(gte::BSNumber< UIntegerType > const &number)
Definition: GteBSNumber.h:966
GLuint start
Definition: glcorearb.h:470
GLfloat GLfloat v1
Definition: glcorearb.h:812
bool DiagonalEntriesNonzero(int imin, int imax, Real threshold)
void GetVColumn(int index, Real *vColumn) const
GLint GLenum GLint x
Definition: glcorearb.h:404
GLenum GLenum GLsizei void GLsizei void * column
Definition: glext.h:2725
const GLubyte * c
Definition: glext.h:11671
GLenum GLenum GLsizei void * row
Definition: glext.h:2725
GLuint GLsizei GLsizei * length
Definition: glcorearb.h:790
GLdouble GLdouble GLdouble z
Definition: glcorearb.h:843
const GLdouble * v
Definition: glcorearb.h:832
GLboolean r
Definition: glcorearb.h:1217
GLdouble s
Definition: glext.h:231
GLenum GLenum GLenum input
Definition: glext.h:9913
GLuint index
Definition: glcorearb.h:781
void Memcpy(void *target, void const *source, size_t count)
Definition: GteWrapper.cpp:16
GLfloat GLfloat p
Definition: glext.h:11668
unsigned int Solve(Real const *input, int sortType)
GLint y
Definition: glcorearb.h:98
void GetSinCos(Real u, Real v, Real &cs, Real &sn)
void GetSingularValues(Real *singularValues) const
void GetUColumn(int index, Real *uColumn) const


geometric_tools_engine
Author(s): Yijiang Huang
autogenerated on Thu Jul 18 2019 04:00:01