vcglib: Inverse.h Source File

00001 // This file is part of Eigen, a lightweight C++ template library
00002 // for linear algebra. Eigen itself is part of the KDE project.
00003 //
00004 // Copyright (C) 2008 Benoit Jacob <jacob.benoit.1@gmail.com>
00005 //
00006 // Eigen is free software; you can redistribute it and/or
00007 // modify it under the terms of the GNU Lesser General Public
00008 // License as published by the Free Software Foundation; either
00009 // version 3 of the License, or (at your option) any later version.
00010 //
00011 // Alternatively, you can redistribute it and/or
00012 // modify it under the terms of the GNU General Public License as
00013 // published by the Free Software Foundation; either version 2 of
00014 // the License, or (at your option) any later version.
00015 //
00016 // Eigen is distributed in the hope that it will be useful, but WITHOUT ANY
00017 // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
00018 // FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License or the
00019 // GNU General Public License for more details.
00020 //
00021 // You should have received a copy of the GNU Lesser General Public
00022 // License and a copy of the GNU General Public License along with
00023 // Eigen. If not, see <http://www.gnu.org/licenses/>.
00024 
00025 #ifndef EIGEN_INVERSE_H
00026 #define EIGEN_INVERSE_H
00027 
00028 /********************************************************************
00029 *** Part 1 : optimized implementations for fixed-size 2,3,4 cases ***
00030 ********************************************************************/
00031 
00032 template<typename XprType, typename MatrixType>
00033 void ei_compute_inverse_in_size2_case(const XprType& matrix, MatrixType* result)
00034 {
00035   typedef typename MatrixType::Scalar Scalar;
00036   const Scalar invdet = Scalar(1) / matrix.determinant();
00037   result->coeffRef(0,0) = matrix.coeff(1,1) * invdet;
00038   result->coeffRef(1,0) = -matrix.coeff(1,0) * invdet;
00039   result->coeffRef(0,1) = -matrix.coeff(0,1) * invdet;
00040   result->coeffRef(1,1) = matrix.coeff(0,0) * invdet;
00041 }
00042 
00043 template<typename XprType, typename MatrixType>
00044 bool ei_compute_inverse_in_size2_case_with_check(const XprType& matrix, MatrixType* result)
00045 {
00046   typedef typename MatrixType::Scalar Scalar;
00047   const Scalar det = matrix.determinant();
00048   if(ei_isMuchSmallerThan(det, matrix.cwise().abs().maxCoeff())) return false;
00049   const Scalar invdet = Scalar(1) / det;
00050   result->coeffRef(0,0) = matrix.coeff(1,1) * invdet;
00051   result->coeffRef(1,0) = -matrix.coeff(1,0) * invdet;
00052   result->coeffRef(0,1) = -matrix.coeff(0,1) * invdet;
00053   result->coeffRef(1,1) = matrix.coeff(0,0) * invdet;
00054   return true;
00055 }
00056 
00057 template<typename Derived, typename OtherDerived>
00058 void ei_compute_inverse_in_size3_case(const Derived& matrix, OtherDerived* result)
00059 {
00060   typedef typename Derived::Scalar Scalar;
00061   const Scalar det_minor00 = matrix.minor(0,0).determinant();
00062   const Scalar det_minor10 = matrix.minor(1,0).determinant();
00063   const Scalar det_minor20 = matrix.minor(2,0).determinant();
00064   const Scalar invdet = Scalar(1) / ( det_minor00 * matrix.coeff(0,0)
00065                                     - det_minor10 * matrix.coeff(1,0)
00066                                     + det_minor20 * matrix.coeff(2,0) );
00067   result->coeffRef(0, 0) = det_minor00 * invdet;
00068   result->coeffRef(0, 1) = -det_minor10 * invdet;
00069   result->coeffRef(0, 2) = det_minor20 * invdet;
00070   result->coeffRef(1, 0) = -matrix.minor(0,1).determinant() * invdet;
00071   result->coeffRef(1, 1) = matrix.minor(1,1).determinant() * invdet;
00072   result->coeffRef(1, 2) = -matrix.minor(2,1).determinant() * invdet;
00073   result->coeffRef(2, 0) = matrix.minor(0,2).determinant() * invdet;
00074   result->coeffRef(2, 1) = -matrix.minor(1,2).determinant() * invdet;
00075   result->coeffRef(2, 2) = matrix.minor(2,2).determinant() * invdet;
00076 }
00077 
00078 template<typename Derived, typename OtherDerived, typename Scalar = typename Derived::Scalar>
00079 struct ei_compute_inverse_in_size4_case
00080 {
00081   static void run(const Derived& matrix, OtherDerived& result)
00082   {
00083     result.coeffRef(0,0) = matrix.minor(0,0).determinant();
00084     result.coeffRef(1,0) = -matrix.minor(0,1).determinant();
00085     result.coeffRef(2,0) = matrix.minor(0,2).determinant();
00086     result.coeffRef(3,0) = -matrix.minor(0,3).determinant();
00087     result.coeffRef(0,2) = matrix.minor(2,0).determinant();
00088     result.coeffRef(1,2) = -matrix.minor(2,1).determinant();
00089     result.coeffRef(2,2) = matrix.minor(2,2).determinant();
00090     result.coeffRef(3,2) = -matrix.minor(2,3).determinant();
00091     result.coeffRef(0,1) = -matrix.minor(1,0).determinant();
00092     result.coeffRef(1,1) = matrix.minor(1,1).determinant();
00093     result.coeffRef(2,1) = -matrix.minor(1,2).determinant();
00094     result.coeffRef(3,1) = matrix.minor(1,3).determinant();
00095     result.coeffRef(0,3) = -matrix.minor(3,0).determinant();
00096     result.coeffRef(1,3) = matrix.minor(3,1).determinant();
00097     result.coeffRef(2,3) = -matrix.minor(3,2).determinant();
00098     result.coeffRef(3,3) = matrix.minor(3,3).determinant();
00099     result /= (matrix.col(0).cwise()*result.row(0).transpose()).sum();
00100   }
00101 };
00102 
00103 #ifdef EIGEN_VECTORIZE_SSE
00104 // The SSE code for the 4x4 float matrix inverse in this file comes from the file
00105 //   ftp://download.intel.com/design/PentiumIII/sml/24504301.pdf
00106 // its copyright information is:
00107 //   Copyright (C) 1999 Intel Corporation
00108 // See page ii of that document for legal stuff. Not being lawyers, we just assume
00109 // here that if Intel makes this document publically available, with source code
00110 // and detailed explanations, it's because they want their CPUs to be fed with
00111 // good code, and therefore they presumably don't mind us using it in Eigen.
00112 template<typename Derived, typename OtherDerived>
00113 struct ei_compute_inverse_in_size4_case<Derived, OtherDerived, float>
00114 {
00115   static void run(const Derived& matrix, OtherDerived& result)
00116   {
00117     // Variables (Streaming SIMD Extensions registers) which will contain cofactors and, later, the
00118     // lines of the inverted matrix.
00119     __m128 minor0, minor1, minor2, minor3;
00120 
00121     // Variables which will contain the lines of the reference matrix and, later (after the transposition),
00122     // the columns of the original matrix.
00123     __m128 row0, row1, row2, row3;
00124 
00125     // Temporary variables and the variable that will contain the matrix determinant.
00126     __m128 det, tmp1;
00127 
00128     // Matrix transposition
00129     const float *src = matrix.data();
00130     tmp1  = _mm_loadh_pi(_mm_castpd_ps(_mm_load_sd((double*)src)), (__m64*)(src+ 4));
00131     row1  = _mm_loadh_pi(_mm_castpd_ps(_mm_load_sd((double*)(src+8))), (__m64*)(src+12));
00132     row0  = _mm_shuffle_ps(tmp1, row1, 0x88);
00133     row1  = _mm_shuffle_ps(row1, tmp1, 0xDD);
00134     tmp1  = _mm_loadh_pi(_mm_castpd_ps(_mm_load_sd((double*)(src+ 2))), (__m64*)(src+ 6));
00135     row3  = _mm_loadh_pi(_mm_castpd_ps(_mm_load_sd((double*)(src+10))), (__m64*)(src+14));
00136     row2  = _mm_shuffle_ps(tmp1, row3, 0x88);
00137     row3  = _mm_shuffle_ps(row3, tmp1, 0xDD);
00138 
00139 
00140     // Cofactors calculation. Because in the process of cofactor computation some pairs in three-
00141     // element products are repeated, it is not reasonable to load these pairs anew every time. The
00142     // values in the registers with these pairs are formed using shuffle instruction. Cofactors are
00143     // calculated row by row (4 elements are placed in 1 SP FP SIMD floating point register).
00144 
00145     tmp1   = _mm_mul_ps(row2, row3);
00146     tmp1   = _mm_shuffle_ps(tmp1, tmp1, 0xB1);
00147     minor0  = _mm_mul_ps(row1, tmp1);
00148     minor1  = _mm_mul_ps(row0, tmp1);
00149     tmp1   = _mm_shuffle_ps(tmp1, tmp1, 0x4E);
00150     minor0  = _mm_sub_ps(_mm_mul_ps(row1, tmp1), minor0);
00151     minor1  = _mm_sub_ps(_mm_mul_ps(row0, tmp1), minor1);
00152     minor1  = _mm_shuffle_ps(minor1, minor1, 0x4E);
00153     //    -----------------------------------------------
00154     tmp1   = _mm_mul_ps(row1, row2);
00155     tmp1   = _mm_shuffle_ps(tmp1, tmp1, 0xB1);
00156     minor0  = _mm_add_ps(_mm_mul_ps(row3, tmp1), minor0);
00157     minor3  = _mm_mul_ps(row0, tmp1);
00158     tmp1   = _mm_shuffle_ps(tmp1, tmp1, 0x4E);
00159     minor0  = _mm_sub_ps(minor0, _mm_mul_ps(row3, tmp1));
00160     minor3  = _mm_sub_ps(_mm_mul_ps(row0, tmp1), minor3);
00161     minor3  = _mm_shuffle_ps(minor3, minor3, 0x4E);
00162     //    -----------------------------------------------
00163     tmp1   = _mm_mul_ps(_mm_shuffle_ps(row1, row1, 0x4E), row3);
00164     tmp1   = _mm_shuffle_ps(tmp1, tmp1, 0xB1);
00165     row2   = _mm_shuffle_ps(row2, row2, 0x4E);
00166     minor0  = _mm_add_ps(_mm_mul_ps(row2, tmp1), minor0);
00167     minor2  = _mm_mul_ps(row0, tmp1);
00168     tmp1   = _mm_shuffle_ps(tmp1, tmp1, 0x4E);
00169     minor0  = _mm_sub_ps(minor0, _mm_mul_ps(row2, tmp1));
00170     minor2  = _mm_sub_ps(_mm_mul_ps(row0, tmp1), minor2);
00171     minor2  = _mm_shuffle_ps(minor2, minor2, 0x4E);
00172     //    -----------------------------------------------
00173     tmp1   = _mm_mul_ps(row0, row1);
00174     tmp1   = _mm_shuffle_ps(tmp1, tmp1, 0xB1);
00175     minor2 = _mm_add_ps(_mm_mul_ps(row3, tmp1), minor2);
00176     minor3 = _mm_sub_ps(_mm_mul_ps(row2, tmp1), minor3);
00177     tmp1   = _mm_shuffle_ps(tmp1, tmp1, 0x4E);
00178     minor2 = _mm_sub_ps(_mm_mul_ps(row3, tmp1), minor2);
00179     minor3 = _mm_sub_ps(minor3, _mm_mul_ps(row2, tmp1));
00180     //           -----------------------------------------------
00181     tmp1   = _mm_mul_ps(row0, row3);
00182     tmp1   = _mm_shuffle_ps(tmp1, tmp1, 0xB1);
00183     minor1 = _mm_sub_ps(minor1, _mm_mul_ps(row2, tmp1));
00184     minor2 = _mm_add_ps(_mm_mul_ps(row1, tmp1), minor2);
00185     tmp1   = _mm_shuffle_ps(tmp1, tmp1, 0x4E);
00186     minor1 = _mm_add_ps(_mm_mul_ps(row2, tmp1), minor1);
00187     minor2 = _mm_sub_ps(minor2, _mm_mul_ps(row1, tmp1));
00188     //           -----------------------------------------------
00189     tmp1   = _mm_mul_ps(row0, row2);
00190     tmp1   = _mm_shuffle_ps(tmp1, tmp1, 0xB1);
00191     minor1 = _mm_add_ps(_mm_mul_ps(row3, tmp1), minor1);
00192     minor3 = _mm_sub_ps(minor3, _mm_mul_ps(row1, tmp1));
00193     tmp1   = _mm_shuffle_ps(tmp1, tmp1, 0x4E);
00194     minor1 = _mm_sub_ps(minor1, _mm_mul_ps(row3, tmp1));
00195     minor3 = _mm_add_ps(_mm_mul_ps(row1, tmp1), minor3);
00196 
00197     // Evaluation of determinant and its reciprocal value. In the original Intel document,
00198     // 1/det was evaluated using a fast rcpps command with subsequent approximation using
00199     // the Newton-Raphson algorithm. Here, we go for a IEEE-compliant division instead,
00200     // so as to not compromise precision at all.
00201     det    = _mm_mul_ps(row0, minor0);
00202     det    = _mm_add_ps(_mm_shuffle_ps(det, det, 0x4E), det);
00203     det    = _mm_add_ss(_mm_shuffle_ps(det, det, 0xB1), det);
00204 //     tmp1= _mm_rcp_ss(det);
00205 //     det= _mm_sub_ss(_mm_add_ss(tmp1, tmp1), _mm_mul_ss(det, _mm_mul_ss(tmp1, tmp1)));
00206     det    = _mm_div_ss(_mm_set_ss(1.0f), det); // <--- yay, one original line not copied from Intel
00207     det    = _mm_shuffle_ps(det, det, 0x00);
00208     // warning, Intel's variable naming is very confusing: now 'det' is 1/det !
00209 
00210     // Multiplication of cofactors by 1/det. Storing the inverse matrix to the address in pointer src.
00211     minor0 = _mm_mul_ps(det, minor0);
00212     float *dst = result.data();
00213     _mm_storel_pi((__m64*)(dst), minor0);
00214     _mm_storeh_pi((__m64*)(dst+2), minor0);
00215     minor1 = _mm_mul_ps(det, minor1);
00216     _mm_storel_pi((__m64*)(dst+4), minor1);
00217     _mm_storeh_pi((__m64*)(dst+6), minor1);
00218     minor2 = _mm_mul_ps(det, minor2);
00219     _mm_storel_pi((__m64*)(dst+ 8), minor2);
00220     _mm_storeh_pi((__m64*)(dst+10), minor2);
00221     minor3 = _mm_mul_ps(det, minor3);
00222     _mm_storel_pi((__m64*)(dst+12), minor3);
00223     _mm_storeh_pi((__m64*)(dst+14), minor3);
00224   }
00225 };
00226 #endif
00227 
00228 /***********************************************
00229 *** Part 2 : selector and MatrixBase methods ***
00230 ***********************************************/
00231 
00232 template<typename Derived, typename OtherDerived, int Size = Derived::RowsAtCompileTime>
00233 struct ei_compute_inverse
00234 {
00235   static inline void run(const Derived& matrix, OtherDerived* result)
00236   {
00237     LU<Derived> lu(matrix);
00238     lu.computeInverse(result);
00239   }
00240 };
00241 
00242 template<typename Derived, typename OtherDerived>
00243 struct ei_compute_inverse<Derived, OtherDerived, 1>
00244 {
00245   static inline void run(const Derived& matrix, OtherDerived* result)
00246   {
00247     typedef typename Derived::Scalar Scalar;
00248     result->coeffRef(0,0) = Scalar(1) / matrix.coeff(0,0);
00249   }
00250 };
00251 
00252 template<typename Derived, typename OtherDerived>
00253 struct ei_compute_inverse<Derived, OtherDerived, 2>
00254 {
00255   static inline void run(const Derived& matrix, OtherDerived* result)
00256   {
00257     ei_compute_inverse_in_size2_case(matrix, result);
00258   }
00259 };
00260 
00261 template<typename Derived, typename OtherDerived>
00262 struct ei_compute_inverse<Derived, OtherDerived, 3>
00263 {
00264   static inline void run(const Derived& matrix, OtherDerived* result)
00265   {
00266     ei_compute_inverse_in_size3_case(matrix, result);
00267   }
00268 };
00269 
00270 template<typename Derived, typename OtherDerived>
00271 struct ei_compute_inverse<Derived, OtherDerived, 4>
00272 {
00273   static inline void run(const Derived& matrix, OtherDerived* result)
00274   {
00275     ei_compute_inverse_in_size4_case<Derived, OtherDerived>::run(matrix, *result);
00276   }
00277 };
00278 
00292 template<typename Derived>
00293 template<typename OtherDerived>
00294 inline void MatrixBase<Derived>::computeInverse(MatrixBase<OtherDerived> *result) const
00295 {
00296   ei_assert(rows() == cols());
00297   EIGEN_STATIC_ASSERT(NumTraits<Scalar>::HasFloatingPoint,NUMERIC_TYPE_MUST_BE_FLOATING_POINT)
00298   ei_compute_inverse<PlainMatrixType, OtherDerived>::run(eval(), static_cast<OtherDerived*>(result));
00299 }
00300 
00315 template<typename Derived>
00316 inline const typename MatrixBase<Derived>::PlainMatrixType MatrixBase<Derived>::inverse() const
00317 {
00318   PlainMatrixType result(rows(), cols());
00319   computeInverse(&result);
00320   return result;
00321 }
00322 
00323 #endif // EIGEN_INVERSE_H