pcl: sparse_matrix.hpp Source File

Go to the documentation of this file.
00001 /*
00002 Copyright (c) 2006, Michael Kazhdan and Matthew Bolitho
00003 All rights reserved.
00004 
00005 Redistribution and use in source and binary forms, with or without modification,
00006 are permitted provided that the following conditions are met:
00007 
00008 Redistributions of source code must retain the above copyright notice, this list of
00009 conditions and the following disclaimer. Redistributions in binary form must reproduce
00010 the above copyright notice, this list of conditions and the following disclaimer
00011 in the documentation and/or other materials provided with the distribution. 
00012 
00013 Neither the name of the Johns Hopkins University nor the names of its contributors
00014 may be used to endorse or promote products derived from this software without specific
00015 prior written permission. 
00016 
00017 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
00018 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO THE IMPLIED WARRANTIES 
00019 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
00020 SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
00021 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
00022 TO, PROCUREMENT OF SUBSTITUTE  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
00023 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
00024 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
00025 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
00026 DAMAGE.
00027 */
00028 
00029 #include <float.h>
00030 #ifdef _WIN32
00031 # ifndef WIN32_LEAN_AND_MEAN
00032 #  define WIN32_LEAN_AND_MEAN
00033 # endif // WIN32_LEAN_AND_MEAN
00034 # ifndef NOMINMAX
00035 #  define NOMINMAX
00036 # endif // NOMINMAX
00037 # include <windows.h>
00038 #endif //_WIN32
00039 
00041 //  SparseMatrix //
00044 // Static Allocator Methods and Memebers //
00046 
00047 namespace pcl
00048 {
00049   namespace poisson
00050   {
00051 
00052 
00053     template<class T> int SparseMatrix<T>::UseAlloc=0;
00054     template<class T> Allocator<MatrixEntry<T> > SparseMatrix<T>::internalAllocator;
00055     template<class T> int SparseMatrix<T>::UseAllocator(void){return UseAlloc;}
00056     template<class T>
00057     void SparseMatrix<T>::SetAllocator( int blockSize )
00058     {
00059       if(blockSize>0){
00060         UseAlloc=1;
00061         internalAllocator.set(blockSize);
00062       }
00063       else{UseAlloc=0;}
00064     }
00066     // SparseMatrix Methods and Memebers //
00068 
00069     template< class T >
00070     SparseMatrix< T >::SparseMatrix( void )
00071     {
00072       _contiguous = false;
00073       _maxEntriesPerRow = 0;
00074       rows = 0;
00075       rowSizes = NULL;
00076       m_ppElements = NULL;
00077     }
00078 
00079     template< class T > SparseMatrix< T >::SparseMatrix( int rows                        ) : SparseMatrix< T >() { Resize( rows ); }
00080     template< class T > SparseMatrix< T >::SparseMatrix( int rows , int maxEntriesPerRow ) : SparseMatrix< T >() { Resize( rows , maxEntriesPerRow ); }
00081 
00082     template< class T >
00083     SparseMatrix< T >::SparseMatrix( const SparseMatrix& M ) : SparseMatrix< T >()
00084     {
00085       if( M._contiguous ) Resize( M.rows , M._maxEntriesPerRow );
00086       else                Resize( M.rows );
00087       for( int i=0 ; i<rows ; i++ )
00088       {
00089         SetRowSize( i , M.rowSizes[i] );
00090         memcpy( (*this)[i] , M[i] , sizeof( MatrixEntry< T > ) * rowSizes[i] );
00091       }
00092     }
00093     template<class T>
00094     int SparseMatrix<T>::Entries( void ) const
00095     {
00096       int e = 0;
00097       for( int i=0 ; i<rows ; i++ ) e += int( rowSizes[i] );
00098       return e;
00099     }
00100     template<class T>
00101     SparseMatrix<T>& SparseMatrix<T>::operator = (const SparseMatrix<T>& M)
00102     {
00103       if( M._contiguous ) Resize( M.rows , M._maxEntriesPerRow );
00104       else                Resize( M.rows );
00105       for( int i=0 ; i<rows ; i++ )
00106       {
00107         SetRowSize( i , M.rowSizes[i] );
00108         memcpy( (*this)[i] , M[i] , sizeof( MatrixEntry< T > ) * rowSizes[i] );
00109       }
00110       return *this;
00111     }
00112 
00113     template<class T>
00114     SparseMatrix<T>::~SparseMatrix( void ){ Resize( 0 ); }
00115 
00116     template< class T >
00117     bool SparseMatrix< T >::write( const char* fileName ) const
00118     {
00119       FILE* fp = fopen( fileName , "wb" );
00120       if( !fp ) return false;
00121       bool ret = write( fp );
00122       fclose( fp );
00123       return ret;
00124     }
00125     template< class T >
00126     bool SparseMatrix< T >::read( const char* fileName )
00127     {
00128       FILE* fp = fopen( fileName , "rb" );
00129       if( !fp ) return false;
00130       bool ret = read( fp );
00131       fclose( fp );
00132       return ret;
00133     }
00134     template< class T >
00135     bool SparseMatrix< T >::write( FILE* fp ) const
00136     {
00137       if( fwrite( &rows , sizeof( int ) , 1 , fp )!=1 ) return false;
00138       if( fwrite( rowSizes , sizeof( int ) , rows , fp )!=rows ) return false;
00139       for( int i=0 ; i<rows ; i++ ) if( fwrite( (*this)[i] , sizeof( MatrixEntry< T > ) , rowSizes[i] , fp )!=rowSizes[i] ) return false;
00140       return true;
00141     }
00142     template< class T >
00143     bool SparseMatrix< T >::read( FILE* fp )
00144     {
00145       int r;
00146       if( fread( &r , sizeof( int ) , 1 , fp )!=1 ) return false;
00147       Resize( r );
00148       if( fread( rowSizes , sizeof( int ) , rows , fp )!=rows ) return false;
00149       for( int i=0 ; i<rows ; i++ )
00150       {
00151         r = rowSizes[i];
00152         rowSizes[i] = 0;
00153         SetRowSize( i , r );
00154         if( fread( (*this)[i] , sizeof( MatrixEntry< T > ) , rowSizes[i] , fp )!=rowSizes[i] ) return false;
00155       }
00156       return true;
00157     }
00158 
00159 
00160     template< class T >
00161     void SparseMatrix< T >::Resize( int r )
00162     {
00163       if( rows>0 )
00164       {
00165 
00166         if( !UseAlloc )
00167           if( _contiguous ){ if( _maxEntriesPerRow ) free( m_ppElements[0] ); }
00168           else for( int i=0 ; i<rows ; i++ ){ if( rowSizes[i] ) free( m_ppElements[i] ); }
00169         free( m_ppElements );
00170         free( rowSizes );
00171       }
00172       rows = r;
00173       if( r )
00174       {
00175         rowSizes = ( int* )malloc( sizeof( int ) * r );
00176         memset( rowSizes , 0 , sizeof( int ) * r );
00177         m_ppElements = ( MatrixEntry< T >** )malloc( sizeof( MatrixEntry< T >* ) * r );
00178       }
00179       _contiguous = false;
00180       _maxEntriesPerRow = 0;
00181     }
00182     template< class T >
00183     void SparseMatrix< T >::Resize( int r , int e )
00184     {
00185       if( rows>0 )
00186       {
00187         if( !UseAlloc )
00188           if( _contiguous ){ if( _maxEntriesPerRow ) free( m_ppElements[0] ); }
00189           else for( int i=0 ; i<rows ; i++ ){ if( rowSizes[i] ) free( m_ppElements[i] ); }
00190         free( m_ppElements );
00191         free( rowSizes );
00192       }
00193       rows = r;
00194       if( r )
00195       {
00196         rowSizes = ( int* )malloc( sizeof( int ) * r );
00197         memset( rowSizes , 0 , sizeof( int ) * r );
00198         m_ppElements = ( MatrixEntry< T >** )malloc( sizeof( MatrixEntry< T >* ) * r );
00199         m_ppElements[0] = ( MatrixEntry< T >* )malloc( sizeof( MatrixEntry< T > ) * r * e );
00200         for( int i=1 ; i<r ; i++ ) m_ppElements[i] = m_ppElements[i-1] + e;
00201       }
00202       _contiguous = true;
00203       _maxEntriesPerRow = e;
00204     }
00205 
00206     template<class T>
00207     void SparseMatrix< T >::SetRowSize( int row , int count )
00208     {
00209       if( _contiguous )
00210       {
00211         if( count>_maxEntriesPerRow ) fprintf( stderr , "[ERROR] Cannot set row size on contiguous matrix: %d<=%d\n" , count , _maxEntriesPerRow ) , exit( 0 );
00212         rowSizes[row] = count;
00213       }
00214       else if( row>=0 && row<rows )
00215       {
00216         if( UseAlloc ) m_ppElements[row] = internalAllocator.newElements(count);
00217         else
00218         {
00219           if( rowSizes[row] ) free( m_ppElements[row] );
00220           if( count>0 ) m_ppElements[row] = ( MatrixEntry< T >* )malloc( sizeof( MatrixEntry< T > ) * count );
00221         }
00222       }
00223     }
00224 
00225 
00226     template<class T>
00227     void SparseMatrix<T>::SetZero()
00228     {
00229       Resize(this->m_N, this->m_M);
00230     }
00231 
00232     template<class T>
00233     void SparseMatrix<T>::SetIdentity()
00234     {
00235       SetZero();
00236       for(int ij=0; ij < Min( this->Rows(), this->Columns() ); ij++)
00237         (*this)(ij,ij) = T(1);
00238     }
00239 
00240     template<class T>
00241     SparseMatrix<T> SparseMatrix<T>::operator * (const T& V) const
00242     {
00243       SparseMatrix<T> M(*this);
00244       M *= V;
00245       return M;
00246     }
00247 
00248     template<class T>
00249     SparseMatrix<T>& SparseMatrix<T>::operator *= (const T& V)
00250     {
00251       for (int i=0; i<this->Rows(); i++)
00252       {
00253         for(int ii=0;ii<m_ppElements[i].size();i++){m_ppElements[i][ii].Value*=V;}
00254       }
00255       return *this;
00256     }
00257 
00258     template<class T>
00259     SparseMatrix<T> SparseMatrix<T>::Multiply( const SparseMatrix<T>& M ) const
00260     {
00261       SparseMatrix<T> R( this->Rows(), M.Columns() );
00262       for(int i=0; i<R.Rows(); i++){
00263         for(int ii=0;ii<m_ppElements[i].size();ii++){
00264           int N=m_ppElements[i][ii].N;
00265           T Value=m_ppElements[i][ii].Value;
00266           for(int jj=0;jj<M.m_ppElements[N].size();jj++){
00267             R(i,M.m_ppElements[N][jj].N) += Value * M.m_ppElements[N][jj].Value;
00268           }
00269         }
00270       }
00271       return R;
00272     }
00273 
00274     template<class T>
00275     template<class T2>
00276     Vector<T2> SparseMatrix<T>::Multiply( const Vector<T2>& V ) const
00277     {
00278       Vector<T2> R( rows );
00279 
00280       for (int i=0; i<rows; i++)
00281       {
00282         T2 temp=T2();
00283         for(int ii=0;ii<rowSizes[i];ii++){
00284           temp+=m_ppElements[i][ii].Value * V.m_pV[m_ppElements[i][ii].N];
00285         }
00286         R(i)=temp;
00287       }
00288       return R;
00289     }
00290 
00291     template<class T>
00292     template<class T2>
00293     void SparseMatrix<T>::Multiply( const Vector<T2>& In , Vector<T2>& Out , int threads ) const
00294     {
00295 #pragma omp parallel for num_threads( threads ) schedule( static )
00296       for( int i=0 ; i<rows ; i++ )
00297       {
00298         T2 temp = T2();
00299         temp *= 0;
00300         for( int j=0 ; j<rowSizes[i] ; j++ ) temp += m_ppElements[i][j].Value * In.m_pV[m_ppElements[i][j].N];
00301         Out.m_pV[i] = temp;
00302       }
00303     }
00304 
00305     template<class T>
00306     SparseMatrix<T> SparseMatrix<T>::operator * (const SparseMatrix<T>& M) const
00307     {
00308       return Multiply(M);
00309     }
00310     template<class T>
00311     template<class T2>
00312     Vector<T2> SparseMatrix<T>::operator * (const Vector<T2>& V) const
00313     {
00314       return Multiply(V);
00315     }
00316 
00317     template<class T>
00318     SparseMatrix<T> SparseMatrix<T>::Transpose() const
00319     {
00320       SparseMatrix<T> M( this->Columns(), this->Rows() );
00321 
00322       for (int i=0; i<this->Rows(); i++)
00323       {
00324         for(int ii=0;ii<m_ppElements[i].size();ii++){
00325           M(m_ppElements[i][ii].N,i) = m_ppElements[i][ii].Value;
00326         }
00327       }
00328       return M;
00329     }
00330 
00331     template<class T>
00332     template<class T2>
00333     int SparseMatrix<T>::SolveSymmetric( const SparseMatrix<T>& M , const Vector<T2>& b , int iters , Vector<T2>& solution , const T2 eps , int reset , int threads )
00334     {
00335       if( reset )
00336       {
00337         solution.Resize( b.Dimensions() );
00338         solution.SetZero();
00339       }
00340       Vector< T2 > r;
00341       r.Resize( solution.Dimensions() );
00342       M.Multiply( solution , r );
00343       r = b - r;
00344       Vector< T2 > d = r;
00345       double delta_new , delta_0;
00346       for( int i=0 ; i<r.Dimensions() ; i++ ) delta_new += r.m_pV[i] * r.m_pV[i];
00347       delta_0 = delta_new;
00348       if( delta_new<eps ) return 0;
00349       int ii;
00350       Vector< T2 > q;
00351       q.Resize( d.Dimensions() );
00352       for( ii=0; ii<iters && delta_new>eps*delta_0 ; ii++ )
00353       {
00354         M.Multiply( d , q , threads );
00355         double dDotQ = 0 , alpha = 0;
00356         for( int i=0 ; i<d.Dimensions() ; i++ ) dDotQ += d.m_pV[i] * q.m_pV[i];
00357         alpha = delta_new / dDotQ;
00358 #pragma omp parallel for num_threads( threads ) schedule( static )
00359         for( int i=0 ; i<r.Dimensions() ; i++ ) solution.m_pV[i] += d.m_pV[i] * T2( alpha );
00360         if( !(ii%50) )
00361         {
00362           r.Resize( solution.Dimensions() );
00363           M.Multiply( solution , r , threads );
00364           r = b - r;
00365         }
00366         else
00367 #pragma omp parallel for num_threads( threads ) schedule( static )
00368           for( int i=0 ; i<r.Dimensions() ; i++ ) r.m_pV[i] = r.m_pV[i] - q.m_pV[i] * T2(alpha);
00369 
00370         double delta_old = delta_new , beta;
00371         delta_new = 0;
00372         for( int i=0 ; i<r.Dimensions() ; i++ ) delta_new += r.m_pV[i]*r.m_pV[i];
00373         beta = delta_new / delta_old;
00374 #pragma omp parallel for num_threads( threads ) schedule( static )
00375         for( int i=0 ; i<d.Dimensions() ; i++ ) d.m_pV[i] = r.m_pV[i] + d.m_pV[i] * T2( beta );
00376       }
00377       return ii;
00378     }
00379 
00380     // Solve for x s.t. M(x)=b by solving for x s.t. M^tM(x)=M^t(b)
00381     template<class T>
00382     int SparseMatrix<T>::Solve(const SparseMatrix<T>& M,const Vector<T>& b,int iters,Vector<T>& solution,const T eps){
00383       SparseMatrix mTranspose=M.Transpose();
00384       Vector<T> bb=mTranspose*b;
00385       Vector<T> d,r,Md;
00386       T alpha,beta,rDotR;
00387       int i;
00388 
00389       solution.Resize(M.Columns());
00390       solution.SetZero();
00391 
00392       d=r=bb;
00393       rDotR=r.Dot(r);
00394       for(i=0;i<iters && rDotR>eps;i++){
00395         T temp;
00396         Md=mTranspose*(M*d);
00397         alpha=rDotR/d.Dot(Md);
00398         solution+=d*alpha;
00399         r-=Md*alpha;
00400         temp=r.Dot(r);
00401         beta=temp/rDotR;
00402         rDotR=temp;
00403         d=r+d*beta;
00404       }
00405       return i;
00406     }
00407 
00408 
00409 
00410 
00412     // SparseSymmetricMatrix //
00414     template<class T>
00415     template<class T2>
00416     Vector<T2> SparseSymmetricMatrix<T>::operator * (const Vector<T2>& V) const {return Multiply(V);}
00417     template<class T>
00418     template<class T2>
00419     Vector<T2> SparseSymmetricMatrix<T>::Multiply( const Vector<T2>& V ) const
00420     {
00421       Vector<T2> R( SparseMatrix<T>::rows );
00422 
00423       for(int i=0; i<SparseMatrix<T>::rows; i++){
00424         for(int ii=0;ii<SparseMatrix<T>::rowSizes[i];ii++){
00425           int j=SparseMatrix<T>::m_ppElements[i][ii].N;
00426           R(i)+=SparseMatrix<T>::m_ppElements[i][ii].Value * V.m_pV[j];
00427           R(j)+=SparseMatrix<T>::m_ppElements[i][ii].Value * V.m_pV[i];
00428         }
00429       }
00430       return R;
00431     }
00432 
00433     template<class T>
00434     template<class T2>
00435     void SparseSymmetricMatrix<T>::Multiply( const Vector<T2>& In , Vector<T2>& Out , bool addDCTerm ) const
00436     {
00437       Out.SetZero();
00438       const T2* in = &In[0];
00439       T2* out = &Out[0];
00440       T2 dcTerm = T2( 0 );
00441       if( addDCTerm )
00442       {
00443         for( int i=0 ; i<SparseMatrix<T>::rows ; i++ ) dcTerm += in[i];
00444         dcTerm /= SparseMatrix<T>::rows;
00445       }
00446       for( int i=0 ; i<this->SparseMatrix<T>::rows ; i++ )
00447       {
00448         const MatrixEntry<T>* temp = SparseMatrix<T>::m_ppElements[i];
00449         const MatrixEntry<T>* end = temp + SparseMatrix<T>::rowSizes[i];
00450         const T2& in_i_ = in[i];
00451         T2 out_i = T2(0);
00452         for( ; temp!=end ; temp++ )
00453         {
00454           int j=temp->N;
00455           T2 v=temp->Value;
00456           out_i += v * in[j];
00457           out[j] += v * in_i_;
00458         }
00459         out[i] += out_i;
00460       }
00461       if( addDCTerm ) for( int i=0 ; i<SparseMatrix<T>::rows ; i++ ) out[i] += dcTerm;
00462     }
00463     template<class T>
00464     template<class T2>
00465     void SparseSymmetricMatrix<T>::Multiply( const Vector<T2>& In , Vector<T2>& Out , MapReduceVector< T2 >& OutScratch , bool addDCTerm ) const
00466     {
00467       int dim = int( In.Dimensions() );
00468       const T2* in = &In[0];
00469       int threads = OutScratch.threads();
00470       if( addDCTerm )
00471       {
00472         T2 dcTerm = 0;
00473 #pragma omp parallel for num_threads( threads ) reduction ( + : dcTerm )
00474         for( int t=0 ; t<threads ; t++ )
00475         {
00476           T2* out = OutScratch[t];
00477           memset( out , 0 , sizeof( T2 ) * dim );
00478           for( int i=(SparseMatrix<T>::rows*t)/threads ; i<(SparseMatrix<T>::rows*(t+1))/threads ; i++ )
00479           {
00480             const T2& in_i_ = in[i];
00481             T2& out_i_ = out[i];
00482             for( const MatrixEntry< T > *temp = SparseMatrix<T>::m_ppElements[i] , *end = temp+SparseMatrix<T>::rowSizes[i] ; temp!=end ; temp++ )
00483             {
00484               int j = temp->N;
00485               T2 v = temp->Value;
00486               out_i_ += v * in[j];
00487               out[j] += v * in_i_;
00488             }
00489             dcTerm += in_i_;
00490           }
00491         }
00492         dcTerm /= dim;
00493         dim = int( Out.Dimensions() );
00494         T2* out = &Out[0];
00495 #pragma omp parallel for num_threads( threads ) schedule( static )
00496         for( int i=0 ; i<dim ; i++ )
00497         {
00498           T2 _out = dcTerm;
00499           for( int t=0 ; t<threads ; t++ ) _out += OutScratch[t][i];
00500           out[i] = _out;
00501         }
00502       }
00503       else
00504       {
00505 #pragma omp parallel for num_threads( threads )
00506         for( int t=0 ; t<threads ; t++ )
00507         {
00508           T2* out = OutScratch[t];
00509           memset( out , 0 , sizeof( T2 ) * dim );
00510           for( int i=(SparseMatrix<T>::rows*t)/threads ; i<(SparseMatrix<T>::rows*(t+1))/threads ; i++ )
00511           {
00512             const T2& in_i_ = in[i];
00513             T2& out_i_ = out[i];
00514             for( const MatrixEntry< T > *temp = SparseMatrix<T>::m_ppElements[i] , *end = temp+SparseMatrix<T>::rowSizes[i] ; temp!=end ; temp++ )
00515             {
00516               int j = temp->N;
00517               T2 v = temp->Value;
00518               out_i_ += v * in[j];
00519               out[j] += v * in_i_;
00520             }
00521           }
00522         }
00523         dim = int( Out.Dimensions() );
00524         T2* out = &Out[0];
00525 #pragma omp parallel for num_threads( threads ) schedule( static )
00526         for( int i=0 ; i<dim ; i++ )
00527         {
00528           T2 _out = T2(0);
00529           for( int t=0 ; t<threads ; t++ ) _out += OutScratch[t][i];
00530           out[i] = _out;
00531         }
00532       }
00533     }
00534     template<class T>
00535     template<class T2>
00536     void SparseSymmetricMatrix<T>::Multiply( const Vector<T2>& In , Vector<T2>& Out , std::vector< T2* >& OutScratch , const std::vector< int >& bounds ) const
00537     {
00538       int dim = In.Dimensions();
00539       const T2* in = &In[0];
00540       int threads = OutScratch.size();
00541 #pragma omp parallel for num_threads( threads )
00542       for( int t=0 ; t<threads ; t++ )
00543         for( int i=0 ; i<dim ; i++ ) OutScratch[t][i] = T2(0);
00544 #pragma omp parallel for num_threads( threads )
00545       for( int t=0 ; t<threads ; t++ )
00546       {
00547         T2* out = OutScratch[t];
00548         for( int i=bounds[t] ; i<bounds[t+1] ; i++ )
00549         {
00550           const MatrixEntry<T>* temp = SparseMatrix<T>::m_ppElements[i];
00551           const MatrixEntry<T>* end = temp + SparseMatrix<T>::rowSizes[i];
00552           const T2& in_i_ = in[i];
00553           T2& out_i_ = out[i];
00554           for(  ; temp!=end ; temp++ )
00555           {
00556             int j = temp->N;
00557             T2 v = temp->Value;
00558             out_i_ += v * in[j];
00559             out[j] += v * in_i_;
00560           }
00561         }
00562       }
00563       T2* out = &Out[0];
00564 #pragma omp parallel for num_threads( threads ) schedule( static )
00565       for( int i=0 ; i<Out.Dimensions() ; i++ )
00566       {
00567         T2& _out = out[i];
00568         _out = T2(0);
00569         for( int t=0 ; t<threads ; t++ ) _out += OutScratch[t][i];
00570       }
00571     }
00572 #if defined _WIN32 && !defined __MINGW32__
00573 #ifndef _AtomicIncrement_
00574 #define _AtomicIncrement_
00575     inline void AtomicIncrement( volatile float* ptr , float addend )
00576     {
00577       float newValue = *ptr;
00578       LONG& _newValue = *( (LONG*)&newValue );
00579       LONG  _oldValue;
00580       for( ;; )
00581       {
00582         _oldValue = _newValue;
00583         newValue += addend;
00584         _newValue = InterlockedCompareExchange( (LONG*) ptr , _newValue , _oldValue );
00585         if( _newValue==_oldValue ) break;
00586       }
00587     }
00588     inline void AtomicIncrement( volatile double* ptr , double addend )
00589     //inline void AtomicIncrement( double* ptr , double addend )
00590     {
00591       double newValue = *ptr;
00592       LONGLONG& _newValue = *( (LONGLONG*)&newValue );
00593       LONGLONG  _oldValue;
00594       do
00595       {
00596         _oldValue = _newValue;
00597         newValue += addend;
00598         _newValue = InterlockedCompareExchange64( (LONGLONG*) ptr , _newValue , _oldValue );
00599       }
00600       while( _newValue!=_oldValue );
00601     }
00602 #endif // _AtomicIncrement_
00603     template< class T >
00604     void MultiplyAtomic( const SparseSymmetricMatrix< T >& A , const Vector< float >& In , Vector< float >& Out , int threads , const int* partition=NULL )
00605     {
00606       Out.SetZero();
00607       const float* in = &In[0];
00608       float* out = &Out[0];
00609       if( partition )
00610 #pragma omp parallel for num_threads( threads )
00611         for( int t=0 ; t<threads ; t++ )
00612           for( int i=partition[t] ; i<partition[t+1] ; i++ )
00613           {
00614             const MatrixEntry< T >* temp = A[i];
00615             const MatrixEntry< T >* end = temp + A.rowSizes[i];
00616             const float& in_i = in[i];
00617             float out_i = 0.;
00618             for( ; temp!=end ; temp++ )
00619             {
00620               int j = temp->N;
00621               float v = temp->Value;
00622               out_i += v * in[j];
00623               AtomicIncrement( out+j , v * in_i );
00624             }
00625             AtomicIncrement( out+i , out_i );
00626           }
00627       else
00628 #pragma omp parallel for num_threads( threads )
00629         for( int i=0 ; i<A.rows ; i++ )
00630         {
00631           const MatrixEntry< T >* temp = A[i];
00632           const MatrixEntry< T >* end = temp + A.rowSizes[i];
00633           const float& in_i = in[i];
00634           float out_i = 0.f;
00635           for( ; temp!=end ; temp++ )
00636           {
00637             int j = temp->N;
00638             float v = temp->Value;
00639             out_i += v * in[j];
00640             AtomicIncrement( out+j , v * in_i );
00641           }
00642           AtomicIncrement( out+i , out_i );
00643         }
00644     }
00645     template< class T >
00646     void MultiplyAtomic( const SparseSymmetricMatrix< T >& A , const Vector< double >& In , Vector< double >& Out , int threads , const int* partition=NULL )
00647     {
00648       Out.SetZero();
00649       const double* in = &In[0];
00650       double* out = &Out[0];
00651 
00652       if( partition )
00653 #pragma omp parallel for num_threads( threads )
00654         for( int t=0 ; t<threads ; t++ )
00655           for( int i=partition[t] ; i<partition[t+1] ; i++ )
00656           {
00657             const MatrixEntry< T >* temp = A[i];
00658             const MatrixEntry< T >* end = temp + A.rowSizes[i];
00659             const double& in_i = in[i];
00660             double out_i = 0.;
00661             for( ; temp!=end ; temp++ )
00662             {
00663               int j = temp->N;
00664               T v = temp->Value;
00665               out_i += v * in[j];
00666               AtomicIncrement( out+j , v * in_i );
00667             }
00668             AtomicIncrement( out+i , out_i );
00669           }
00670       else
00671 #pragma omp parallel for num_threads( threads )
00672         for( int i=0 ; i<A.rows ; i++ )
00673         {
00674           const MatrixEntry< T >* temp = A[i];
00675           const MatrixEntry< T >* end = temp + A.rowSizes[i];
00676           const double& in_i = in[i];
00677           double out_i = 0.;
00678           for( ; temp!=end ; temp++ )
00679           {
00680             int j = temp->N;
00681             T v = temp->Value;
00682             out_i += v * in[j];
00683             AtomicIncrement( out+j , v * in_i );
00684           }
00685           AtomicIncrement( out+i , out_i );
00686         }
00687     }
00688 
00689     template< class T >
00690     template< class T2 >
00691     int SparseSymmetricMatrix< T >::SolveAtomic( const SparseSymmetricMatrix< T >& A , const Vector< T2 >& b , int iters , Vector< T2 >& x , T2 eps , int reset , int threads , bool solveNormal )
00692     {
00693       eps *= eps;
00694       int dim = b.Dimensions();
00695       if( reset )
00696       {
00697         x.Resize( dim );
00698         x.SetZero();
00699       }
00700       Vector< T2 > r( dim ) , d( dim ) , q( dim );
00701       Vector< T2 > temp;
00702       if( solveNormal ) temp.Resize( dim );
00703       T2 *_x = &x[0] , *_r = &r[0] , *_d = &d[0] , *_q = &q[0];
00704       const T2* _b = &b[0];
00705 
00706       std::vector< int > partition( threads+1 );
00707       {
00708         int eCount = 0;
00709         for( int i=0 ; i<A.rows ; i++ ) eCount += A.rowSizes[i];
00710         partition[0] = 0;
00711 #pragma omp parallel for num_threads( threads )
00712         for( int t=0 ; t<threads ; t++ )
00713         {
00714           int _eCount = 0;
00715           for( int i=0 ; i<A.rows ; i++ )
00716           {
00717             _eCount += A.rowSizes[i];
00718             if( _eCount*threads>=eCount*(t+1) )
00719             {
00720               partition[t+1] = i;
00721               break;
00722             }
00723           }
00724         }
00725         partition[threads] = A.rows;
00726       }
00727       if( solveNormal )
00728       {
00729         MultiplyAtomic( A , x , temp , threads , &partition[0] );
00730         MultiplyAtomic( A , temp , r , threads , &partition[0] );
00731         MultiplyAtomic( A , b , temp , threads , &partition[0] );
00732 #pragma omp parallel for num_threads( threads ) schedule( static )
00733         for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = temp[i] - _r[i];
00734       }
00735       else
00736       {
00737         MultiplyAtomic( A , x , r , threads , &partition[0] );
00738 #pragma omp parallel for num_threads( threads ) schedule( static )
00739         for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = _b[i] - _r[i];
00740       }
00741       double delta_new = 0 , delta_0;
00742       for( size_t i=0 ; i<dim ; i++ ) delta_new += _r[i] * _r[i];
00743       delta_0 = delta_new;
00744       if( delta_new<eps )
00745       {
00746         fprintf( stderr , "[WARNING] Initial residual too low: %g < %f\n" , delta_new , eps );
00747         return 0;
00748       }
00749       int ii;
00750       for( ii=0; ii<iters && delta_new>eps*delta_0 ; ii++ )
00751       {
00752         if( solveNormal ) MultiplyAtomic( A , d , temp , threads , &partition[0] ) , MultiplyAtomic( A , temp , q , threads , &partition[0] );
00753         else              MultiplyAtomic( A , d , q , threads , &partition[0] );
00754         double dDotQ = 0;
00755         for( int i=0 ; i<dim ; i++ ) dDotQ += _d[i] * _q[i];
00756         T2 alpha = T2( delta_new / dDotQ );
00757 #pragma omp parallel for num_threads( threads ) schedule( static )
00758         for( int i=0 ; i<dim ; i++ ) _x[i] += _d[i] * alpha;
00759         if( (ii%50)==(50-1) )
00760         {
00761           r.Resize( dim );
00762           if( solveNormal ) MultiplyAtomic( A , x , temp , threads , &partition[0] ) , MultiplyAtomic( A , temp , r , threads , &partition[0] );
00763           else              MultiplyAtomic( A , x , r , threads , &partition[0] );
00764 #pragma omp parallel for num_threads( threads ) schedule( static )
00765           for( int i=0 ; i<dim ; i++ ) _r[i] = _b[i] - _r[i];
00766         }
00767         else
00768 #pragma omp parallel for num_threads( threads ) schedule( static )
00769           for( int i=0 ; i<dim ; i++ ) _r[i] -= _q[i] * alpha;
00770 
00771         double delta_old = delta_new;
00772         delta_new = 0;
00773         for( size_t i=0 ; i<dim ; i++ ) delta_new += _r[i] * _r[i];
00774         T2 beta = T2( delta_new / delta_old );
00775 #pragma omp parallel for num_threads( threads ) schedule( static )
00776         for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] + _d[i] * beta;
00777       }
00778       return ii;
00779     }
00780 #endif // _WIN32 && !__MINGW32__
00781     template< class T >
00782     template< class T2 >
00783     int SparseSymmetricMatrix< T >::Solve( const SparseSymmetricMatrix<T>& A , const Vector<T2>& b , int iters , Vector<T2>& x , MapReduceVector< T2 >& scratch , T2 eps , int reset , bool addDCTerm , bool solveNormal )
00784     {
00785       int threads = scratch.threads();
00786       eps *= eps;
00787       int dim = int( b.Dimensions() );
00788       Vector< T2 > r( dim ) , d( dim ) , q( dim ) , temp;
00789       if( reset ) x.Resize( dim );
00790       if( solveNormal ) temp.Resize( dim );
00791       T2 *_x = &x[0] , *_r = &r[0] , *_d = &d[0] , *_q = &q[0];
00792       const T2* _b = &b[0];
00793 
00794       double delta_new = 0 , delta_0;
00795       if( solveNormal )
00796       {
00797         A.Multiply( x , temp , scratch , addDCTerm ) , A.Multiply( temp , r , scratch , addDCTerm ) , A.Multiply( b , temp , scratch , addDCTerm );
00798 #pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
00799         for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = temp[i] - _r[i] , delta_new += _r[i] * _r[i];
00800       }
00801       else
00802       {
00803         A.Multiply( x , r , scratch , addDCTerm );
00804 #pragma omp parallel for num_threads( threads )  reduction ( + : delta_new )
00805         for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i];
00806       }
00807       delta_0 = delta_new;
00808       if( delta_new<eps )
00809       {
00810         fprintf( stderr , "[WARNING] Initial residual too low: %g < %f\n" , delta_new , eps );
00811         return 0;
00812       }
00813       int ii;
00814       for( ii=0 ; ii<iters && delta_new>eps*delta_0 ; ii++ )
00815       {
00816         if( solveNormal ) A.Multiply( d , temp , scratch , addDCTerm ) , A.Multiply( temp , q , scratch , addDCTerm );
00817         else              A.Multiply( d , q , scratch , addDCTerm );
00818         double dDotQ = 0;
00819 #pragma omp parallel for num_threads( threads ) reduction( + : dDotQ )
00820         for( int i=0 ; i<dim ; i++ ) dDotQ += _d[i] * _q[i];
00821         T2 alpha = T2( delta_new / dDotQ );
00822         double delta_old = delta_new;
00823         delta_new = 0;
00824         if( (ii%50)==(50-1) )
00825         {
00826 #pragma omp parallel for num_threads( threads )
00827           for( int i=0 ; i<dim ; i++ ) _x[i] += _d[i] * alpha;
00828           r.Resize( dim );
00829           if( solveNormal ) A.Multiply( x , temp , scratch , addDCTerm ) , A.Multiply( temp , r , scratch , addDCTerm );
00830           else              A.Multiply( x , r , scratch , addDCTerm );
00831 #pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
00832           for( int i=0 ; i<dim ; i++ ) _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i] , _x[i] += _d[i] * alpha;
00833         }
00834         else
00835 #pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
00836           for( int i=0 ; i<dim ; i++ ) _r[i] -= _q[i] * alpha , delta_new += _r[i] * _r[i] ,  _x[i] += _d[i] * alpha;
00837 
00838         T2 beta = T2( delta_new / delta_old );
00839 #pragma omp parallel for num_threads( threads )
00840         for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] + _d[i] * beta;
00841       }
00842       return ii;
00843     }
00844     template< class T >
00845     template< class T2 >
00846     int SparseSymmetricMatrix<T>::Solve( const SparseSymmetricMatrix<T>& A , const Vector<T2>& b , int iters , Vector<T2>& x , T2 eps , int reset , int threads , bool addDCTerm , bool solveNormal )
00847     {
00848       eps *= eps;
00849       int dim = int( b.Dimensions() );
00850       MapReduceVector< T2 > outScratch;
00851       if( threads<1 ) threads = 1;
00852       if( threads>1 ) outScratch.resize( threads , dim );
00853       if( reset ) x.Resize( dim );
00854       Vector< T2 > r( dim ) , d( dim ) , q( dim );
00855       Vector< T2 > temp;
00856       if( solveNormal ) temp.Resize( dim );
00857       T2 *_x = &x[0] , *_r = &r[0] , *_d = &d[0] , *_q = &q[0];
00858       const T2* _b = &b[0];
00859 
00860       double delta_new = 0 , delta_0;
00861 
00862       if( solveNormal )
00863       {
00864         if( threads>1 ) A.Multiply( x , temp , outScratch , addDCTerm ) , A.Multiply( temp , r , outScratch , addDCTerm ) , A.Multiply( b , temp , outScratch , addDCTerm );
00865         else            A.Multiply( x , temp , addDCTerm )              , A.Multiply( temp , r , addDCTerm )              , A.Multiply( b , temp , addDCTerm );
00866 #pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
00867         for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = temp[i] - _r[i] , delta_new += _r[i] * _r[i];
00868       }
00869       else
00870       {
00871         if( threads>1 ) A.Multiply( x , r , outScratch , addDCTerm );
00872         else            A.Multiply( x , r , addDCTerm );
00873 #pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
00874         for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i];
00875       }
00876 
00877       delta_0 = delta_new;
00878       if( delta_new<eps )
00879       {
00880         fprintf( stderr , "[WARNING] Initial residual too low: %g < %f\n" , delta_new , eps );
00881         return 0;
00882       }
00883       int ii;
00884       for( ii=0 ; ii<iters && delta_new>eps*delta_0 ; ii++ )
00885       {
00886         if( solveNormal )
00887         {
00888           if( threads>1 ) A.Multiply( d , temp , outScratch , addDCTerm ) , A.Multiply( temp , q , outScratch , addDCTerm );
00889           else            A.Multiply( d , temp , addDCTerm )              , A.Multiply( temp , q , addDCTerm );
00890         }
00891         else
00892         {
00893           if( threads>1 ) A.Multiply( d , q , outScratch , addDCTerm );
00894           else            A.Multiply( d , q , addDCTerm );
00895         }
00896         double dDotQ = 0;
00897 #pragma omp parallel for num_threads( threads ) reduction( + : dDotQ )
00898         for( int i=0 ; i<dim ; i++ ) dDotQ += _d[i] * _q[i];
00899         T2 alpha = T2( delta_new / dDotQ );
00900         double delta_old = delta_new;
00901         delta_new = 0;
00902 
00903         if( (ii%50)==(50-1) )
00904         {
00905 #pragma omp parallel for num_threads( threads )
00906           for( int i=0 ; i<dim ; i++ ) _x[i] += _d[i] * alpha;
00907           r.SetZero();
00908           if( solveNormal )
00909           {
00910             if( threads>1 ) A.Multiply( x , temp , outScratch , addDCTerm ) , A.Multiply( temp , r , outScratch , addDCTerm );
00911             else            A.Multiply( x , temp , addDCTerm )              , A.Multiply( temp , r , addDCTerm );
00912           }
00913           else
00914           {
00915             if( threads>1 ) A.Multiply( x , r , outScratch , addDCTerm );
00916             else            A.Multiply( x , r , addDCTerm );
00917           }
00918 #pragma omp parallel for num_threads( threads ) reduction ( + : delta_new )
00919           for( int i=0 ; i<dim ; i++ ) _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i] , _x[i] += _d[i] * alpha;
00920         }
00921         else
00922         {
00923 #pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
00924           for( int i=0 ; i<dim ; i++ ) _r[i] -= _q[i] * alpha , delta_new += _r[i] * _r[i] , _x[i] += _d[i] * alpha;
00925         }
00926 
00927         T2 beta = T2( delta_new / delta_old );
00928 #pragma omp parallel for num_threads( threads )
00929         for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] + _d[i] * beta;
00930       }
00931       return ii;
00932     }
00933 
00934     template<class T>
00935     template<class T2>
00936     int SparseSymmetricMatrix<T>::Solve( const SparseSymmetricMatrix<T>& M , const Vector<T2>& diagonal , const Vector<T2>& b , int iters , Vector<T2>& solution , int reset )
00937     {
00938       Vector<T2> d,r,Md;
00939 
00940       if(reset)
00941       {
00942         solution.Resize(b.Dimensions());
00943         solution.SetZero();
00944       }
00945       Md.Resize(M.rows);
00946       for( int i=0 ; i<iters ; i++ )
00947       {
00948         M.Multiply( solution , Md );
00949         r = b-Md;
00950         // solution_new[j] * diagonal[j] + ( Md[j] - solution_old[j] * diagonal[j] ) = b[j]
00951         // solution_new[j] = ( b[j] - ( Md[j] - solution_old[j] * diagonal[j] ) ) / diagonal[j]
00952         // solution_new[j] = ( b[j] - Md[j] ) / diagonal[j] + solution_old[j]
00953         for( int j=0 ; j<int(M.rows) ; j++ ) solution[j] += (b[j]-Md[j])/diagonal[j];
00954       }
00955       return iters;
00956     }
00957     template< class T >
00958     template< class T2 >
00959     void SparseSymmetricMatrix< T >::getDiagonal( Vector< T2 >& diagonal ) const
00960     {
00961       diagonal.Resize( SparseMatrix<T>::rows );
00962       for( int i=0 ; i<SparseMatrix<T>::rows ; i++ )
00963       {
00964         diagonal[i] = 0.;
00965         for( int j=0 ; j<SparseMatrix<T>::rowSizes[i] ; j++ ) if( SparseMatrix<T>::m_ppElements[i][j].N==i ) diagonal[i] += SparseMatrix<T>::m_ppElements[i][j].Value * 2;
00966       }
00967     }
00968 
00969   }
00970 }