00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029 #include <float.h>
00030 #ifdef _WIN32
00031 # ifndef WIN32_LEAN_AND_MEAN
00032 # define WIN32_LEAN_AND_MEAN
00033 # endif // WIN32_LEAN_AND_MEAN
00034 # ifndef NOMINMAX
00035 # define NOMINMAX
00036 # endif // NOMINMAX
00037 # include <windows.h>
00038 #endif //_WIN32
00039
00041
00044
00046
00047 namespace pcl
00048 {
00049 namespace poisson
00050 {
00051
00052
00053 template<class T> int SparseMatrix<T>::UseAlloc=0;
00054 template<class T> Allocator<MatrixEntry<T> > SparseMatrix<T>::internalAllocator;
00055 template<class T> int SparseMatrix<T>::UseAllocator(void){return UseAlloc;}
00056 template<class T>
00057 void SparseMatrix<T>::SetAllocator( int blockSize )
00058 {
00059 if(blockSize>0){
00060 UseAlloc=1;
00061 internalAllocator.set(blockSize);
00062 }
00063 else{UseAlloc=0;}
00064 }
00066
00068
00069 template< class T >
00070 SparseMatrix< T >::SparseMatrix( void )
00071 {
00072 _contiguous = false;
00073 _maxEntriesPerRow = 0;
00074 rows = 0;
00075 rowSizes = NULL;
00076 m_ppElements = NULL;
00077 }
00078
00079 template< class T > SparseMatrix< T >::SparseMatrix( int rows ) : SparseMatrix< T >() { Resize( rows ); }
00080 template< class T > SparseMatrix< T >::SparseMatrix( int rows , int maxEntriesPerRow ) : SparseMatrix< T >() { Resize( rows , maxEntriesPerRow ); }
00081
00082 template< class T >
00083 SparseMatrix< T >::SparseMatrix( const SparseMatrix& M ) : SparseMatrix< T >()
00084 {
00085 if( M._contiguous ) Resize( M.rows , M._maxEntriesPerRow );
00086 else Resize( M.rows );
00087 for( int i=0 ; i<rows ; i++ )
00088 {
00089 SetRowSize( i , M.rowSizes[i] );
00090 memcpy( (*this)[i] , M[i] , sizeof( MatrixEntry< T > ) * rowSizes[i] );
00091 }
00092 }
00093 template<class T>
00094 int SparseMatrix<T>::Entries( void ) const
00095 {
00096 int e = 0;
00097 for( int i=0 ; i<rows ; i++ ) e += int( rowSizes[i] );
00098 return e;
00099 }
00100 template<class T>
00101 SparseMatrix<T>& SparseMatrix<T>::operator = (const SparseMatrix<T>& M)
00102 {
00103 if( M._contiguous ) Resize( M.rows , M._maxEntriesPerRow );
00104 else Resize( M.rows );
00105 for( int i=0 ; i<rows ; i++ )
00106 {
00107 SetRowSize( i , M.rowSizes[i] );
00108 memcpy( (*this)[i] , M[i] , sizeof( MatrixEntry< T > ) * rowSizes[i] );
00109 }
00110 return *this;
00111 }
00112
00113 template<class T>
00114 SparseMatrix<T>::~SparseMatrix( void ){ Resize( 0 ); }
00115
00116 template< class T >
00117 bool SparseMatrix< T >::write( const char* fileName ) const
00118 {
00119 FILE* fp = fopen( fileName , "wb" );
00120 if( !fp ) return false;
00121 bool ret = write( fp );
00122 fclose( fp );
00123 return ret;
00124 }
00125 template< class T >
00126 bool SparseMatrix< T >::read( const char* fileName )
00127 {
00128 FILE* fp = fopen( fileName , "rb" );
00129 if( !fp ) return false;
00130 bool ret = read( fp );
00131 fclose( fp );
00132 return ret;
00133 }
00134 template< class T >
00135 bool SparseMatrix< T >::write( FILE* fp ) const
00136 {
00137 if( fwrite( &rows , sizeof( int ) , 1 , fp )!=1 ) return false;
00138 if( fwrite( rowSizes , sizeof( int ) , rows , fp )!=rows ) return false;
00139 for( int i=0 ; i<rows ; i++ ) if( fwrite( (*this)[i] , sizeof( MatrixEntry< T > ) , rowSizes[i] , fp )!=rowSizes[i] ) return false;
00140 return true;
00141 }
00142 template< class T >
00143 bool SparseMatrix< T >::read( FILE* fp )
00144 {
00145 int r;
00146 if( fread( &r , sizeof( int ) , 1 , fp )!=1 ) return false;
00147 Resize( r );
00148 if( fread( rowSizes , sizeof( int ) , rows , fp )!=rows ) return false;
00149 for( int i=0 ; i<rows ; i++ )
00150 {
00151 r = rowSizes[i];
00152 rowSizes[i] = 0;
00153 SetRowSize( i , r );
00154 if( fread( (*this)[i] , sizeof( MatrixEntry< T > ) , rowSizes[i] , fp )!=rowSizes[i] ) return false;
00155 }
00156 return true;
00157 }
00158
00159
00160 template< class T >
00161 void SparseMatrix< T >::Resize( int r )
00162 {
00163 if( rows>0 )
00164 {
00165
00166 if( !UseAlloc )
00167 if( _contiguous ){ if( _maxEntriesPerRow ) free( m_ppElements[0] ); }
00168 else for( int i=0 ; i<rows ; i++ ){ if( rowSizes[i] ) free( m_ppElements[i] ); }
00169 free( m_ppElements );
00170 free( rowSizes );
00171 }
00172 rows = r;
00173 if( r )
00174 {
00175 rowSizes = ( int* )malloc( sizeof( int ) * r );
00176 memset( rowSizes , 0 , sizeof( int ) * r );
00177 m_ppElements = ( MatrixEntry< T >** )malloc( sizeof( MatrixEntry< T >* ) * r );
00178 }
00179 _contiguous = false;
00180 _maxEntriesPerRow = 0;
00181 }
00182 template< class T >
00183 void SparseMatrix< T >::Resize( int r , int e )
00184 {
00185 if( rows>0 )
00186 {
00187 if( !UseAlloc )
00188 if( _contiguous ){ if( _maxEntriesPerRow ) free( m_ppElements[0] ); }
00189 else for( int i=0 ; i<rows ; i++ ){ if( rowSizes[i] ) free( m_ppElements[i] ); }
00190 free( m_ppElements );
00191 free( rowSizes );
00192 }
00193 rows = r;
00194 if( r )
00195 {
00196 rowSizes = ( int* )malloc( sizeof( int ) * r );
00197 memset( rowSizes , 0 , sizeof( int ) * r );
00198 m_ppElements = ( MatrixEntry< T >** )malloc( sizeof( MatrixEntry< T >* ) * r );
00199 m_ppElements[0] = ( MatrixEntry< T >* )malloc( sizeof( MatrixEntry< T > ) * r * e );
00200 for( int i=1 ; i<r ; i++ ) m_ppElements[i] = m_ppElements[i-1] + e;
00201 }
00202 _contiguous = true;
00203 _maxEntriesPerRow = e;
00204 }
00205
00206 template<class T>
00207 void SparseMatrix< T >::SetRowSize( int row , int count )
00208 {
00209 if( _contiguous )
00210 {
00211 if( count>_maxEntriesPerRow ) fprintf( stderr , "[ERROR] Cannot set row size on contiguous matrix: %d<=%d\n" , count , _maxEntriesPerRow ) , exit( 0 );
00212 rowSizes[row] = count;
00213 }
00214 else if( row>=0 && row<rows )
00215 {
00216 if( UseAlloc ) m_ppElements[row] = internalAllocator.newElements(count);
00217 else
00218 {
00219 if( rowSizes[row] ) free( m_ppElements[row] );
00220 if( count>0 ) m_ppElements[row] = ( MatrixEntry< T >* )malloc( sizeof( MatrixEntry< T > ) * count );
00221 }
00222 }
00223 }
00224
00225
00226 template<class T>
00227 void SparseMatrix<T>::SetZero()
00228 {
00229 Resize(this->m_N, this->m_M);
00230 }
00231
00232 template<class T>
00233 void SparseMatrix<T>::SetIdentity()
00234 {
00235 SetZero();
00236 for(int ij=0; ij < Min( this->Rows(), this->Columns() ); ij++)
00237 (*this)(ij,ij) = T(1);
00238 }
00239
00240 template<class T>
00241 SparseMatrix<T> SparseMatrix<T>::operator * (const T& V) const
00242 {
00243 SparseMatrix<T> M(*this);
00244 M *= V;
00245 return M;
00246 }
00247
00248 template<class T>
00249 SparseMatrix<T>& SparseMatrix<T>::operator *= (const T& V)
00250 {
00251 for (int i=0; i<this->Rows(); i++)
00252 {
00253 for(int ii=0;ii<m_ppElements[i].size();i++){m_ppElements[i][ii].Value*=V;}
00254 }
00255 return *this;
00256 }
00257
00258 template<class T>
00259 SparseMatrix<T> SparseMatrix<T>::Multiply( const SparseMatrix<T>& M ) const
00260 {
00261 SparseMatrix<T> R( this->Rows(), M.Columns() );
00262 for(int i=0; i<R.Rows(); i++){
00263 for(int ii=0;ii<m_ppElements[i].size();ii++){
00264 int N=m_ppElements[i][ii].N;
00265 T Value=m_ppElements[i][ii].Value;
00266 for(int jj=0;jj<M.m_ppElements[N].size();jj++){
00267 R(i,M.m_ppElements[N][jj].N) += Value * M.m_ppElements[N][jj].Value;
00268 }
00269 }
00270 }
00271 return R;
00272 }
00273
00274 template<class T>
00275 template<class T2>
00276 Vector<T2> SparseMatrix<T>::Multiply( const Vector<T2>& V ) const
00277 {
00278 Vector<T2> R( rows );
00279
00280 for (int i=0; i<rows; i++)
00281 {
00282 T2 temp=T2();
00283 for(int ii=0;ii<rowSizes[i];ii++){
00284 temp+=m_ppElements[i][ii].Value * V.m_pV[m_ppElements[i][ii].N];
00285 }
00286 R(i)=temp;
00287 }
00288 return R;
00289 }
00290
00291 template<class T>
00292 template<class T2>
00293 void SparseMatrix<T>::Multiply( const Vector<T2>& In , Vector<T2>& Out , int threads ) const
00294 {
00295 #pragma omp parallel for num_threads( threads ) schedule( static )
00296 for( int i=0 ; i<rows ; i++ )
00297 {
00298 T2 temp = T2();
00299 temp *= 0;
00300 for( int j=0 ; j<rowSizes[i] ; j++ ) temp += m_ppElements[i][j].Value * In.m_pV[m_ppElements[i][j].N];
00301 Out.m_pV[i] = temp;
00302 }
00303 }
00304
00305 template<class T>
00306 SparseMatrix<T> SparseMatrix<T>::operator * (const SparseMatrix<T>& M) const
00307 {
00308 return Multiply(M);
00309 }
00310 template<class T>
00311 template<class T2>
00312 Vector<T2> SparseMatrix<T>::operator * (const Vector<T2>& V) const
00313 {
00314 return Multiply(V);
00315 }
00316
00317 template<class T>
00318 SparseMatrix<T> SparseMatrix<T>::Transpose() const
00319 {
00320 SparseMatrix<T> M( this->Columns(), this->Rows() );
00321
00322 for (int i=0; i<this->Rows(); i++)
00323 {
00324 for(int ii=0;ii<m_ppElements[i].size();ii++){
00325 M(m_ppElements[i][ii].N,i) = m_ppElements[i][ii].Value;
00326 }
00327 }
00328 return M;
00329 }
00330
00331 template<class T>
00332 template<class T2>
00333 int SparseMatrix<T>::SolveSymmetric( const SparseMatrix<T>& M , const Vector<T2>& b , int iters , Vector<T2>& solution , const T2 eps , int reset , int threads )
00334 {
00335 if( reset )
00336 {
00337 solution.Resize( b.Dimensions() );
00338 solution.SetZero();
00339 }
00340 Vector< T2 > r;
00341 r.Resize( solution.Dimensions() );
00342 M.Multiply( solution , r );
00343 r = b - r;
00344 Vector< T2 > d = r;
00345 double delta_new , delta_0;
00346 for( int i=0 ; i<r.Dimensions() ; i++ ) delta_new += r.m_pV[i] * r.m_pV[i];
00347 delta_0 = delta_new;
00348 if( delta_new<eps ) return 0;
00349 int ii;
00350 Vector< T2 > q;
00351 q.Resize( d.Dimensions() );
00352 for( ii=0; ii<iters && delta_new>eps*delta_0 ; ii++ )
00353 {
00354 M.Multiply( d , q , threads );
00355 double dDotQ = 0 , alpha = 0;
00356 for( int i=0 ; i<d.Dimensions() ; i++ ) dDotQ += d.m_pV[i] * q.m_pV[i];
00357 alpha = delta_new / dDotQ;
00358 #pragma omp parallel for num_threads( threads ) schedule( static )
00359 for( int i=0 ; i<r.Dimensions() ; i++ ) solution.m_pV[i] += d.m_pV[i] * T2( alpha );
00360 if( !(ii%50) )
00361 {
00362 r.Resize( solution.Dimensions() );
00363 M.Multiply( solution , r , threads );
00364 r = b - r;
00365 }
00366 else
00367 #pragma omp parallel for num_threads( threads ) schedule( static )
00368 for( int i=0 ; i<r.Dimensions() ; i++ ) r.m_pV[i] = r.m_pV[i] - q.m_pV[i] * T2(alpha);
00369
00370 double delta_old = delta_new , beta;
00371 delta_new = 0;
00372 for( int i=0 ; i<r.Dimensions() ; i++ ) delta_new += r.m_pV[i]*r.m_pV[i];
00373 beta = delta_new / delta_old;
00374 #pragma omp parallel for num_threads( threads ) schedule( static )
00375 for( int i=0 ; i<d.Dimensions() ; i++ ) d.m_pV[i] = r.m_pV[i] + d.m_pV[i] * T2( beta );
00376 }
00377 return ii;
00378 }
00379
00380
00381 template<class T>
00382 int SparseMatrix<T>::Solve(const SparseMatrix<T>& M,const Vector<T>& b,int iters,Vector<T>& solution,const T eps){
00383 SparseMatrix mTranspose=M.Transpose();
00384 Vector<T> bb=mTranspose*b;
00385 Vector<T> d,r,Md;
00386 T alpha,beta,rDotR;
00387 int i;
00388
00389 solution.Resize(M.Columns());
00390 solution.SetZero();
00391
00392 d=r=bb;
00393 rDotR=r.Dot(r);
00394 for(i=0;i<iters && rDotR>eps;i++){
00395 T temp;
00396 Md=mTranspose*(M*d);
00397 alpha=rDotR/d.Dot(Md);
00398 solution+=d*alpha;
00399 r-=Md*alpha;
00400 temp=r.Dot(r);
00401 beta=temp/rDotR;
00402 rDotR=temp;
00403 d=r+d*beta;
00404 }
00405 return i;
00406 }
00407
00408
00409
00410
00412
00414 template<class T>
00415 template<class T2>
00416 Vector<T2> SparseSymmetricMatrix<T>::operator * (const Vector<T2>& V) const {return Multiply(V);}
00417 template<class T>
00418 template<class T2>
00419 Vector<T2> SparseSymmetricMatrix<T>::Multiply( const Vector<T2>& V ) const
00420 {
00421 Vector<T2> R( SparseMatrix<T>::rows );
00422
00423 for(int i=0; i<SparseMatrix<T>::rows; i++){
00424 for(int ii=0;ii<SparseMatrix<T>::rowSizes[i];ii++){
00425 int j=SparseMatrix<T>::m_ppElements[i][ii].N;
00426 R(i)+=SparseMatrix<T>::m_ppElements[i][ii].Value * V.m_pV[j];
00427 R(j)+=SparseMatrix<T>::m_ppElements[i][ii].Value * V.m_pV[i];
00428 }
00429 }
00430 return R;
00431 }
00432
00433 template<class T>
00434 template<class T2>
00435 void SparseSymmetricMatrix<T>::Multiply( const Vector<T2>& In , Vector<T2>& Out , bool addDCTerm ) const
00436 {
00437 Out.SetZero();
00438 const T2* in = &In[0];
00439 T2* out = &Out[0];
00440 T2 dcTerm = T2( 0 );
00441 if( addDCTerm )
00442 {
00443 for( int i=0 ; i<SparseMatrix<T>::rows ; i++ ) dcTerm += in[i];
00444 dcTerm /= SparseMatrix<T>::rows;
00445 }
00446 for( int i=0 ; i<this->SparseMatrix<T>::rows ; i++ )
00447 {
00448 const MatrixEntry<T>* temp = SparseMatrix<T>::m_ppElements[i];
00449 const MatrixEntry<T>* end = temp + SparseMatrix<T>::rowSizes[i];
00450 const T2& in_i_ = in[i];
00451 T2 out_i = T2(0);
00452 for( ; temp!=end ; temp++ )
00453 {
00454 int j=temp->N;
00455 T2 v=temp->Value;
00456 out_i += v * in[j];
00457 out[j] += v * in_i_;
00458 }
00459 out[i] += out_i;
00460 }
00461 if( addDCTerm ) for( int i=0 ; i<SparseMatrix<T>::rows ; i++ ) out[i] += dcTerm;
00462 }
00463 template<class T>
00464 template<class T2>
00465 void SparseSymmetricMatrix<T>::Multiply( const Vector<T2>& In , Vector<T2>& Out , MapReduceVector< T2 >& OutScratch , bool addDCTerm ) const
00466 {
00467 int dim = int( In.Dimensions() );
00468 const T2* in = &In[0];
00469 int threads = OutScratch.threads();
00470 if( addDCTerm )
00471 {
00472 T2 dcTerm = 0;
00473 #pragma omp parallel for num_threads( threads ) reduction ( + : dcTerm )
00474 for( int t=0 ; t<threads ; t++ )
00475 {
00476 T2* out = OutScratch[t];
00477 memset( out , 0 , sizeof( T2 ) * dim );
00478 for( int i=(SparseMatrix<T>::rows*t)/threads ; i<(SparseMatrix<T>::rows*(t+1))/threads ; i++ )
00479 {
00480 const T2& in_i_ = in[i];
00481 T2& out_i_ = out[i];
00482 for( const MatrixEntry< T > *temp = SparseMatrix<T>::m_ppElements[i] , *end = temp+SparseMatrix<T>::rowSizes[i] ; temp!=end ; temp++ )
00483 {
00484 int j = temp->N;
00485 T2 v = temp->Value;
00486 out_i_ += v * in[j];
00487 out[j] += v * in_i_;
00488 }
00489 dcTerm += in_i_;
00490 }
00491 }
00492 dcTerm /= dim;
00493 dim = int( Out.Dimensions() );
00494 T2* out = &Out[0];
00495 #pragma omp parallel for num_threads( threads ) schedule( static )
00496 for( int i=0 ; i<dim ; i++ )
00497 {
00498 T2 _out = dcTerm;
00499 for( int t=0 ; t<threads ; t++ ) _out += OutScratch[t][i];
00500 out[i] = _out;
00501 }
00502 }
00503 else
00504 {
00505 #pragma omp parallel for num_threads( threads )
00506 for( int t=0 ; t<threads ; t++ )
00507 {
00508 T2* out = OutScratch[t];
00509 memset( out , 0 , sizeof( T2 ) * dim );
00510 for( int i=(SparseMatrix<T>::rows*t)/threads ; i<(SparseMatrix<T>::rows*(t+1))/threads ; i++ )
00511 {
00512 const T2& in_i_ = in[i];
00513 T2& out_i_ = out[i];
00514 for( const MatrixEntry< T > *temp = SparseMatrix<T>::m_ppElements[i] , *end = temp+SparseMatrix<T>::rowSizes[i] ; temp!=end ; temp++ )
00515 {
00516 int j = temp->N;
00517 T2 v = temp->Value;
00518 out_i_ += v * in[j];
00519 out[j] += v * in_i_;
00520 }
00521 }
00522 }
00523 dim = int( Out.Dimensions() );
00524 T2* out = &Out[0];
00525 #pragma omp parallel for num_threads( threads ) schedule( static )
00526 for( int i=0 ; i<dim ; i++ )
00527 {
00528 T2 _out = T2(0);
00529 for( int t=0 ; t<threads ; t++ ) _out += OutScratch[t][i];
00530 out[i] = _out;
00531 }
00532 }
00533 }
00534 template<class T>
00535 template<class T2>
00536 void SparseSymmetricMatrix<T>::Multiply( const Vector<T2>& In , Vector<T2>& Out , std::vector< T2* >& OutScratch , const std::vector< int >& bounds ) const
00537 {
00538 int dim = In.Dimensions();
00539 const T2* in = &In[0];
00540 int threads = OutScratch.size();
00541 #pragma omp parallel for num_threads( threads )
00542 for( int t=0 ; t<threads ; t++ )
00543 for( int i=0 ; i<dim ; i++ ) OutScratch[t][i] = T2(0);
00544 #pragma omp parallel for num_threads( threads )
00545 for( int t=0 ; t<threads ; t++ )
00546 {
00547 T2* out = OutScratch[t];
00548 for( int i=bounds[t] ; i<bounds[t+1] ; i++ )
00549 {
00550 const MatrixEntry<T>* temp = SparseMatrix<T>::m_ppElements[i];
00551 const MatrixEntry<T>* end = temp + SparseMatrix<T>::rowSizes[i];
00552 const T2& in_i_ = in[i];
00553 T2& out_i_ = out[i];
00554 for( ; temp!=end ; temp++ )
00555 {
00556 int j = temp->N;
00557 T2 v = temp->Value;
00558 out_i_ += v * in[j];
00559 out[j] += v * in_i_;
00560 }
00561 }
00562 }
00563 T2* out = &Out[0];
00564 #pragma omp parallel for num_threads( threads ) schedule( static )
00565 for( int i=0 ; i<Out.Dimensions() ; i++ )
00566 {
00567 T2& _out = out[i];
00568 _out = T2(0);
00569 for( int t=0 ; t<threads ; t++ ) _out += OutScratch[t][i];
00570 }
00571 }
00572 #if defined _WIN32 && !defined __MINGW32__
00573 #ifndef _AtomicIncrement_
00574 #define _AtomicIncrement_
00575 inline void AtomicIncrement( volatile float* ptr , float addend )
00576 {
00577 float newValue = *ptr;
00578 LONG& _newValue = *( (LONG*)&newValue );
00579 LONG _oldValue;
00580 for( ;; )
00581 {
00582 _oldValue = _newValue;
00583 newValue += addend;
00584 _newValue = InterlockedCompareExchange( (LONG*) ptr , _newValue , _oldValue );
00585 if( _newValue==_oldValue ) break;
00586 }
00587 }
00588 inline void AtomicIncrement( volatile double* ptr , double addend )
00589
00590 {
00591 double newValue = *ptr;
00592 LONGLONG& _newValue = *( (LONGLONG*)&newValue );
00593 LONGLONG _oldValue;
00594 do
00595 {
00596 _oldValue = _newValue;
00597 newValue += addend;
00598 _newValue = InterlockedCompareExchange64( (LONGLONG*) ptr , _newValue , _oldValue );
00599 }
00600 while( _newValue!=_oldValue );
00601 }
00602 #endif // _AtomicIncrement_
00603 template< class T >
00604 void MultiplyAtomic( const SparseSymmetricMatrix< T >& A , const Vector< float >& In , Vector< float >& Out , int threads , const int* partition=NULL )
00605 {
00606 Out.SetZero();
00607 const float* in = &In[0];
00608 float* out = &Out[0];
00609 if( partition )
00610 #pragma omp parallel for num_threads( threads )
00611 for( int t=0 ; t<threads ; t++ )
00612 for( int i=partition[t] ; i<partition[t+1] ; i++ )
00613 {
00614 const MatrixEntry< T >* temp = A[i];
00615 const MatrixEntry< T >* end = temp + A.rowSizes[i];
00616 const float& in_i = in[i];
00617 float out_i = 0.;
00618 for( ; temp!=end ; temp++ )
00619 {
00620 int j = temp->N;
00621 float v = temp->Value;
00622 out_i += v * in[j];
00623 AtomicIncrement( out+j , v * in_i );
00624 }
00625 AtomicIncrement( out+i , out_i );
00626 }
00627 else
00628 #pragma omp parallel for num_threads( threads )
00629 for( int i=0 ; i<A.rows ; i++ )
00630 {
00631 const MatrixEntry< T >* temp = A[i];
00632 const MatrixEntry< T >* end = temp + A.rowSizes[i];
00633 const float& in_i = in[i];
00634 float out_i = 0.f;
00635 for( ; temp!=end ; temp++ )
00636 {
00637 int j = temp->N;
00638 float v = temp->Value;
00639 out_i += v * in[j];
00640 AtomicIncrement( out+j , v * in_i );
00641 }
00642 AtomicIncrement( out+i , out_i );
00643 }
00644 }
00645 template< class T >
00646 void MultiplyAtomic( const SparseSymmetricMatrix< T >& A , const Vector< double >& In , Vector< double >& Out , int threads , const int* partition=NULL )
00647 {
00648 Out.SetZero();
00649 const double* in = &In[0];
00650 double* out = &Out[0];
00651
00652 if( partition )
00653 #pragma omp parallel for num_threads( threads )
00654 for( int t=0 ; t<threads ; t++ )
00655 for( int i=partition[t] ; i<partition[t+1] ; i++ )
00656 {
00657 const MatrixEntry< T >* temp = A[i];
00658 const MatrixEntry< T >* end = temp + A.rowSizes[i];
00659 const double& in_i = in[i];
00660 double out_i = 0.;
00661 for( ; temp!=end ; temp++ )
00662 {
00663 int j = temp->N;
00664 T v = temp->Value;
00665 out_i += v * in[j];
00666 AtomicIncrement( out+j , v * in_i );
00667 }
00668 AtomicIncrement( out+i , out_i );
00669 }
00670 else
00671 #pragma omp parallel for num_threads( threads )
00672 for( int i=0 ; i<A.rows ; i++ )
00673 {
00674 const MatrixEntry< T >* temp = A[i];
00675 const MatrixEntry< T >* end = temp + A.rowSizes[i];
00676 const double& in_i = in[i];
00677 double out_i = 0.;
00678 for( ; temp!=end ; temp++ )
00679 {
00680 int j = temp->N;
00681 T v = temp->Value;
00682 out_i += v * in[j];
00683 AtomicIncrement( out+j , v * in_i );
00684 }
00685 AtomicIncrement( out+i , out_i );
00686 }
00687 }
00688
00689 template< class T >
00690 template< class T2 >
00691 int SparseSymmetricMatrix< T >::SolveAtomic( const SparseSymmetricMatrix< T >& A , const Vector< T2 >& b , int iters , Vector< T2 >& x , T2 eps , int reset , int threads , bool solveNormal )
00692 {
00693 eps *= eps;
00694 int dim = b.Dimensions();
00695 if( reset )
00696 {
00697 x.Resize( dim );
00698 x.SetZero();
00699 }
00700 Vector< T2 > r( dim ) , d( dim ) , q( dim );
00701 Vector< T2 > temp;
00702 if( solveNormal ) temp.Resize( dim );
00703 T2 *_x = &x[0] , *_r = &r[0] , *_d = &d[0] , *_q = &q[0];
00704 const T2* _b = &b[0];
00705
00706 std::vector< int > partition( threads+1 );
00707 {
00708 int eCount = 0;
00709 for( int i=0 ; i<A.rows ; i++ ) eCount += A.rowSizes[i];
00710 partition[0] = 0;
00711 #pragma omp parallel for num_threads( threads )
00712 for( int t=0 ; t<threads ; t++ )
00713 {
00714 int _eCount = 0;
00715 for( int i=0 ; i<A.rows ; i++ )
00716 {
00717 _eCount += A.rowSizes[i];
00718 if( _eCount*threads>=eCount*(t+1) )
00719 {
00720 partition[t+1] = i;
00721 break;
00722 }
00723 }
00724 }
00725 partition[threads] = A.rows;
00726 }
00727 if( solveNormal )
00728 {
00729 MultiplyAtomic( A , x , temp , threads , &partition[0] );
00730 MultiplyAtomic( A , temp , r , threads , &partition[0] );
00731 MultiplyAtomic( A , b , temp , threads , &partition[0] );
00732 #pragma omp parallel for num_threads( threads ) schedule( static )
00733 for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = temp[i] - _r[i];
00734 }
00735 else
00736 {
00737 MultiplyAtomic( A , x , r , threads , &partition[0] );
00738 #pragma omp parallel for num_threads( threads ) schedule( static )
00739 for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = _b[i] - _r[i];
00740 }
00741 double delta_new = 0 , delta_0;
00742 for( size_t i=0 ; i<dim ; i++ ) delta_new += _r[i] * _r[i];
00743 delta_0 = delta_new;
00744 if( delta_new<eps )
00745 {
00746 fprintf( stderr , "[WARNING] Initial residual too low: %g < %f\n" , delta_new , eps );
00747 return 0;
00748 }
00749 int ii;
00750 for( ii=0; ii<iters && delta_new>eps*delta_0 ; ii++ )
00751 {
00752 if( solveNormal ) MultiplyAtomic( A , d , temp , threads , &partition[0] ) , MultiplyAtomic( A , temp , q , threads , &partition[0] );
00753 else MultiplyAtomic( A , d , q , threads , &partition[0] );
00754 double dDotQ = 0;
00755 for( int i=0 ; i<dim ; i++ ) dDotQ += _d[i] * _q[i];
00756 T2 alpha = T2( delta_new / dDotQ );
00757 #pragma omp parallel for num_threads( threads ) schedule( static )
00758 for( int i=0 ; i<dim ; i++ ) _x[i] += _d[i] * alpha;
00759 if( (ii%50)==(50-1) )
00760 {
00761 r.Resize( dim );
00762 if( solveNormal ) MultiplyAtomic( A , x , temp , threads , &partition[0] ) , MultiplyAtomic( A , temp , r , threads , &partition[0] );
00763 else MultiplyAtomic( A , x , r , threads , &partition[0] );
00764 #pragma omp parallel for num_threads( threads ) schedule( static )
00765 for( int i=0 ; i<dim ; i++ ) _r[i] = _b[i] - _r[i];
00766 }
00767 else
00768 #pragma omp parallel for num_threads( threads ) schedule( static )
00769 for( int i=0 ; i<dim ; i++ ) _r[i] -= _q[i] * alpha;
00770
00771 double delta_old = delta_new;
00772 delta_new = 0;
00773 for( size_t i=0 ; i<dim ; i++ ) delta_new += _r[i] * _r[i];
00774 T2 beta = T2( delta_new / delta_old );
00775 #pragma omp parallel for num_threads( threads ) schedule( static )
00776 for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] + _d[i] * beta;
00777 }
00778 return ii;
00779 }
00780 #endif // _WIN32 && !__MINGW32__
00781 template< class T >
00782 template< class T2 >
00783 int SparseSymmetricMatrix< T >::Solve( const SparseSymmetricMatrix<T>& A , const Vector<T2>& b , int iters , Vector<T2>& x , MapReduceVector< T2 >& scratch , T2 eps , int reset , bool addDCTerm , bool solveNormal )
00784 {
00785 int threads = scratch.threads();
00786 eps *= eps;
00787 int dim = int( b.Dimensions() );
00788 Vector< T2 > r( dim ) , d( dim ) , q( dim ) , temp;
00789 if( reset ) x.Resize( dim );
00790 if( solveNormal ) temp.Resize( dim );
00791 T2 *_x = &x[0] , *_r = &r[0] , *_d = &d[0] , *_q = &q[0];
00792 const T2* _b = &b[0];
00793
00794 double delta_new = 0 , delta_0;
00795 if( solveNormal )
00796 {
00797 A.Multiply( x , temp , scratch , addDCTerm ) , A.Multiply( temp , r , scratch , addDCTerm ) , A.Multiply( b , temp , scratch , addDCTerm );
00798 #pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
00799 for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = temp[i] - _r[i] , delta_new += _r[i] * _r[i];
00800 }
00801 else
00802 {
00803 A.Multiply( x , r , scratch , addDCTerm );
00804 #pragma omp parallel for num_threads( threads ) reduction ( + : delta_new )
00805 for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i];
00806 }
00807 delta_0 = delta_new;
00808 if( delta_new<eps )
00809 {
00810 fprintf( stderr , "[WARNING] Initial residual too low: %g < %f\n" , delta_new , eps );
00811 return 0;
00812 }
00813 int ii;
00814 for( ii=0 ; ii<iters && delta_new>eps*delta_0 ; ii++ )
00815 {
00816 if( solveNormal ) A.Multiply( d , temp , scratch , addDCTerm ) , A.Multiply( temp , q , scratch , addDCTerm );
00817 else A.Multiply( d , q , scratch , addDCTerm );
00818 double dDotQ = 0;
00819 #pragma omp parallel for num_threads( threads ) reduction( + : dDotQ )
00820 for( int i=0 ; i<dim ; i++ ) dDotQ += _d[i] * _q[i];
00821 T2 alpha = T2( delta_new / dDotQ );
00822 double delta_old = delta_new;
00823 delta_new = 0;
00824 if( (ii%50)==(50-1) )
00825 {
00826 #pragma omp parallel for num_threads( threads )
00827 for( int i=0 ; i<dim ; i++ ) _x[i] += _d[i] * alpha;
00828 r.Resize( dim );
00829 if( solveNormal ) A.Multiply( x , temp , scratch , addDCTerm ) , A.Multiply( temp , r , scratch , addDCTerm );
00830 else A.Multiply( x , r , scratch , addDCTerm );
00831 #pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
00832 for( int i=0 ; i<dim ; i++ ) _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i] , _x[i] += _d[i] * alpha;
00833 }
00834 else
00835 #pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
00836 for( int i=0 ; i<dim ; i++ ) _r[i] -= _q[i] * alpha , delta_new += _r[i] * _r[i] , _x[i] += _d[i] * alpha;
00837
00838 T2 beta = T2( delta_new / delta_old );
00839 #pragma omp parallel for num_threads( threads )
00840 for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] + _d[i] * beta;
00841 }
00842 return ii;
00843 }
00844 template< class T >
00845 template< class T2 >
00846 int SparseSymmetricMatrix<T>::Solve( const SparseSymmetricMatrix<T>& A , const Vector<T2>& b , int iters , Vector<T2>& x , T2 eps , int reset , int threads , bool addDCTerm , bool solveNormal )
00847 {
00848 eps *= eps;
00849 int dim = int( b.Dimensions() );
00850 MapReduceVector< T2 > outScratch;
00851 if( threads<1 ) threads = 1;
00852 if( threads>1 ) outScratch.resize( threads , dim );
00853 if( reset ) x.Resize( dim );
00854 Vector< T2 > r( dim ) , d( dim ) , q( dim );
00855 Vector< T2 > temp;
00856 if( solveNormal ) temp.Resize( dim );
00857 T2 *_x = &x[0] , *_r = &r[0] , *_d = &d[0] , *_q = &q[0];
00858 const T2* _b = &b[0];
00859
00860 double delta_new = 0 , delta_0;
00861
00862 if( solveNormal )
00863 {
00864 if( threads>1 ) A.Multiply( x , temp , outScratch , addDCTerm ) , A.Multiply( temp , r , outScratch , addDCTerm ) , A.Multiply( b , temp , outScratch , addDCTerm );
00865 else A.Multiply( x , temp , addDCTerm ) , A.Multiply( temp , r , addDCTerm ) , A.Multiply( b , temp , addDCTerm );
00866 #pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
00867 for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = temp[i] - _r[i] , delta_new += _r[i] * _r[i];
00868 }
00869 else
00870 {
00871 if( threads>1 ) A.Multiply( x , r , outScratch , addDCTerm );
00872 else A.Multiply( x , r , addDCTerm );
00873 #pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
00874 for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i];
00875 }
00876
00877 delta_0 = delta_new;
00878 if( delta_new<eps )
00879 {
00880 fprintf( stderr , "[WARNING] Initial residual too low: %g < %f\n" , delta_new , eps );
00881 return 0;
00882 }
00883 int ii;
00884 for( ii=0 ; ii<iters && delta_new>eps*delta_0 ; ii++ )
00885 {
00886 if( solveNormal )
00887 {
00888 if( threads>1 ) A.Multiply( d , temp , outScratch , addDCTerm ) , A.Multiply( temp , q , outScratch , addDCTerm );
00889 else A.Multiply( d , temp , addDCTerm ) , A.Multiply( temp , q , addDCTerm );
00890 }
00891 else
00892 {
00893 if( threads>1 ) A.Multiply( d , q , outScratch , addDCTerm );
00894 else A.Multiply( d , q , addDCTerm );
00895 }
00896 double dDotQ = 0;
00897 #pragma omp parallel for num_threads( threads ) reduction( + : dDotQ )
00898 for( int i=0 ; i<dim ; i++ ) dDotQ += _d[i] * _q[i];
00899 T2 alpha = T2( delta_new / dDotQ );
00900 double delta_old = delta_new;
00901 delta_new = 0;
00902
00903 if( (ii%50)==(50-1) )
00904 {
00905 #pragma omp parallel for num_threads( threads )
00906 for( int i=0 ; i<dim ; i++ ) _x[i] += _d[i] * alpha;
00907 r.SetZero();
00908 if( solveNormal )
00909 {
00910 if( threads>1 ) A.Multiply( x , temp , outScratch , addDCTerm ) , A.Multiply( temp , r , outScratch , addDCTerm );
00911 else A.Multiply( x , temp , addDCTerm ) , A.Multiply( temp , r , addDCTerm );
00912 }
00913 else
00914 {
00915 if( threads>1 ) A.Multiply( x , r , outScratch , addDCTerm );
00916 else A.Multiply( x , r , addDCTerm );
00917 }
00918 #pragma omp parallel for num_threads( threads ) reduction ( + : delta_new )
00919 for( int i=0 ; i<dim ; i++ ) _r[i] = _b[i] - _r[i] , delta_new += _r[i] * _r[i] , _x[i] += _d[i] * alpha;
00920 }
00921 else
00922 {
00923 #pragma omp parallel for num_threads( threads ) reduction( + : delta_new )
00924 for( int i=0 ; i<dim ; i++ ) _r[i] -= _q[i] * alpha , delta_new += _r[i] * _r[i] , _x[i] += _d[i] * alpha;
00925 }
00926
00927 T2 beta = T2( delta_new / delta_old );
00928 #pragma omp parallel for num_threads( threads )
00929 for( int i=0 ; i<dim ; i++ ) _d[i] = _r[i] + _d[i] * beta;
00930 }
00931 return ii;
00932 }
00933
00934 template<class T>
00935 template<class T2>
00936 int SparseSymmetricMatrix<T>::Solve( const SparseSymmetricMatrix<T>& M , const Vector<T2>& diagonal , const Vector<T2>& b , int iters , Vector<T2>& solution , int reset )
00937 {
00938 Vector<T2> d,r,Md;
00939
00940 if(reset)
00941 {
00942 solution.Resize(b.Dimensions());
00943 solution.SetZero();
00944 }
00945 Md.Resize(M.rows);
00946 for( int i=0 ; i<iters ; i++ )
00947 {
00948 M.Multiply( solution , Md );
00949 r = b-Md;
00950
00951
00952
00953 for( int j=0 ; j<int(M.rows) ; j++ ) solution[j] += (b[j]-Md[j])/diagonal[j];
00954 }
00955 return iters;
00956 }
00957 template< class T >
00958 template< class T2 >
00959 void SparseSymmetricMatrix< T >::getDiagonal( Vector< T2 >& diagonal ) const
00960 {
00961 diagonal.Resize( SparseMatrix<T>::rows );
00962 for( int i=0 ; i<SparseMatrix<T>::rows ; i++ )
00963 {
00964 diagonal[i] = 0.;
00965 for( int j=0 ; j<SparseMatrix<T>::rowSizes[i] ; j++ ) if( SparseMatrix<T>::m_ppElements[i][j].N==i ) diagonal[i] += SparseMatrix<T>::m_ppElements[i][j].Value * 2;
00966 }
00967 }
00968
00969 }
00970 }