opennl: opennl: nl_iterative_solvers.c Source File

00001 /*
00002  *  OpenNL: Numerical Library
00003  *  Copyright (C) 2004 Bruno Levy
00004  *
00005  *  This program is free software; you can redistribute it and/or modify
00006  *  it under the terms of the GNU General Public License as published by
00007  *  the Free Software Foundation; either version 2 of the License, or
00008  *  (at your option) any later version.
00009  *
00010  *  This program is distributed in the hope that it will be useful,
00011  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013  *  GNU General Public License for more details.
00014  *
00015  *  You should have received a copy of the GNU General Public License
00016  *  along with this program; if not, write to the Free Software
00017  *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
00018  *
00019  *  If you modify this software, you should include a notice giving the
00020  *  name of the person performing the modification, the date of modification,
00021  *  and the reason for such modification.
00022  *
00023  *  Contact: Bruno Levy
00024  *
00025  *     levy@loria.fr
00026  *
00027  *     ISA Project
00028  *     LORIA, INRIA Lorraine, 
00029  *     Campus Scientifique, BP 239
00030  *     54506 VANDOEUVRE LES NANCY CEDEX 
00031  *     FRANCE
00032  *
00033  *  Note that the GNU General Public License does not permit incorporating
00034  *  the Software into proprietary programs. 
00035  *
00036  */
00037 
00038 #include <NL/nl_iterative_solvers.h>
00039 #include <NL/nl_blas.h>
00040 #include <NL/nl_matrix.h>
00041 #include <NL/nl_context.h>
00042 
00043 /************************************************************************/
00044 /* Solvers */
00045 
00046 /*
00047  * The implementation of the solvers is inspired by 
00048  * the lsolver library, by Christian Badura, available from:
00049  * http://www.mathematik.uni-freiburg.de
00050  * /IAM/Research/projectskr/lin_solver/
00051  *
00052  * About the Conjugate Gradient, details can be found in:
00053  *  Ashby, Manteuffel, Saylor
00054  *     A taxononmy for conjugate gradient methods
00055  *     SIAM J Numer Anal 27, 1542-1568 (1990)
00056  */
00057 
00058 NLuint nlSolve_CG() {
00059     NLdouble* b        = nlCurrentContext->b ;
00060     NLdouble* x        = nlCurrentContext->x ;
00061     NLdouble  eps      = nlCurrentContext->threshold ;
00062     NLuint    max_iter = nlCurrentContext->max_iterations ;
00063     NLint     N        = nlCurrentContext->n ;
00064 
00065     NLdouble *g = NL_NEW_ARRAY(NLdouble, N) ;
00066     NLdouble *r = NL_NEW_ARRAY(NLdouble, N) ; 
00067     NLdouble *p = NL_NEW_ARRAY(NLdouble, N) ;
00068     NLuint its=0;
00069     NLdouble t, tau, sig, rho, gam;
00070     NLdouble err=eps*eps*ddot(N,b,1,b,1);
00071 
00072     nlCurrentContext->matrix_vector_prod(x,g);
00073     daxpy(N,-1.,b,1,g,1);
00074     dscal(N,-1.,g,1);
00075     dcopy(N,g,1,r,1);
00076     while ( ddot(N,g,1,g,1)>err && its < max_iter) {
00077         nlCurrentContext->matrix_vector_prod(r,p);
00078         rho=ddot(N,p,1,p,1);
00079         sig=ddot(N,r,1,p,1);
00080         tau=ddot(N,g,1,r,1);
00081         t=tau/sig;
00082         daxpy(N,t,r,1,x,1);
00083         daxpy(N,-t,p,1,g,1);
00084         gam=(t*t*rho-tau)/tau;
00085         dscal(N,gam,r,1);
00086         daxpy(N,1.,g,1,r,1);
00087         ++its;
00088     }
00089     NL_DELETE_ARRAY(g) ;
00090     NL_DELETE_ARRAY(r) ;
00091     NL_DELETE_ARRAY(p) ;
00092     return its;
00093 } 
00094 
00095 
00096 NLuint nlSolve_CG_precond()  {
00097     NLdouble* b        = nlCurrentContext->b ;
00098     NLdouble* x        = nlCurrentContext->x ;
00099     NLdouble  eps      = nlCurrentContext->threshold ;
00100     NLuint    max_iter = nlCurrentContext->max_iterations ;
00101     NLint     N        = nlCurrentContext->n ;
00102 
00103     NLdouble* r = NL_NEW_ARRAY(NLdouble, N) ;
00104     NLdouble* d = NL_NEW_ARRAY(NLdouble, N) ;
00105     NLdouble* h = NL_NEW_ARRAY(NLdouble, N) ;
00106     NLdouble *Ad = h;
00107     NLuint its=0;
00108     NLdouble rh, alpha, beta;
00109     NLdouble err=eps*eps*ddot(N,b,1,b,1);
00110     
00111     nlCurrentContext->matrix_vector_prod(x,r);
00112     daxpy(N,-1.,b,1,r,1);
00113     nlCurrentContext->precond_vector_prod(r,d);
00114     dcopy(N,d,1,h,1);
00115     rh=ddot(N,r,1,h,1);
00116     while ( ddot(N,r,1,r,1)>err && its < max_iter) {
00117         nlCurrentContext->matrix_vector_prod(d,Ad);
00118         alpha=rh/ddot(N,d,1,Ad,1);
00119         daxpy(N,-alpha,d,1,x,1);
00120         daxpy(N,-alpha,Ad,1,r,1);
00121         nlCurrentContext->precond_vector_prod(r,h);
00122         beta=1./rh; rh=ddot(N,r,1,h,1); beta*=rh;
00123         dscal(N,beta,d,1);
00124         daxpy(N,1.,h,1,d,1);
00125         ++its;
00126     }
00127     NL_DELETE_ARRAY(r) ;
00128     NL_DELETE_ARRAY(d) ;
00129     NL_DELETE_ARRAY(h) ;
00130     
00131     return its;
00132 }
00133 
00134 NLuint nlSolve_BICGSTAB() {
00135     NLdouble* b        = nlCurrentContext->b ;
00136     NLdouble* x        = nlCurrentContext->x ;
00137     NLdouble  eps      = nlCurrentContext->threshold ;
00138     NLuint    max_iter = nlCurrentContext->max_iterations ;
00139     NLint     N        = nlCurrentContext->n ;
00140 
00141     NLdouble *rT  = NL_NEW_ARRAY(NLdouble, N) ; 
00142     NLdouble *d   = NL_NEW_ARRAY(NLdouble, N) ; 
00143     NLdouble *h   = NL_NEW_ARRAY(NLdouble, N) ; 
00144     NLdouble *u   = NL_NEW_ARRAY(NLdouble, N) ; 
00145     NLdouble *Ad  = NL_NEW_ARRAY(NLdouble, N) ; 
00146     NLdouble *t   = NL_NEW_ARRAY(NLdouble, N) ; 
00147     NLdouble *s   = h;
00148     NLdouble rTh, rTAd, rTr, alpha, beta, omega, st, tt ;
00149     NLuint its=0;
00150     NLdouble err=eps*eps*ddot(N,b,1,b,1);
00151     NLdouble *r = NL_NEW_ARRAY(NLdouble, N) ;
00152 
00153     nlCurrentContext->matrix_vector_prod(x,r);
00154     daxpy(N,-1.,b,1,r,1);
00155     dcopy(N,r,1,d,1);
00156     dcopy(N,d,1,h,1);
00157     dcopy(N,h,1,rT,1);
00158     nl_assert( ddot(N,rT,1,rT,1)>1e-40 );
00159     rTh=ddot(N,rT,1,h,1);
00160     rTr=ddot(N,r,1,r,1);
00161     while ( rTr>err && its < max_iter) {
00162         nlCurrentContext->matrix_vector_prod(d,Ad);
00163         rTAd=ddot(N,rT,1,Ad,1);
00164         nl_assert( fabs(rTAd)>1e-40 );
00165         alpha=rTh/rTAd;
00166         daxpy(N,-alpha,Ad,1,r,1);
00167         dcopy(N,h,1,s,1);
00168         daxpy(N,-alpha,Ad,1,s,1);
00169         nlCurrentContext->matrix_vector_prod(s,t);
00170         daxpy(N,1.,t,1,u,1);
00171         dscal(N,alpha,u,1);
00172         st=ddot(N,s,1,t,1);
00173         tt=ddot(N,t,1,t,1);
00174         if ( fabs(st)<1e-40 || fabs(tt)<1e-40 ) {
00175             omega = 0.;
00176         } else {
00177             omega = st/tt;
00178         }
00179         daxpy(N,-omega,t,1,r,1);
00180         daxpy(N,-alpha,d,1,x,1);
00181         daxpy(N,-omega,s,1,x,1);
00182         dcopy(N,s,1,h,1);
00183         daxpy(N,-omega,t,1,h,1);
00184         beta=(alpha/omega)/rTh; rTh=ddot(N,rT,1,h,1); beta*=rTh;
00185         dscal(N,beta,d,1);
00186         daxpy(N,1.,h,1,d,1);
00187         daxpy(N,-beta*omega,Ad,1,d,1);
00188         rTr=ddot(N,r,1,r,1);
00189         ++its;
00190     }
00191 
00192     NL_DELETE_ARRAY(r) ;
00193     NL_DELETE_ARRAY(rT) ;
00194     NL_DELETE_ARRAY(d) ;
00195     NL_DELETE_ARRAY(h) ;
00196     NL_DELETE_ARRAY(u) ;
00197     NL_DELETE_ARRAY(Ad) ;
00198     NL_DELETE_ARRAY(t) ;
00199     
00200     return its;
00201 }
00202 
00203 
00204 NLuint nlSolve_BICGSTAB_precond() {
00205 
00206     NLdouble* b        = nlCurrentContext->b ;
00207     NLdouble* x        = nlCurrentContext->x ;
00208     NLdouble  eps      = nlCurrentContext->threshold ;
00209     NLuint    max_iter = nlCurrentContext->max_iterations ;
00210     NLint     N        = nlCurrentContext->n ;
00211 
00212     NLdouble *rT  = NL_NEW_ARRAY(NLdouble, N) ;
00213     NLdouble *d   = NL_NEW_ARRAY(NLdouble, N) ;
00214     NLdouble *h   = NL_NEW_ARRAY(NLdouble, N) ;
00215     NLdouble *u   = NL_NEW_ARRAY(NLdouble, N) ;
00216     NLdouble *Sd  = NL_NEW_ARRAY(NLdouble, N) ;
00217     NLdouble *t   = NL_NEW_ARRAY(NLdouble, N) ;
00218     NLdouble *aux = NL_NEW_ARRAY(NLdouble, N) ;
00219     NLdouble *s   = h;
00220     NLdouble rTh, rTSd, rTr, alpha, beta, omega, st, tt;
00221     NLuint its=0;
00222     NLdouble err=eps*eps*ddot(N,b,1,b,1);
00223     NLdouble *r = NL_NEW_ARRAY(NLdouble, N);
00224 
00225     nlCurrentContext->matrix_vector_prod(x,r);
00226     daxpy(N,-1.,b,1,r,1);
00227     nlCurrentContext->precond_vector_prod(r,d);
00228     dcopy(N,d,1,h,1);
00229     dcopy(N,h,1,rT,1);
00230     nl_assert( ddot(N,rT,1,rT,1)>1e-40 );
00231     rTh=ddot(N,rT,1,h,1);
00232     rTr=ddot(N,r,1,r,1);
00233     while ( rTr>err && its < max_iter) {
00234         nlCurrentContext->matrix_vector_prod(d,aux);
00235         nlCurrentContext->precond_vector_prod(aux,Sd);
00236         rTSd=ddot(N,rT,1,Sd,1);
00237         nl_assert( fabs(rTSd)>1e-40 );
00238         alpha=rTh/rTSd;
00239         daxpy(N,-alpha,aux,1,r,1);
00240         dcopy(N,h,1,s,1);
00241         daxpy(N,-alpha,Sd,1,s,1);
00242         nlCurrentContext->matrix_vector_prod(s,aux);
00243         nlCurrentContext->precond_vector_prod(aux,t);
00244         daxpy(N,1.,t,1,u,1);
00245         dscal(N,alpha,u,1);
00246         st=ddot(N,s,1,t,1);
00247         tt=ddot(N,t,1,t,1);
00248         if ( fabs(st)<1e-40 || fabs(tt)<1e-40 ) {
00249             omega = 0.;
00250         } else {
00251             omega = st/tt;
00252         }
00253         daxpy(N,-omega,aux,1,r,1);
00254         daxpy(N,-alpha,d,1,x,1);
00255         daxpy(N,-omega,s,1,x,1);
00256         dcopy(N,s,1,h,1);
00257         daxpy(N,-omega,t,1,h,1);
00258         beta=(alpha/omega)/rTh; rTh=ddot(N,rT,1,h,1); beta*=rTh;
00259         dscal(N,beta,d,1);
00260         daxpy(N,1.,h,1,d,1);
00261         daxpy(N,-beta*omega,Sd,1,d,1);
00262         rTr=ddot(N,r,1,r,1);
00263         ++its;
00264     }
00265 
00266     NL_DELETE_ARRAY(r);
00267     NL_DELETE_ARRAY(rT);
00268     NL_DELETE_ARRAY(d);
00269     NL_DELETE_ARRAY(h);
00270     NL_DELETE_ARRAY(u);
00271     NL_DELETE_ARRAY(Sd);
00272     NL_DELETE_ARRAY(t);
00273     NL_DELETE_ARRAY(aux);
00274 
00275     return its;
00276 }
00277 
00278 NLuint nlSolve_GMRES() {
00279 
00280     NLdouble* b        = nlCurrentContext->b ;
00281     NLdouble* x        = nlCurrentContext->x ;
00282     NLdouble  eps      = nlCurrentContext->threshold ;
00283     NLint    max_iter  = nlCurrentContext->max_iterations ;
00284     NLint    n         = nlCurrentContext->n ;
00285     NLint    m         = nlCurrentContext->inner_iterations ;
00286 
00287     typedef NLdouble *NLdoubleP;
00288     NLdouble *V  = NL_NEW_ARRAY(NLdouble, n*(m+1)   ) ;
00289     NLdouble *U  = NL_NEW_ARRAY(NLdouble, m*(m+1)/2 ) ;
00290     NLdouble *r  = NL_NEW_ARRAY(NLdouble, n         ) ;
00291     NLdouble *y  = NL_NEW_ARRAY(NLdouble, m+1       ) ;
00292     NLdouble *c  = NL_NEW_ARRAY(NLdouble, m         ) ;
00293     NLdouble *s  = NL_NEW_ARRAY(NLdouble, m         ) ;
00294     NLdouble **v = NL_NEW_ARRAY(NLdoubleP, m+1      ) ;
00295     NLint i, j, io, uij, u0j ; 
00296     NLint its = -1 ;
00297     NLdouble beta, h, rd, dd, nrm2b ;
00298 
00299     for ( i=0; i<=m; ++i ) v[i]=V+i*n ;
00300 
00301     nrm2b=dnrm2(n,b,1);
00302     io=0;
00303     do  { /* outer loop */
00304         ++io;
00305         nlCurrentContext->matrix_vector_prod(x,r);
00306         daxpy(n,-1.,b,1,r,1);
00307         beta=dnrm2(n,r,1);
00308         dcopy(n,r,1,v[0],1);
00309         dscal(n,1./beta,v[0],1);
00310         
00311         y[0]=beta;
00312         j=0;
00313         uij=0;
00314         do { /* inner loop: j=0,...,m-1 */
00315             u0j=uij;
00316             nlCurrentContext->matrix_vector_prod(v[j],v[j+1]);
00317             dgemv(
00318                 Transpose,n,j+1,1.,V,n,v[j+1],1,0.,U+u0j,1
00319             );
00320             dgemv(
00321                 NoTranspose,n,j+1,-1.,V,n,U+u0j,1,1.,v[j+1],1
00322             );
00323             h=dnrm2(n,v[j+1],1);
00324             dscal(n,1./h,v[j+1],1);
00325             for (i=0; i<j; ++i ) { /* rotiere neue Spalte */
00326                 double tmp = c[i]*U[uij]-s[i]*U[uij+1];
00327                 U[uij+1]   = s[i]*U[uij]+c[i]*U[uij+1];
00328                 U[uij]     = tmp;
00329                 ++uij;
00330             }
00331             { /* berechne neue Rotation */
00332                 rd     = U[uij];
00333                 dd     = sqrt(rd*rd+h*h);
00334                 c[j]   = rd/dd;
00335                 s[j]   = -h/dd;
00336                 U[uij] = dd;
00337                 ++uij;
00338             }
00339             { /* rotiere rechte Seite y (vorher: y[j+1]=0) */
00340                 y[j+1] = s[j]*y[j];
00341                 y[j]   = c[j]*y[j];
00342             }
00343             ++j;
00344         } while ( 
00345             j<m && fabs(y[j])>=eps*nrm2b 
00346         ) ;
00347         { /* minimiere bzgl Y */
00348             dtpsv(
00349                 UpperTriangle,
00350                 NoTranspose,
00351                 NotUnitTriangular,
00352                 j,U,y,1
00353             );
00354             /* correct X */
00355             dgemv(NoTranspose,n,j,-1.,V,n,y,1,1.,x,1);
00356         }
00357     } while ( fabs(y[j])>=eps*nrm2b && (m*(io-1)+j) < max_iter);
00358     
00359     /* Count the inner iterations */
00360     its = m*(io-1)+j;
00361 
00362     NL_DELETE_ARRAY(V) ;
00363     NL_DELETE_ARRAY(U) ;
00364     NL_DELETE_ARRAY(r) ;
00365     NL_DELETE_ARRAY(y) ;
00366     NL_DELETE_ARRAY(c) ;
00367     NL_DELETE_ARRAY(s) ;
00368     NL_DELETE_ARRAY(v) ;
00369     
00370     return its;
00371 }
00372