$search
00001 /* 00002 * OpenNL: Numerical Library 00003 * Copyright (C) 2004 Bruno Levy 00004 * 00005 * This program is free software; you can redistribute it and/or modify 00006 * it under the terms of the GNU General Public License as published by 00007 * the Free Software Foundation; either version 2 of the License, or 00008 * (at your option) any later version. 00009 * 00010 * This program is distributed in the hope that it will be useful, 00011 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00013 * GNU General Public License for more details. 00014 * 00015 * You should have received a copy of the GNU General Public License 00016 * along with this program; if not, write to the Free Software 00017 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 00018 * 00019 * If you modify this software, you should include a notice giving the 00020 * name of the person performing the modification, the date of modification, 00021 * and the reason for such modification. 00022 * 00023 * Contact: Bruno Levy 00024 * 00025 * levy@loria.fr 00026 * 00027 * ISA Project 00028 * LORIA, INRIA Lorraine, 00029 * Campus Scientifique, BP 239 00030 * 54506 VANDOEUVRE LES NANCY CEDEX 00031 * FRANCE 00032 * 00033 * Note that the GNU General Public License does not permit incorporating 00034 * the Software into proprietary programs. 00035 * 00036 */ 00037 00038 #include <NL/nl_iterative_solvers.h> 00039 #include <NL/nl_blas.h> 00040 #include <NL/nl_matrix.h> 00041 #include <NL/nl_context.h> 00042 00043 /************************************************************************/ 00044 /* Solvers */ 00045 00046 /* 00047 * The implementation of the solvers is inspired by 00048 * the lsolver library, by Christian Badura, available from: 00049 * http://www.mathematik.uni-freiburg.de 00050 * /IAM/Research/projectskr/lin_solver/ 00051 * 00052 * About the Conjugate Gradient, details can be found in: 00053 * Ashby, Manteuffel, Saylor 00054 * A taxononmy for conjugate gradient methods 00055 * SIAM J Numer Anal 27, 1542-1568 (1990) 00056 */ 00057 00058 NLuint nlSolve_CG() { 00059 NLdouble* b = nlCurrentContext->b ; 00060 NLdouble* x = nlCurrentContext->x ; 00061 NLdouble eps = nlCurrentContext->threshold ; 00062 NLuint max_iter = nlCurrentContext->max_iterations ; 00063 NLint N = nlCurrentContext->n ; 00064 00065 NLdouble *g = NL_NEW_ARRAY(NLdouble, N) ; 00066 NLdouble *r = NL_NEW_ARRAY(NLdouble, N) ; 00067 NLdouble *p = NL_NEW_ARRAY(NLdouble, N) ; 00068 NLuint its=0; 00069 NLdouble t, tau, sig, rho, gam; 00070 NLdouble err=eps*eps*ddot(N,b,1,b,1); 00071 00072 nlCurrentContext->matrix_vector_prod(x,g); 00073 daxpy(N,-1.,b,1,g,1); 00074 dscal(N,-1.,g,1); 00075 dcopy(N,g,1,r,1); 00076 while ( ddot(N,g,1,g,1)>err && its < max_iter) { 00077 nlCurrentContext->matrix_vector_prod(r,p); 00078 rho=ddot(N,p,1,p,1); 00079 sig=ddot(N,r,1,p,1); 00080 tau=ddot(N,g,1,r,1); 00081 t=tau/sig; 00082 daxpy(N,t,r,1,x,1); 00083 daxpy(N,-t,p,1,g,1); 00084 gam=(t*t*rho-tau)/tau; 00085 dscal(N,gam,r,1); 00086 daxpy(N,1.,g,1,r,1); 00087 ++its; 00088 } 00089 NL_DELETE_ARRAY(g) ; 00090 NL_DELETE_ARRAY(r) ; 00091 NL_DELETE_ARRAY(p) ; 00092 return its; 00093 } 00094 00095 00096 NLuint nlSolve_CG_precond() { 00097 NLdouble* b = nlCurrentContext->b ; 00098 NLdouble* x = nlCurrentContext->x ; 00099 NLdouble eps = nlCurrentContext->threshold ; 00100 NLuint max_iter = nlCurrentContext->max_iterations ; 00101 NLint N = nlCurrentContext->n ; 00102 00103 NLdouble* r = NL_NEW_ARRAY(NLdouble, N) ; 00104 NLdouble* d = NL_NEW_ARRAY(NLdouble, N) ; 00105 NLdouble* h = NL_NEW_ARRAY(NLdouble, N) ; 00106 NLdouble *Ad = h; 00107 NLuint its=0; 00108 NLdouble rh, alpha, beta; 00109 NLdouble err=eps*eps*ddot(N,b,1,b,1); 00110 00111 nlCurrentContext->matrix_vector_prod(x,r); 00112 daxpy(N,-1.,b,1,r,1); 00113 nlCurrentContext->precond_vector_prod(r,d); 00114 dcopy(N,d,1,h,1); 00115 rh=ddot(N,r,1,h,1); 00116 while ( ddot(N,r,1,r,1)>err && its < max_iter) { 00117 nlCurrentContext->matrix_vector_prod(d,Ad); 00118 alpha=rh/ddot(N,d,1,Ad,1); 00119 daxpy(N,-alpha,d,1,x,1); 00120 daxpy(N,-alpha,Ad,1,r,1); 00121 nlCurrentContext->precond_vector_prod(r,h); 00122 beta=1./rh; rh=ddot(N,r,1,h,1); beta*=rh; 00123 dscal(N,beta,d,1); 00124 daxpy(N,1.,h,1,d,1); 00125 ++its; 00126 } 00127 NL_DELETE_ARRAY(r) ; 00128 NL_DELETE_ARRAY(d) ; 00129 NL_DELETE_ARRAY(h) ; 00130 00131 return its; 00132 } 00133 00134 NLuint nlSolve_BICGSTAB() { 00135 NLdouble* b = nlCurrentContext->b ; 00136 NLdouble* x = nlCurrentContext->x ; 00137 NLdouble eps = nlCurrentContext->threshold ; 00138 NLuint max_iter = nlCurrentContext->max_iterations ; 00139 NLint N = nlCurrentContext->n ; 00140 00141 NLdouble *rT = NL_NEW_ARRAY(NLdouble, N) ; 00142 NLdouble *d = NL_NEW_ARRAY(NLdouble, N) ; 00143 NLdouble *h = NL_NEW_ARRAY(NLdouble, N) ; 00144 NLdouble *u = NL_NEW_ARRAY(NLdouble, N) ; 00145 NLdouble *Ad = NL_NEW_ARRAY(NLdouble, N) ; 00146 NLdouble *t = NL_NEW_ARRAY(NLdouble, N) ; 00147 NLdouble *s = h; 00148 NLdouble rTh, rTAd, rTr, alpha, beta, omega, st, tt ; 00149 NLuint its=0; 00150 NLdouble err=eps*eps*ddot(N,b,1,b,1); 00151 NLdouble *r = NL_NEW_ARRAY(NLdouble, N) ; 00152 00153 nlCurrentContext->matrix_vector_prod(x,r); 00154 daxpy(N,-1.,b,1,r,1); 00155 dcopy(N,r,1,d,1); 00156 dcopy(N,d,1,h,1); 00157 dcopy(N,h,1,rT,1); 00158 nl_assert( ddot(N,rT,1,rT,1)>1e-40 ); 00159 rTh=ddot(N,rT,1,h,1); 00160 rTr=ddot(N,r,1,r,1); 00161 while ( rTr>err && its < max_iter) { 00162 nlCurrentContext->matrix_vector_prod(d,Ad); 00163 rTAd=ddot(N,rT,1,Ad,1); 00164 nl_assert( fabs(rTAd)>1e-40 ); 00165 alpha=rTh/rTAd; 00166 daxpy(N,-alpha,Ad,1,r,1); 00167 dcopy(N,h,1,s,1); 00168 daxpy(N,-alpha,Ad,1,s,1); 00169 nlCurrentContext->matrix_vector_prod(s,t); 00170 daxpy(N,1.,t,1,u,1); 00171 dscal(N,alpha,u,1); 00172 st=ddot(N,s,1,t,1); 00173 tt=ddot(N,t,1,t,1); 00174 if ( fabs(st)<1e-40 || fabs(tt)<1e-40 ) { 00175 omega = 0.; 00176 } else { 00177 omega = st/tt; 00178 } 00179 daxpy(N,-omega,t,1,r,1); 00180 daxpy(N,-alpha,d,1,x,1); 00181 daxpy(N,-omega,s,1,x,1); 00182 dcopy(N,s,1,h,1); 00183 daxpy(N,-omega,t,1,h,1); 00184 beta=(alpha/omega)/rTh; rTh=ddot(N,rT,1,h,1); beta*=rTh; 00185 dscal(N,beta,d,1); 00186 daxpy(N,1.,h,1,d,1); 00187 daxpy(N,-beta*omega,Ad,1,d,1); 00188 rTr=ddot(N,r,1,r,1); 00189 ++its; 00190 } 00191 00192 NL_DELETE_ARRAY(r) ; 00193 NL_DELETE_ARRAY(rT) ; 00194 NL_DELETE_ARRAY(d) ; 00195 NL_DELETE_ARRAY(h) ; 00196 NL_DELETE_ARRAY(u) ; 00197 NL_DELETE_ARRAY(Ad) ; 00198 NL_DELETE_ARRAY(t) ; 00199 00200 return its; 00201 } 00202 00203 00204 NLuint nlSolve_BICGSTAB_precond() { 00205 00206 NLdouble* b = nlCurrentContext->b ; 00207 NLdouble* x = nlCurrentContext->x ; 00208 NLdouble eps = nlCurrentContext->threshold ; 00209 NLuint max_iter = nlCurrentContext->max_iterations ; 00210 NLint N = nlCurrentContext->n ; 00211 00212 NLdouble *rT = NL_NEW_ARRAY(NLdouble, N) ; 00213 NLdouble *d = NL_NEW_ARRAY(NLdouble, N) ; 00214 NLdouble *h = NL_NEW_ARRAY(NLdouble, N) ; 00215 NLdouble *u = NL_NEW_ARRAY(NLdouble, N) ; 00216 NLdouble *Sd = NL_NEW_ARRAY(NLdouble, N) ; 00217 NLdouble *t = NL_NEW_ARRAY(NLdouble, N) ; 00218 NLdouble *aux = NL_NEW_ARRAY(NLdouble, N) ; 00219 NLdouble *s = h; 00220 NLdouble rTh, rTSd, rTr, alpha, beta, omega, st, tt; 00221 NLuint its=0; 00222 NLdouble err=eps*eps*ddot(N,b,1,b,1); 00223 NLdouble *r = NL_NEW_ARRAY(NLdouble, N); 00224 00225 nlCurrentContext->matrix_vector_prod(x,r); 00226 daxpy(N,-1.,b,1,r,1); 00227 nlCurrentContext->precond_vector_prod(r,d); 00228 dcopy(N,d,1,h,1); 00229 dcopy(N,h,1,rT,1); 00230 nl_assert( ddot(N,rT,1,rT,1)>1e-40 ); 00231 rTh=ddot(N,rT,1,h,1); 00232 rTr=ddot(N,r,1,r,1); 00233 while ( rTr>err && its < max_iter) { 00234 nlCurrentContext->matrix_vector_prod(d,aux); 00235 nlCurrentContext->precond_vector_prod(aux,Sd); 00236 rTSd=ddot(N,rT,1,Sd,1); 00237 nl_assert( fabs(rTSd)>1e-40 ); 00238 alpha=rTh/rTSd; 00239 daxpy(N,-alpha,aux,1,r,1); 00240 dcopy(N,h,1,s,1); 00241 daxpy(N,-alpha,Sd,1,s,1); 00242 nlCurrentContext->matrix_vector_prod(s,aux); 00243 nlCurrentContext->precond_vector_prod(aux,t); 00244 daxpy(N,1.,t,1,u,1); 00245 dscal(N,alpha,u,1); 00246 st=ddot(N,s,1,t,1); 00247 tt=ddot(N,t,1,t,1); 00248 if ( fabs(st)<1e-40 || fabs(tt)<1e-40 ) { 00249 omega = 0.; 00250 } else { 00251 omega = st/tt; 00252 } 00253 daxpy(N,-omega,aux,1,r,1); 00254 daxpy(N,-alpha,d,1,x,1); 00255 daxpy(N,-omega,s,1,x,1); 00256 dcopy(N,s,1,h,1); 00257 daxpy(N,-omega,t,1,h,1); 00258 beta=(alpha/omega)/rTh; rTh=ddot(N,rT,1,h,1); beta*=rTh; 00259 dscal(N,beta,d,1); 00260 daxpy(N,1.,h,1,d,1); 00261 daxpy(N,-beta*omega,Sd,1,d,1); 00262 rTr=ddot(N,r,1,r,1); 00263 ++its; 00264 } 00265 00266 NL_DELETE_ARRAY(r); 00267 NL_DELETE_ARRAY(rT); 00268 NL_DELETE_ARRAY(d); 00269 NL_DELETE_ARRAY(h); 00270 NL_DELETE_ARRAY(u); 00271 NL_DELETE_ARRAY(Sd); 00272 NL_DELETE_ARRAY(t); 00273 NL_DELETE_ARRAY(aux); 00274 00275 return its; 00276 } 00277 00278 NLuint nlSolve_GMRES() { 00279 00280 NLdouble* b = nlCurrentContext->b ; 00281 NLdouble* x = nlCurrentContext->x ; 00282 NLdouble eps = nlCurrentContext->threshold ; 00283 NLint max_iter = nlCurrentContext->max_iterations ; 00284 NLint n = nlCurrentContext->n ; 00285 NLint m = nlCurrentContext->inner_iterations ; 00286 00287 typedef NLdouble *NLdoubleP; 00288 NLdouble *V = NL_NEW_ARRAY(NLdouble, n*(m+1) ) ; 00289 NLdouble *U = NL_NEW_ARRAY(NLdouble, m*(m+1)/2 ) ; 00290 NLdouble *r = NL_NEW_ARRAY(NLdouble, n ) ; 00291 NLdouble *y = NL_NEW_ARRAY(NLdouble, m+1 ) ; 00292 NLdouble *c = NL_NEW_ARRAY(NLdouble, m ) ; 00293 NLdouble *s = NL_NEW_ARRAY(NLdouble, m ) ; 00294 NLdouble **v = NL_NEW_ARRAY(NLdoubleP, m+1 ) ; 00295 NLint i, j, io, uij, u0j ; 00296 NLint its = -1 ; 00297 NLdouble beta, h, rd, dd, nrm2b ; 00298 00299 for ( i=0; i<=m; ++i ) v[i]=V+i*n ; 00300 00301 nrm2b=dnrm2(n,b,1); 00302 io=0; 00303 do { /* outer loop */ 00304 ++io; 00305 nlCurrentContext->matrix_vector_prod(x,r); 00306 daxpy(n,-1.,b,1,r,1); 00307 beta=dnrm2(n,r,1); 00308 dcopy(n,r,1,v[0],1); 00309 dscal(n,1./beta,v[0],1); 00310 00311 y[0]=beta; 00312 j=0; 00313 uij=0; 00314 do { /* inner loop: j=0,...,m-1 */ 00315 u0j=uij; 00316 nlCurrentContext->matrix_vector_prod(v[j],v[j+1]); 00317 dgemv( 00318 Transpose,n,j+1,1.,V,n,v[j+1],1,0.,U+u0j,1 00319 ); 00320 dgemv( 00321 NoTranspose,n,j+1,-1.,V,n,U+u0j,1,1.,v[j+1],1 00322 ); 00323 h=dnrm2(n,v[j+1],1); 00324 dscal(n,1./h,v[j+1],1); 00325 for (i=0; i<j; ++i ) { /* rotiere neue Spalte */ 00326 double tmp = c[i]*U[uij]-s[i]*U[uij+1]; 00327 U[uij+1] = s[i]*U[uij]+c[i]*U[uij+1]; 00328 U[uij] = tmp; 00329 ++uij; 00330 } 00331 { /* berechne neue Rotation */ 00332 rd = U[uij]; 00333 dd = sqrt(rd*rd+h*h); 00334 c[j] = rd/dd; 00335 s[j] = -h/dd; 00336 U[uij] = dd; 00337 ++uij; 00338 } 00339 { /* rotiere rechte Seite y (vorher: y[j+1]=0) */ 00340 y[j+1] = s[j]*y[j]; 00341 y[j] = c[j]*y[j]; 00342 } 00343 ++j; 00344 } while ( 00345 j<m && fabs(y[j])>=eps*nrm2b 00346 ) ; 00347 { /* minimiere bzgl Y */ 00348 dtpsv( 00349 UpperTriangle, 00350 NoTranspose, 00351 NotUnitTriangular, 00352 j,U,y,1 00353 ); 00354 /* correct X */ 00355 dgemv(NoTranspose,n,j,-1.,V,n,y,1,1.,x,1); 00356 } 00357 } while ( fabs(y[j])>=eps*nrm2b && (m*(io-1)+j) < max_iter); 00358 00359 /* Count the inner iterations */ 00360 its = m*(io-1)+j; 00361 00362 NL_DELETE_ARRAY(V) ; 00363 NL_DELETE_ARRAY(U) ; 00364 NL_DELETE_ARRAY(r) ; 00365 NL_DELETE_ARRAY(y) ; 00366 NL_DELETE_ARRAY(c) ; 00367 NL_DELETE_ARRAY(s) ; 00368 NL_DELETE_ARRAY(v) ; 00369 00370 return its; 00371 } 00372