swiftnav: slaed6.c Source File

Go to the documentation of this file.
00001 /* slaed6.f -- translated by f2c (version 20061008).
00002    You must link the resulting object file with libf2c:
00003         on Microsoft Windows system, link with libf2c.lib;
00004         on Linux or Unix systems, link with .../path/to/libf2c.a -lm
00005         or, if you install libf2c.a in a standard place, with -lf2c -lm
00006         -- in that order, at the end of the command line, as in
00007                 cc *.o -lf2c -lm
00008         Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
00009 
00010                 http://www.netlib.org/f2c/libf2c.zip
00011 */
00012 
00013 #include "f2c.h"
00014 #include "blaswrap.h"
00015 
00016 /* Subroutine */ int slaed6_(integer *kniter, logical *orgati, real *rho, 
00017         real *d__, real *z__, real *finit, real *tau, integer *info)
00018 {
00019     /* System generated locals */
00020     integer i__1;
00021     real r__1, r__2, r__3, r__4;
00022 
00023     /* Builtin functions */
00024     double sqrt(doublereal), log(doublereal), pow_ri(real *, integer *);
00025 
00026     /* Local variables */
00027     real a, b, c__, f;
00028     integer i__;
00029     real fc, df, ddf, lbd, eta, ubd, eps, base;
00030     integer iter;
00031     real temp, temp1, temp2, temp3, temp4;
00032     logical scale;
00033     integer niter;
00034     real small1, small2, sminv1, sminv2, dscale[3], sclfac;
00035     extern doublereal slamch_(char *);
00036     real zscale[3], erretm, sclinv;
00037 
00038 
00039 /*  -- LAPACK routine (version 3.2) -- */
00040 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
00041 /*     February 2007 */
00042 
00043 /*     .. Scalar Arguments .. */
00044 /*     .. */
00045 /*     .. Array Arguments .. */
00046 /*     .. */
00047 
00048 /*  Purpose */
00049 /*  ======= */
00050 
00051 /*  SLAED6 computes the positive or negative root (closest to the origin) */
00052 /*  of */
00053 /*                   z(1)        z(2)        z(3) */
00054 /*  f(x) =   rho + --------- + ---------- + --------- */
00055 /*                  d(1)-x      d(2)-x      d(3)-x */
00056 
00057 /*  It is assumed that */
00058 
00059 /*        if ORGATI = .true. the root is between d(2) and d(3); */
00060 /*        otherwise it is between d(1) and d(2) */
00061 
00062 /*  This routine will be called by SLAED4 when necessary. In most cases, */
00063 /*  the root sought is the smallest in magnitude, though it might not be */
00064 /*  in some extremely rare situations. */
00065 
00066 /*  Arguments */
00067 /*  ========= */
00068 
00069 /*  KNITER       (input) INTEGER */
00070 /*               Refer to SLAED4 for its significance. */
00071 
00072 /*  ORGATI       (input) LOGICAL */
00073 /*               If ORGATI is true, the needed root is between d(2) and */
00074 /*               d(3); otherwise it is between d(1) and d(2).  See */
00075 /*               SLAED4 for further details. */
00076 
00077 /*  RHO          (input) REAL */
00078 /*               Refer to the equation f(x) above. */
00079 
00080 /*  D            (input) REAL array, dimension (3) */
00081 /*               D satisfies d(1) < d(2) < d(3). */
00082 
00083 /*  Z            (input) REAL array, dimension (3) */
00084 /*               Each of the elements in z must be positive. */
00085 
00086 /*  FINIT        (input) REAL */
00087 /*               The value of f at 0. It is more accurate than the one */
00088 /*               evaluated inside this routine (if someone wants to do */
00089 /*               so). */
00090 
00091 /*  TAU          (output) REAL */
00092 /*               The root of the equation f(x). */
00093 
00094 /*  INFO         (output) INTEGER */
00095 /*               = 0: successful exit */
00096 /*               > 0: if INFO = 1, failure to converge */
00097 
00098 /*  Further Details */
00099 /*  =============== */
00100 
00101 /*  30/06/99: Based on contributions by */
00102 /*     Ren-Cang Li, Computer Science Division, University of California */
00103 /*     at Berkeley, USA */
00104 
00105 /*  10/02/03: This version has a few statements commented out for thread safety */
00106 /*     (machine parameters are computed on each entry). SJH. */
00107 
00108 /*  05/10/06: Modified from a new version of Ren-Cang Li, use */
00109 /*     Gragg-Thornton-Warner cubic convergent scheme for better stability. */
00110 
00111 /*  ===================================================================== */
00112 
00113 /*     .. Parameters .. */
00114 /*     .. */
00115 /*     .. External Functions .. */
00116 /*     .. */
00117 /*     .. Local Arrays .. */
00118 /*     .. */
00119 /*     .. Local Scalars .. */
00120 /*     .. */
00121 /*     .. Intrinsic Functions .. */
00122 /*     .. */
00123 /*     .. Executable Statements .. */
00124 
00125     /* Parameter adjustments */
00126     --z__;
00127     --d__;
00128 
00129     /* Function Body */
00130     *info = 0;
00131 
00132     if (*orgati) {
00133         lbd = d__[2];
00134         ubd = d__[3];
00135     } else {
00136         lbd = d__[1];
00137         ubd = d__[2];
00138     }
00139     if (*finit < 0.f) {
00140         lbd = 0.f;
00141     } else {
00142         ubd = 0.f;
00143     }
00144 
00145     niter = 1;
00146     *tau = 0.f;
00147     if (*kniter == 2) {
00148         if (*orgati) {
00149             temp = (d__[3] - d__[2]) / 2.f;
00150             c__ = *rho + z__[1] / (d__[1] - d__[2] - temp);
00151             a = c__ * (d__[2] + d__[3]) + z__[2] + z__[3];
00152             b = c__ * d__[2] * d__[3] + z__[2] * d__[3] + z__[3] * d__[2];
00153         } else {
00154             temp = (d__[1] - d__[2]) / 2.f;
00155             c__ = *rho + z__[3] / (d__[3] - d__[2] - temp);
00156             a = c__ * (d__[1] + d__[2]) + z__[1] + z__[2];
00157             b = c__ * d__[1] * d__[2] + z__[1] * d__[2] + z__[2] * d__[1];
00158         }
00159 /* Computing MAX */
00160         r__1 = dabs(a), r__2 = dabs(b), r__1 = max(r__1,r__2), r__2 = dabs(
00161                 c__);
00162         temp = dmax(r__1,r__2);
00163         a /= temp;
00164         b /= temp;
00165         c__ /= temp;
00166         if (c__ == 0.f) {
00167             *tau = b / a;
00168         } else if (a <= 0.f) {
00169             *tau = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / (
00170                     c__ * 2.f);
00171         } else {
00172             *tau = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs(
00173                     r__1))));
00174         }
00175         if (*tau < lbd || *tau > ubd) {
00176             *tau = (lbd + ubd) / 2.f;
00177         }
00178         if (d__[1] == *tau || d__[2] == *tau || d__[3] == *tau) {
00179             *tau = 0.f;
00180         } else {
00181             temp = *finit + *tau * z__[1] / (d__[1] * (d__[1] - *tau)) + *tau 
00182                     * z__[2] / (d__[2] * (d__[2] - *tau)) + *tau * z__[3] / (
00183                     d__[3] * (d__[3] - *tau));
00184             if (temp <= 0.f) {
00185                 lbd = *tau;
00186             } else {
00187                 ubd = *tau;
00188             }
00189             if (dabs(*finit) <= dabs(temp)) {
00190                 *tau = 0.f;
00191             }
00192         }
00193     }
00194 
00195 /*     get machine parameters for possible scaling to avoid overflow */
00196 
00197 /*     modified by Sven: parameters SMALL1, SMINV1, SMALL2, */
00198 /*     SMINV2, EPS are not SAVEd anymore between one call to the */
00199 /*     others but recomputed at each call */
00200 
00201     eps = slamch_("Epsilon");
00202     base = slamch_("Base");
00203     i__1 = (integer) (log(slamch_("SafMin")) / log(base) / 3.f);
00204     small1 = pow_ri(&base, &i__1);
00205     sminv1 = 1.f / small1;
00206     small2 = small1 * small1;
00207     sminv2 = sminv1 * sminv1;
00208 
00209 /*     Determine if scaling of inputs necessary to avoid overflow */
00210 /*     when computing 1/TEMP**3 */
00211 
00212     if (*orgati) {
00213 /* Computing MIN */
00214         r__3 = (r__1 = d__[2] - *tau, dabs(r__1)), r__4 = (r__2 = d__[3] - *
00215                 tau, dabs(r__2));
00216         temp = dmin(r__3,r__4);
00217     } else {
00218 /* Computing MIN */
00219         r__3 = (r__1 = d__[1] - *tau, dabs(r__1)), r__4 = (r__2 = d__[2] - *
00220                 tau, dabs(r__2));
00221         temp = dmin(r__3,r__4);
00222     }
00223     scale = FALSE_;
00224     if (temp <= small1) {
00225         scale = TRUE_;
00226         if (temp <= small2) {
00227 
00228 /*        Scale up by power of radix nearest 1/SAFMIN**(2/3) */
00229 
00230             sclfac = sminv2;
00231             sclinv = small2;
00232         } else {
00233 
00234 /*        Scale up by power of radix nearest 1/SAFMIN**(1/3) */
00235 
00236             sclfac = sminv1;
00237             sclinv = small1;
00238         }
00239 
00240 /*        Scaling up safe because D, Z, TAU scaled elsewhere to be O(1) */
00241 
00242         for (i__ = 1; i__ <= 3; ++i__) {
00243             dscale[i__ - 1] = d__[i__] * sclfac;
00244             zscale[i__ - 1] = z__[i__] * sclfac;
00245 /* L10: */
00246         }
00247         *tau *= sclfac;
00248         lbd *= sclfac;
00249         ubd *= sclfac;
00250     } else {
00251 
00252 /*        Copy D and Z to DSCALE and ZSCALE */
00253 
00254         for (i__ = 1; i__ <= 3; ++i__) {
00255             dscale[i__ - 1] = d__[i__];
00256             zscale[i__ - 1] = z__[i__];
00257 /* L20: */
00258         }
00259     }
00260 
00261     fc = 0.f;
00262     df = 0.f;
00263     ddf = 0.f;
00264     for (i__ = 1; i__ <= 3; ++i__) {
00265         temp = 1.f / (dscale[i__ - 1] - *tau);
00266         temp1 = zscale[i__ - 1] * temp;
00267         temp2 = temp1 * temp;
00268         temp3 = temp2 * temp;
00269         fc += temp1 / dscale[i__ - 1];
00270         df += temp2;
00271         ddf += temp3;
00272 /* L30: */
00273     }
00274     f = *finit + *tau * fc;
00275 
00276     if (dabs(f) <= 0.f) {
00277         goto L60;
00278     }
00279     if (f <= 0.f) {
00280         lbd = *tau;
00281     } else {
00282         ubd = *tau;
00283     }
00284 
00285 /*        Iteration begins -- Use Gragg-Thornton-Warner cubic convergent */
00286 /*                            scheme */
00287 
00288 /*     It is not hard to see that */
00289 
00290 /*           1) Iterations will go up monotonically */
00291 /*              if FINIT < 0; */
00292 
00293 /*           2) Iterations will go down monotonically */
00294 /*              if FINIT > 0. */
00295 
00296     iter = niter + 1;
00297 
00298     for (niter = iter; niter <= 40; ++niter) {
00299 
00300         if (*orgati) {
00301             temp1 = dscale[1] - *tau;
00302             temp2 = dscale[2] - *tau;
00303         } else {
00304             temp1 = dscale[0] - *tau;
00305             temp2 = dscale[1] - *tau;
00306         }
00307         a = (temp1 + temp2) * f - temp1 * temp2 * df;
00308         b = temp1 * temp2 * f;
00309         c__ = f - (temp1 + temp2) * df + temp1 * temp2 * ddf;
00310 /* Computing MAX */
00311         r__1 = dabs(a), r__2 = dabs(b), r__1 = max(r__1,r__2), r__2 = dabs(
00312                 c__);
00313         temp = dmax(r__1,r__2);
00314         a /= temp;
00315         b /= temp;
00316         c__ /= temp;
00317         if (c__ == 0.f) {
00318             eta = b / a;
00319         } else if (a <= 0.f) {
00320             eta = (a - sqrt((r__1 = a * a - b * 4.f * c__, dabs(r__1)))) / (
00321                     c__ * 2.f);
00322         } else {
00323             eta = b * 2.f / (a + sqrt((r__1 = a * a - b * 4.f * c__, dabs(
00324                     r__1))));
00325         }
00326         if (f * eta >= 0.f) {
00327             eta = -f / df;
00328         }
00329 
00330         *tau += eta;
00331         if (*tau < lbd || *tau > ubd) {
00332             *tau = (lbd + ubd) / 2.f;
00333         }
00334 
00335         fc = 0.f;
00336         erretm = 0.f;
00337         df = 0.f;
00338         ddf = 0.f;
00339         for (i__ = 1; i__ <= 3; ++i__) {
00340             temp = 1.f / (dscale[i__ - 1] - *tau);
00341             temp1 = zscale[i__ - 1] * temp;
00342             temp2 = temp1 * temp;
00343             temp3 = temp2 * temp;
00344             temp4 = temp1 / dscale[i__ - 1];
00345             fc += temp4;
00346             erretm += dabs(temp4);
00347             df += temp2;
00348             ddf += temp3;
00349 /* L40: */
00350         }
00351         f = *finit + *tau * fc;
00352         erretm = (dabs(*finit) + dabs(*tau) * erretm) * 8.f + dabs(*tau) * df;
00353         if (dabs(f) <= eps * erretm) {
00354             goto L60;
00355         }
00356         if (f <= 0.f) {
00357             lbd = *tau;
00358         } else {
00359             ubd = *tau;
00360         }
00361 /* L50: */
00362     }
00363     *info = 1;
00364 L60:
00365 
00366 /*     Undo scaling */
00367 
00368     if (scale) {
00369         *tau *= sclinv;
00370     }
00371     return 0;
00372 
00373 /*     End of SLAED6 */
00374 
00375 } /* slaed6_ */