swiftnav: slaneg.c Source File

Go to the documentation of this file.
00001 /* slaneg.f -- translated by f2c (version 20061008).
00002    You must link the resulting object file with libf2c:
00003         on Microsoft Windows system, link with libf2c.lib;
00004         on Linux or Unix systems, link with .../path/to/libf2c.a -lm
00005         or, if you install libf2c.a in a standard place, with -lf2c -lm
00006         -- in that order, at the end of the command line, as in
00007                 cc *.o -lf2c -lm
00008         Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
00009 
00010                 http://www.netlib.org/f2c/libf2c.zip
00011 */
00012 
00013 #include "f2c.h"
00014 #include "blaswrap.h"
00015 
00016 integer slaneg_(integer *n, real *d__, real *lld, real *sigma, real *pivmin, 
00017         integer *r__)
00018 {
00019     /* System generated locals */
00020     integer ret_val, i__1, i__2, i__3, i__4;
00021 
00022     /* Local variables */
00023     integer j;
00024     real p, t;
00025     integer bj;
00026     real tmp;
00027     integer neg1, neg2;
00028     real bsav, gamma, dplus;
00029     integer negcnt;
00030     logical sawnan;
00031     extern logical sisnan_(real *);
00032     real dminus;
00033 
00034 
00035 /*  -- LAPACK auxiliary routine (version 3.2) -- */
00036 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
00037 /*     November 2006 */
00038 
00039 /*     .. Scalar Arguments .. */
00040 /*     .. */
00041 /*     .. Array Arguments .. */
00042 /*     .. */
00043 
00044 /*  Purpose */
00045 /*  ======= */
00046 
00047 /*  SLANEG computes the Sturm count, the number of negative pivots */
00048 /*  encountered while factoring tridiagonal T - sigma I = L D L^T. */
00049 /*  This implementation works directly on the factors without forming */
00050 /*  the tridiagonal matrix T.  The Sturm count is also the number of */
00051 /*  eigenvalues of T less than sigma. */
00052 
00053 /*  This routine is called from SLARRB. */
00054 
00055 /*  The current routine does not use the PIVMIN parameter but rather */
00056 /*  requires IEEE-754 propagation of Infinities and NaNs.  This */
00057 /*  routine also has no input range restrictions but does require */
00058 /*  default exception handling such that x/0 produces Inf when x is */
00059 /*  non-zero, and Inf/Inf produces NaN.  For more information, see: */
00060 
00061 /*    Marques, Riedy, and Voemel, "Benefits of IEEE-754 Features in */
00062 /*    Modern Symmetric Tridiagonal Eigensolvers," SIAM Journal on */
00063 /*    Scientific Computing, v28, n5, 2006.  DOI 10.1137/050641624 */
00064 /*    (Tech report version in LAWN 172 with the same title.) */
00065 
00066 /*  Arguments */
00067 /*  ========= */
00068 
00069 /*  N       (input) INTEGER */
00070 /*          The order of the matrix. */
00071 
00072 /*  D       (input) REAL             array, dimension (N) */
00073 /*          The N diagonal elements of the diagonal matrix D. */
00074 
00075 /*  LLD     (input) REAL             array, dimension (N-1) */
00076 /*          The (N-1) elements L(i)*L(i)*D(i). */
00077 
00078 /*  SIGMA   (input) REAL */
00079 /*          Shift amount in T - sigma I = L D L^T. */
00080 
00081 /*  PIVMIN  (input) REAL */
00082 /*          The minimum pivot in the Sturm sequence.  May be used */
00083 /*          when zero pivots are encountered on non-IEEE-754 */
00084 /*          architectures. */
00085 
00086 /*  R       (input) INTEGER */
00087 /*          The twist index for the twisted factorization that is used */
00088 /*          for the negcount. */
00089 
00090 /*  Further Details */
00091 /*  =============== */
00092 
00093 /*  Based on contributions by */
00094 /*     Osni Marques, LBNL/NERSC, USA */
00095 /*     Christof Voemel, University of California, Berkeley, USA */
00096 /*     Jason Riedy, University of California, Berkeley, USA */
00097 
00098 /*  ===================================================================== */
00099 
00100 /*     .. Parameters .. */
00101 /*     Some architectures propagate Infinities and NaNs very slowly, so */
00102 /*     the code computes counts in BLKLEN chunks.  Then a NaN can */
00103 /*     propagate at most BLKLEN columns before being detected.  This is */
00104 /*     not a general tuning parameter; it needs only to be just large */
00105 /*     enough that the overhead is tiny in common cases. */
00106 /*     .. */
00107 /*     .. Local Scalars .. */
00108 /*     .. */
00109 /*     .. Intrinsic Functions .. */
00110 /*     .. */
00111 /*     .. External Functions .. */
00112 /*     .. */
00113 /*     .. Executable Statements .. */
00114     /* Parameter adjustments */
00115     --lld;
00116     --d__;
00117 
00118     /* Function Body */
00119     negcnt = 0;
00120 /*     I) upper part: L D L^T - SIGMA I = L+ D+ L+^T */
00121     t = -(*sigma);
00122     i__1 = *r__ - 1;
00123     for (bj = 1; bj <= i__1; bj += 128) {
00124         neg1 = 0;
00125         bsav = t;
00126 /* Computing MIN */
00127         i__3 = bj + 127, i__4 = *r__ - 1;
00128         i__2 = min(i__3,i__4);
00129         for (j = bj; j <= i__2; ++j) {
00130             dplus = d__[j] + t;
00131             if (dplus < 0.f) {
00132                 ++neg1;
00133             }
00134             tmp = t / dplus;
00135             t = tmp * lld[j] - *sigma;
00136 /* L21: */
00137         }
00138         sawnan = sisnan_(&t);
00139 /*     Run a slower version of the above loop if a NaN is detected. */
00140 /*     A NaN should occur only with a zero pivot after an infinite */
00141 /*     pivot.  In that case, substituting 1 for T/DPLUS is the */
00142 /*     correct limit. */
00143         if (sawnan) {
00144             neg1 = 0;
00145             t = bsav;
00146 /* Computing MIN */
00147             i__3 = bj + 127, i__4 = *r__ - 1;
00148             i__2 = min(i__3,i__4);
00149             for (j = bj; j <= i__2; ++j) {
00150                 dplus = d__[j] + t;
00151                 if (dplus < 0.f) {
00152                     ++neg1;
00153                 }
00154                 tmp = t / dplus;
00155                 if (sisnan_(&tmp)) {
00156                     tmp = 1.f;
00157                 }
00158                 t = tmp * lld[j] - *sigma;
00159 /* L22: */
00160             }
00161         }
00162         negcnt += neg1;
00163 /* L210: */
00164     }
00165 
00166 /*     II) lower part: L D L^T - SIGMA I = U- D- U-^T */
00167     p = d__[*n] - *sigma;
00168     i__1 = *r__;
00169     for (bj = *n - 1; bj >= i__1; bj += -128) {
00170         neg2 = 0;
00171         bsav = p;
00172 /* Computing MAX */
00173         i__3 = bj - 127;
00174         i__2 = max(i__3,*r__);
00175         for (j = bj; j >= i__2; --j) {
00176             dminus = lld[j] + p;
00177             if (dminus < 0.f) {
00178                 ++neg2;
00179             }
00180             tmp = p / dminus;
00181             p = tmp * d__[j] - *sigma;
00182 /* L23: */
00183         }
00184         sawnan = sisnan_(&p);
00185 /*     As above, run a slower version that substitutes 1 for Inf/Inf. */
00186 
00187         if (sawnan) {
00188             neg2 = 0;
00189             p = bsav;
00190 /* Computing MAX */
00191             i__3 = bj - 127;
00192             i__2 = max(i__3,*r__);
00193             for (j = bj; j >= i__2; --j) {
00194                 dminus = lld[j] + p;
00195                 if (dminus < 0.f) {
00196                     ++neg2;
00197                 }
00198                 tmp = p / dminus;
00199                 if (sisnan_(&tmp)) {
00200                     tmp = 1.f;
00201                 }
00202                 p = tmp * d__[j] - *sigma;
00203 /* L24: */
00204             }
00205         }
00206         negcnt += neg2;
00207 /* L230: */
00208     }
00209 
00210 /*     III) Twist index */
00211 /*       T was shifted by SIGMA initially. */
00212     gamma = t + *sigma + p;
00213     if (gamma < 0.f) {
00214         ++negcnt;
00215     }
00216     ret_val = negcnt;
00217     return ret_val;
00218 } /* slaneg_ */