swiftnav: dgehrd.c Source File

Go to the documentation of this file.
00001 /* dgehrd.f -- translated by f2c (version 20061008).
00002    You must link the resulting object file with libf2c:
00003         on Microsoft Windows system, link with libf2c.lib;
00004         on Linux or Unix systems, link with .../path/to/libf2c.a -lm
00005         or, if you install libf2c.a in a standard place, with -lf2c -lm
00006         -- in that order, at the end of the command line, as in
00007                 cc *.o -lf2c -lm
00008         Source for libf2c is in /netlib/f2c/libf2c.zip, e.g.,
00009 
00010                 http://www.netlib.org/f2c/libf2c.zip
00011 */
00012 
00013 #include "f2c.h"
00014 #include "blaswrap.h"
00015 
00016 /* Table of constant values */
00017 
00018 static integer c__1 = 1;
00019 static integer c_n1 = -1;
00020 static integer c__3 = 3;
00021 static integer c__2 = 2;
00022 static integer c__65 = 65;
00023 static doublereal c_b25 = -1.;
00024 static doublereal c_b26 = 1.;
00025 
00026 /* Subroutine */ int dgehrd_(integer *n, integer *ilo, integer *ihi, 
00027         doublereal *a, integer *lda, doublereal *tau, doublereal *work, 
00028         integer *lwork, integer *info)
00029 {
00030     /* System generated locals */
00031     integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
00032 
00033     /* Local variables */
00034     integer i__, j;
00035     doublereal t[4160]  /* was [65][64] */;
00036     integer ib;
00037     doublereal ei;
00038     integer nb, nh, nx, iws;
00039     extern /* Subroutine */ int dgemm_(char *, char *, integer *, integer *, 
00040             integer *, doublereal *, doublereal *, integer *, doublereal *, 
00041             integer *, doublereal *, doublereal *, integer *);
00042     integer nbmin, iinfo;
00043     extern /* Subroutine */ int dtrmm_(char *, char *, char *, char *, 
00044             integer *, integer *, doublereal *, doublereal *, integer *, 
00045             doublereal *, integer *), daxpy_(
00046             integer *, doublereal *, doublereal *, integer *, doublereal *, 
00047             integer *), dgehd2_(integer *, integer *, integer *, doublereal *, 
00048              integer *, doublereal *, doublereal *, integer *), dlahr2_(
00049             integer *, integer *, integer *, doublereal *, integer *, 
00050             doublereal *, doublereal *, integer *, doublereal *, integer *), 
00051             dlarfb_(char *, char *, char *, char *, integer *, integer *, 
00052             integer *, doublereal *, integer *, doublereal *, integer *, 
00053             doublereal *, integer *, doublereal *, integer *), xerbla_(char *, integer *);
00054     extern integer ilaenv_(integer *, char *, char *, integer *, integer *, 
00055             integer *, integer *);
00056     integer ldwork, lwkopt;
00057     logical lquery;
00058 
00059 
00060 /*  -- LAPACK routine (version 3.2) -- */
00061 /*     Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
00062 /*     November 2006 */
00063 
00064 /*     .. Scalar Arguments .. */
00065 /*     .. */
00066 /*     .. Array Arguments .. */
00067 /*     .. */
00068 
00069 /*  Purpose */
00070 /*  ======= */
00071 
00072 /*  DGEHRD reduces a real general matrix A to upper Hessenberg form H by */
00073 /*  an orthogonal similarity transformation:  Q' * A * Q = H . */
00074 
00075 /*  Arguments */
00076 /*  ========= */
00077 
00078 /*  N       (input) INTEGER */
00079 /*          The order of the matrix A.  N >= 0. */
00080 
00081 /*  ILO     (input) INTEGER */
00082 /*  IHI     (input) INTEGER */
00083 /*          It is assumed that A is already upper triangular in rows */
00084 /*          and columns 1:ILO-1 and IHI+1:N. ILO and IHI are normally */
00085 /*          set by a previous call to DGEBAL; otherwise they should be */
00086 /*          set to 1 and N respectively. See Further Details. */
00087 /*          1 <= ILO <= IHI <= N, if N > 0; ILO=1 and IHI=0, if N=0. */
00088 
00089 /*  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N) */
00090 /*          On entry, the N-by-N general matrix to be reduced. */
00091 /*          On exit, the upper triangle and the first subdiagonal of A */
00092 /*          are overwritten with the upper Hessenberg matrix H, and the */
00093 /*          elements below the first subdiagonal, with the array TAU, */
00094 /*          represent the orthogonal matrix Q as a product of elementary */
00095 /*          reflectors. See Further Details. */
00096 
00097 /*  LDA     (input) INTEGER */
00098 /*          The leading dimension of the array A.  LDA >= max(1,N). */
00099 
00100 /*  TAU     (output) DOUBLE PRECISION array, dimension (N-1) */
00101 /*          The scalar factors of the elementary reflectors (see Further */
00102 /*          Details). Elements 1:ILO-1 and IHI:N-1 of TAU are set to */
00103 /*          zero. */
00104 
00105 /*  WORK    (workspace/output) DOUBLE PRECISION array, dimension (LWORK) */
00106 /*          On exit, if INFO = 0, WORK(1) returns the optimal LWORK. */
00107 
00108 /*  LWORK   (input) INTEGER */
00109 /*          The length of the array WORK.  LWORK >= max(1,N). */
00110 /*          For optimum performance LWORK >= N*NB, where NB is the */
00111 /*          optimal blocksize. */
00112 
00113 /*          If LWORK = -1, then a workspace query is assumed; the routine */
00114 /*          only calculates the optimal size of the WORK array, returns */
00115 /*          this value as the first entry of the WORK array, and no error */
00116 /*          message related to LWORK is issued by XERBLA. */
00117 
00118 /*  INFO    (output) INTEGER */
00119 /*          = 0:  successful exit */
00120 /*          < 0:  if INFO = -i, the i-th argument had an illegal value. */
00121 
00122 /*  Further Details */
00123 /*  =============== */
00124 
00125 /*  The matrix Q is represented as a product of (ihi-ilo) elementary */
00126 /*  reflectors */
00127 
00128 /*     Q = H(ilo) H(ilo+1) . . . H(ihi-1). */
00129 
00130 /*  Each H(i) has the form */
00131 
00132 /*     H(i) = I - tau * v * v' */
00133 
00134 /*  where tau is a real scalar, and v is a real vector with */
00135 /*  v(1:i) = 0, v(i+1) = 1 and v(ihi+1:n) = 0; v(i+2:ihi) is stored on */
00136 /*  exit in A(i+2:ihi,i), and tau in TAU(i). */
00137 
00138 /*  The contents of A are illustrated by the following example, with */
00139 /*  n = 7, ilo = 2 and ihi = 6: */
00140 
00141 /*  on entry,                        on exit, */
00142 
00143 /*  ( a   a   a   a   a   a   a )    (  a   a   h   h   h   h   a ) */
00144 /*  (     a   a   a   a   a   a )    (      a   h   h   h   h   a ) */
00145 /*  (     a   a   a   a   a   a )    (      h   h   h   h   h   h ) */
00146 /*  (     a   a   a   a   a   a )    (      v2  h   h   h   h   h ) */
00147 /*  (     a   a   a   a   a   a )    (      v2  v3  h   h   h   h ) */
00148 /*  (     a   a   a   a   a   a )    (      v2  v3  v4  h   h   h ) */
00149 /*  (                         a )    (                          a ) */
00150 
00151 /*  where a denotes an element of the original matrix A, h denotes a */
00152 /*  modified element of the upper Hessenberg matrix H, and vi denotes an */
00153 /*  element of the vector defining H(i). */
00154 
00155 /*  This file is a slight modification of LAPACK-3.0's DGEHRD */
00156 /*  subroutine incorporating improvements proposed by Quintana-Orti and */
00157 /*  Van de Geijn (2005). */
00158 
00159 /*  ===================================================================== */
00160 
00161 /*     .. Parameters .. */
00162 /*     .. */
00163 /*     .. Local Scalars .. */
00164 /*     .. */
00165 /*     .. Local Arrays .. */
00166 /*     .. */
00167 /*     .. External Subroutines .. */
00168 /*     .. */
00169 /*     .. Intrinsic Functions .. */
00170 /*     .. */
00171 /*     .. External Functions .. */
00172 /*     .. */
00173 /*     .. Executable Statements .. */
00174 
00175 /*     Test the input parameters */
00176 
00177     /* Parameter adjustments */
00178     a_dim1 = *lda;
00179     a_offset = 1 + a_dim1;
00180     a -= a_offset;
00181     --tau;
00182     --work;
00183 
00184     /* Function Body */
00185     *info = 0;
00186 /* Computing MIN */
00187     i__1 = 64, i__2 = ilaenv_(&c__1, "DGEHRD", " ", n, ilo, ihi, &c_n1);
00188     nb = min(i__1,i__2);
00189     lwkopt = *n * nb;
00190     work[1] = (doublereal) lwkopt;
00191     lquery = *lwork == -1;
00192     if (*n < 0) {
00193         *info = -1;
00194     } else if (*ilo < 1 || *ilo > max(1,*n)) {
00195         *info = -2;
00196     } else if (*ihi < min(*ilo,*n) || *ihi > *n) {
00197         *info = -3;
00198     } else if (*lda < max(1,*n)) {
00199         *info = -5;
00200     } else if (*lwork < max(1,*n) && ! lquery) {
00201         *info = -8;
00202     }
00203     if (*info != 0) {
00204         i__1 = -(*info);
00205         xerbla_("DGEHRD", &i__1);
00206         return 0;
00207     } else if (lquery) {
00208         return 0;
00209     }
00210 
00211 /*     Set elements 1:ILO-1 and IHI:N-1 of TAU to zero */
00212 
00213     i__1 = *ilo - 1;
00214     for (i__ = 1; i__ <= i__1; ++i__) {
00215         tau[i__] = 0.;
00216 /* L10: */
00217     }
00218     i__1 = *n - 1;
00219     for (i__ = max(1,*ihi); i__ <= i__1; ++i__) {
00220         tau[i__] = 0.;
00221 /* L20: */
00222     }
00223 
00224 /*     Quick return if possible */
00225 
00226     nh = *ihi - *ilo + 1;
00227     if (nh <= 1) {
00228         work[1] = 1.;
00229         return 0;
00230     }
00231 
00232 /*     Determine the block size */
00233 
00234 /* Computing MIN */
00235     i__1 = 64, i__2 = ilaenv_(&c__1, "DGEHRD", " ", n, ilo, ihi, &c_n1);
00236     nb = min(i__1,i__2);
00237     nbmin = 2;
00238     iws = 1;
00239     if (nb > 1 && nb < nh) {
00240 
00241 /*        Determine when to cross over from blocked to unblocked code */
00242 /*        (last block is always handled by unblocked code) */
00243 
00244 /* Computing MAX */
00245         i__1 = nb, i__2 = ilaenv_(&c__3, "DGEHRD", " ", n, ilo, ihi, &c_n1);
00246         nx = max(i__1,i__2);
00247         if (nx < nh) {
00248 
00249 /*           Determine if workspace is large enough for blocked code */
00250 
00251             iws = *n * nb;
00252             if (*lwork < iws) {
00253 
00254 /*              Not enough workspace to use optimal NB:  determine the */
00255 /*              minimum value of NB, and reduce NB or force use of */
00256 /*              unblocked code */
00257 
00258 /* Computing MAX */
00259                 i__1 = 2, i__2 = ilaenv_(&c__2, "DGEHRD", " ", n, ilo, ihi, &
00260                         c_n1);
00261                 nbmin = max(i__1,i__2);
00262                 if (*lwork >= *n * nbmin) {
00263                     nb = *lwork / *n;
00264                 } else {
00265                     nb = 1;
00266                 }
00267             }
00268         }
00269     }
00270     ldwork = *n;
00271 
00272     if (nb < nbmin || nb >= nh) {
00273 
00274 /*        Use unblocked code below */
00275 
00276         i__ = *ilo;
00277 
00278     } else {
00279 
00280 /*        Use blocked code */
00281 
00282         i__1 = *ihi - 1 - nx;
00283         i__2 = nb;
00284         for (i__ = *ilo; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
00285 /* Computing MIN */
00286             i__3 = nb, i__4 = *ihi - i__;
00287             ib = min(i__3,i__4);
00288 
00289 /*           Reduce columns i:i+ib-1 to Hessenberg form, returning the */
00290 /*           matrices V and T of the block reflector H = I - V*T*V' */
00291 /*           which performs the reduction, and also the matrix Y = A*V*T */
00292 
00293             dlahr2_(ihi, &i__, &ib, &a[i__ * a_dim1 + 1], lda, &tau[i__], t, &
00294                     c__65, &work[1], &ldwork);
00295 
00296 /*           Apply the block reflector H to A(1:ihi,i+ib:ihi) from the */
00297 /*           right, computing  A := A - Y * V'. V(i+ib,ib-1) must be set */
00298 /*           to 1 */
00299 
00300             ei = a[i__ + ib + (i__ + ib - 1) * a_dim1];
00301             a[i__ + ib + (i__ + ib - 1) * a_dim1] = 1.;
00302             i__3 = *ihi - i__ - ib + 1;
00303             dgemm_("No transpose", "Transpose", ihi, &i__3, &ib, &c_b25, &
00304                     work[1], &ldwork, &a[i__ + ib + i__ * a_dim1], lda, &
00305                     c_b26, &a[(i__ + ib) * a_dim1 + 1], lda);
00306             a[i__ + ib + (i__ + ib - 1) * a_dim1] = ei;
00307 
00308 /*           Apply the block reflector H to A(1:i,i+1:i+ib-1) from the */
00309 /*           right */
00310 
00311             i__3 = ib - 1;
00312             dtrmm_("Right", "Lower", "Transpose", "Unit", &i__, &i__3, &c_b26, 
00313                      &a[i__ + 1 + i__ * a_dim1], lda, &work[1], &ldwork);
00314             i__3 = ib - 2;
00315             for (j = 0; j <= i__3; ++j) {
00316                 daxpy_(&i__, &c_b25, &work[ldwork * j + 1], &c__1, &a[(i__ + 
00317                         j + 1) * a_dim1 + 1], &c__1);
00318 /* L30: */
00319             }
00320 
00321 /*           Apply the block reflector H to A(i+1:ihi,i+ib:n) from the */
00322 /*           left */
00323 
00324             i__3 = *ihi - i__;
00325             i__4 = *n - i__ - ib + 1;
00326             dlarfb_("Left", "Transpose", "Forward", "Columnwise", &i__3, &
00327                     i__4, &ib, &a[i__ + 1 + i__ * a_dim1], lda, t, &c__65, &a[
00328                     i__ + 1 + (i__ + ib) * a_dim1], lda, &work[1], &ldwork);
00329 /* L40: */
00330         }
00331     }
00332 
00333 /*     Use unblocked code to reduce the rest of the matrix */
00334 
00335     dgehd2_(n, &i__, ihi, &a[a_offset], lda, &tau[1], &work[1], &iinfo);
00336     work[1] = (doublereal) iws;
00337 
00338     return 0;
00339 
00340 /*     End of DGEHRD */
00341 
00342 } /* dgehrd_ */