ecl_core_apps: ecl_core_apps: flops.cpp Source File

00001 
00017 /*---------------- Start Aburto's flops.c source code ------------------*/
00018 
00019 /*****************************/
00020 /*          flops.c          */
00021 /* Version 2.0,  18 Dec 1992 */
00022 /*         Al Aburto         */
00023 /*      aburto@nosc.mil      */
00024 /*****************************/
00025 
00026 /*
00027    Flops.c is a 'c' program which attempts to estimate your system's
00028    floating-point 'MFLOPS' rating for the FADD, FSUB, FMUL, and FDIV
00029    operations based on specific 'instruction mixes' (discussed below).
00030    The program provides an estimate of PEAK MFLOPS performance by making
00031    maximal use of register variables with minimal interaction with main
00032    memory. The execution loops are all small so that they will fit in
00033    any cache. Flops.c can be used along with Linpack and the Livermore
00034    kernels (which exercise memory much more extensively) to gain further
00035    insight into the limits of system performance. The flops.c execution
00036    modules also include various percent weightings of FDIV's (from 0% to
00037    25% FDIV's) so that the range of performance can be obtained when
00038    using FDIV's. FDIV's, being computationally more intensive than
00039    FADD's or FMUL's, can impact performance considerably on some systems.
00040 
00041    Flops.c consists of 8 independent modules (routines) which, except for
00042    module 2, conduct numerical integration of various functions. Module
00043    2 estimates the value of pi based upon the Maclaurin series expansion
00044    of atan(1). MFLOPS ratings are provided for each module, but the
00045    program's overall results are summarized by the MFLOPS(1), MFLOPS(2),
00046    MFLOPS(3), and MFLOPS(4) outputs.
00047 
00048    The MFLOPS(1) result is identical to the result provided by all
00049    previous versions of flops.c. It is based only upon the results from
00050    modules 2 and 3. Two problems surfaced in using MFLOPS(1). First, it
00051    was difficult to completely 'vectorize' the result due to the
00052    recurrence of the 's' variable in module 2. This problem is addressed
00053    in the MFLOPS(2) result which does not use module 2, but maintains
00054    nearly the same weighting of FDIV's (9.2%) as in MFLOPS(1) (9.6%).
00055    The second problem with MFLOPS(1) centers around the percentage of
00056    FDIV's (9.6%) which was viewed as too high for an important class of
00057    problems. This concern is addressed in the MFLOPS(3) result (3.4%
00058    FDIV's) and the MFLOPS(4) result, where NO FDIV's are conducted at all.
00059 
00060    The number of floating-point instructions per iteration (loop) is
00061    given below for each module executed:
00062 
00063    MODULE   FADD   FSUB   FMUL   FDIV   TOTAL  Comment
00064      1        7      0      6      1      14   7.1%  FDIV's
00065      2        3      2      1      1       7   difficult to vectorize.
00066      3        6      2      9      0      17   0.0%  FDIV's
00067      4        7      0      8      0      15   0.0%  FDIV's
00068      5       13      0     15      1      29   3.4%  FDIV's
00069      6       13      0     16      0      29   0.0%  FDIV's
00070      7        3      3      3      3      12   25.0% FDIV's
00071      8       13      0     17      0      30   0.0%  FDIV's
00072 
00073    A*2+3     21     12     14      5      52   A=5, MFLOPS(1), Same as
00074        40.4%  23.1%  26.9%  9.6%          previous versions of the
00075                         flops.c program. Includes
00076                         only Modules 2 and 3, does
00077                         9.6% FDIV's, and is not
00078                         easily vectorizable.
00079 
00080    1+3+4     58     14     66     14     152   A=4, MFLOPS(2), New output
00081    +5+6+    38.2%  9.2%   43.4%  9.2%          does not include Module 2,
00082    A*7                                         but does 9.2% FDIV's.
00083 
00084    1+3+4     62      5     74      5     146   A=0, MFLOPS(3), New output
00085    +5+6+    42.5%  3.4%   50.7%  3.4%          does not include Module 2,
00086    7+8                                         but does 3.4% FDIV's.
00087 
00088    3+4+6     39      2     50      0      91   A=0, MFLOPS(4), New output
00089    +8       42.9%  2.2%   54.9%  0.0%          does not include Module 2,
00090                         and does NO FDIV's.
00091 
00092    NOTE: Various timer routines are included as indicated below. The
00093     timer routines, with some comments, are attached at the end
00094     of the main program.
00095 
00096    NOTE: Please do not remove any of the printouts.
00097 
00098    EXAMPLE COMPILATION:
00099    UNIX based systems
00100        cc -DUNIX -O flops.c -o flops
00101        cc -DUNIX -DROPT flops.c -o flops
00102        cc -DUNIX -fast -O4 flops.c -o flops
00103        .
00104        .
00105        .
00106      etc.
00107 
00108    Al Aburto
00109    aburto@nosc.mil
00110 */
00111 /*****************************************************************************
00112 ** Includes
00113 *****************************************************************************/
00114 
00115 #include <cstdio>
00116 #include <cmath>
00117 #include <ecl/config.hpp>
00118 #include <ecl/command_line.hpp>
00119 #include <ecl/threads/priority.hpp>
00120 
00121 /*****************************************************************************
00122 ** Using
00123 *****************************************************************************/
00124 
00125 using ecl::CmdLine;
00126 using ecl::StandardException;
00127 
00128 /*****************************************************************************
00129 ** dtime() : Uses POSIX.1 calls (GetTickCount() on Win32) to evaluate the time.
00130 *****************************************************************************/
00131 
00132 #if defined(ECL_IS_WIN32)
00133     #include <windows.h>
00134 
00135     int dtime(double *p)
00136     {
00137      double q;
00138 
00139      q = p[2];
00140 
00141      p[2] = (double)GetTickCount() * 1.0e-03;
00142      p[1] = p[2] - q;
00143 
00144      return 0;
00145     }
00146 #elif defined(ECL_IS_POSIX)
00147     #include <unistd.h>
00148     #include <limits.h>
00149     #include <sys/times.h>
00150 
00151     int dtime(double *p)
00152     {
00153         static struct tms tms;
00154 
00155         double q;
00156         times(&tms);
00157         q = p[2];
00158         p[2] = (double)tms.tms_utime / (double)_SC_CLK_TCK;
00159         p[1] = p[2] - q;
00160         return 0;
00161     }
00162 #endif
00163 
00164 
00165 /*****************************************************************************
00166 ** Main program
**
** Tries to raise the scheduling priority, hands the command line to
** ecl::CmdLine, then runs the eight flops.c benchmark modules in sequence.
** For each module it prints the numerical error, the run time and the MFLOPS
** rating, and finally the four weighted MFLOPS summaries. Returns 0 on
** completion.
00167 *****************************************************************************/
00168 int main(int argc, char** argv)
00169 {
00170         try {
00171                 ecl::set_priority(ecl::RealTimePriority4);
00172         } catch ( StandardException &e ) {
00173                 // Best effort: if raising the priority throws, carry on at the current priority.
00174         }
00175 
00176     CmdLine cmd("Benchmarks the speed of computation (mflops) on this machine.");
00177     cmd.parse(argc,argv);
00178 
00179     /*****************************************************************************
00180     ** Variables
00181     *****************************************************************************/
00182 
00183     double nulltime, TimeArray[3];   /* Variables needed for 'dtime()'.     */
00184     double TLimit;                   /* Threshold to determine Number of    */
00185                      /* Loops to run. Fixed at 15.0 seconds.*/
00186 
    /* T[1] is the time scale factor (1.0E+06 / loop count). For each module  */
    /* three consecutive-purpose slots are filled in: the run time that is    */
    /* printed in the RunTime column, that run time divided by the module's   */
    /* operation count, and its reciprocal, which is printed as MFLOPS.       */
    /* T[27]..T[34] hold the weighted summaries. T[0] and T[35] are not used. */
00187     double T[36];                    /* Array used to hold timing results   */
00188                      /* and other information.              */
00189 
00190     double sa,sb,sc,one,two,three;
00191 //    double sd;
00192     double four,five,piref,piprg;
00193     double scale,pierr;
00194 
    /* A0..A6: coefficients of the odd polynomial in x used as sin(x).       */
    /* A3 and A5 are stored with positive sign here; Module 3 subtracts them */
    /* and their signs are flipped before Module 4.                          */
00195     double A0 = 1.0;
00196     double A1 = -0.1666666666671334;
00197     double A2 = 0.833333333809067E-2;
00198     double A3 = 0.198412715551283E-3;
00199     double A4 = 0.27557589750762E-5;
00200     double A5 = 0.2507059876207E-7;
00201     double A6 = 0.164105986683E-9;
00202 
    /* B1..B6: coefficients of the even polynomial in x used as cos(x); the  */
    /* constant term is supplied by 'one' in the expressions below.          */
00203 //    double B0 = 1.0;
00204     double B1 = -0.4999999999982;
00205     double B2 = 0.4166666664651E-1;
00206     double B3 = -0.1388888805755E-2;
00207     double B4 = 0.24801428034E-4;
00208     double B5 = -0.2754213324E-6;
00209     double B6 = 0.20189405E-8;
00210 
00211 //    double C0 = 1.0;
00212 //    double C1 = 0.99999999668;
00213 //    double C2 = 0.49999995173;
00214 //    double C3 = 0.16666704243;
00215 //    double C4 = 0.4166685027E-1;
00216 //    double C5 = 0.832672635E-2;
00217 //    double C6 = 0.140836136E-2;
00218 //    double C7 = 0.17358267E-3;
00219 //    double C8 = 0.3931683E-4;
00220 
    /* D1..D3 and E2..E3: coefficients of the Module 1 integrand. */
00221     double D1 = 0.3999999946405E-1;
00222     double D2 = 0.96E-3;
00223     double D3 = 0.1233153E-5;
00224 
00225     double E2 = 0.48E-3;
00226     double E3 = 0.411051E-6;
00227 
00228    double s,u,v,w,x;
00229 
00230    long loops, NLimit;
   // NOTE(review): 'register' is deprecated since C++11 and was removed as a
   // storage class specifier in C++17; this line needs attention before
   // building with -std=c++17 or later.
00231    register long i, m, n;
00232 
00233    printf("\n");
00234    printf("   FLOPS C Program (Double Precision), V2.0 18 Dec 1992\n\n");
00235 
00236                         /****************************/
00237    loops = 15625;       /* Initial number of loops. */
00238                         /*     DO NOT CHANGE!       */
00239                         /****************************/
00240 
00241     /****************************************************/
00242     /* Set Variable Values.                             */
00243     /* T[1] references all timing results relative to   */
00244     /* one million loops.                               */
00245     /*                                                  */
00246     /* The program will execute from 31250 to 512000000 */
00247     /* loops based on a runtime of Module 1 of at least */
00248     /* TLimit = 15.0 seconds. That is, a runtime of 15  */
00249     /* seconds for Module 1 is used to determine the    */
00250     /* number of loops to execute.                      */
00251     /*                                                  */
00252     /* No more than NLimit = 512000000 loops are allowed*/
00253     /****************************************************/
00254 
00255    T[1] = 1.0E+06/(double)loops;
00256 
00257    TLimit = 15.0;
00258    NLimit = 512000000;
00259 
00260    piref = 3.14159265358979324;
00261    one   = 1.0;
00262    two   = 2.0;
00263    three = 3.0;
00264    four  = 4.0;
00265    five  = 5.0;
00266    scale = one;
00267 
00268    printf("   Module     Error        RunTime      MFLOPS    Math Calculation     Operations\n");
00269    printf("                            (usec)\n");
00270     /*************************/
00271     /* Initialize the timer. */
00272     /*************************/
00273 
   /* Called twice so that TimeArray[2] holds a real sample before the  */
   /* first interval is taken; TimeArray is not initialised beforehand. */
00274    dtime(TimeArray);
00275    dtime(TimeArray);
00276 
00277     /*******************************************************/
00278     /* Module 1.  Calculate integral of df(x)/f(x) defined */
00279     /*            below.  Result is ln(f(1)). There are 14 */
00280     /*            double precision operations per loop     */
00281     /*            ( 7 +, 0 -, 6 *, 1 / ) that are included */
00282     /*            in the timing.                           */
00283     /*            50.0% +, 00.0% -, 42.9% *, and 07.1% /   */
00284     /*******************************************************/
00285    n = loops;
00286    sa = 0.0;
00287 
00288    s = 0.0; // Initialising to remove warning
00289    x = 0.0;
00290 
   /* Calibration: double n and re-run the Module 1 loop until one run takes */
   /* at least TLimit seconds, or until n reaches NLimit.                    */
00291    while ( sa < TLimit )
00292    {
00293    n = 2 * n;
00294    x = one / (double)n;                            /*********************/
00295    s = 0.0;                                        /*  Loop 1.          */
00296    v = 0.0;                                        /*********************/
00297    w = one;
00298 
00299        dtime(TimeArray);
00300        for( i = 1 ; i <= n-1 ; i++ )
00301        {
00302        v = v + w;
00303        u = v * x;
00304        s = s + (D1+u*(D2+u*D3))/(w+u*(D1+u*(E2+u*E3)));
00305        }
00306        dtime(TimeArray);
00307        sa = TimeArray[1];
00308 
00309    if ( n == NLimit ) break;
00310    /* printf(" %10ld  %12.5lf\n",n,sa); */
00311    }
00312 
   /* scale * (seconds for n loops) = seconds per million loops, i.e.   */
   /* microseconds per loop. Every later timing is multiplied by T[1].  */
00313    scale = 1.0E+06 / (double)n;
00314    T[1]  = scale;
00315 
00316 /****************************************/
00317 /* Estimate nulltime ('for' loop time). */
00318 /****************************************/
   /* NOTE(review): the loop body is empty, so an optimizing compiler may   */
   /* drop the loop entirely and nulltime may come out as ~0 -- confirm for */
   /* the build flags in use.                                               */
00319    dtime(TimeArray);
00320    for( i = 1 ; i <= n-1 ; i++ )
00321    {
00322    }
00323    dtime(TimeArray);
00324    nulltime = T[1] * TimeArray[1];
00325    if ( nulltime < 0.0 ) nulltime = 0.0;
00326 
00327    T[2] = T[1] * sa - nulltime;
00328 
00329    sa = (D1+D2+D3)/(one+D1+E2+E3);
00330    sb = D1;
00331 
   /* The loop count for the following modules is rebuilt from the computed */
   /* result sb rather than copied from n. When sb is close to the 25.2     */
   /* that the error term sc compares it with, (long)sb is 25 and           */
   /* 40000 * 25 / scale reproduces the calibrated n.                       */
   /* NOTE(review): presumably this keeps the result live so the timed      */
   /* loop cannot be optimized away -- confirm.                             */
00332    T[3] = T[2] / 14.0;                             /*********************/
00333    sa = x * ( sa + sb + two * s ) / two;           /* Module 1 Results. */
00334    sb = one / sa;                                  /*********************/
00335    n  = (long)( (double)( 40000 * (long)sb ) / scale );
00336    sc = sb - 25.2;
00337    T[4] = one / T[3];
00338                            /********************/
00339                            /*  DO NOT REMOVE   */
00340                            /*  THIS PRINTOUT!  */
00341                            /********************/
00342    printf("     1   %13.4le  %10.4lf  %10.4lf    Integration                      [ 7+, 0-, 6*, 1/]\n",sc,T[2],T[4]);
00343 
00344    m = n;
00345 
00346     /*******************************************************/
00347     /* Module 2.  Calculate value of PI from Taylor Series */
00348     /*            expansion of atan(1.0).  There are 7     */
00349     /*            double precision operations per loop     */
00350     /*            ( 3 +, 2 -, 1 *, 1 / ) that are included */
00351     /*            in the timing.                           */
00352     /*            42.9% +, 28.6% -, 14.3% *, and 14.3% /   */
00353     /*******************************************************/
00354 
   /* Loop 2 times only the s/sa bookkeeping statements that Loop 3 also   */
   /* contains; its time T[5] is subtracted from the Loop 3 time T[6], so  */
   /* only the remaining 7 operations of Loop 3 are rated.                 */
00355    s  = -five;                                      /********************/
00356    sa = -one;                                       /* Loop 2.          */
00357                            /********************/
00358    dtime(TimeArray);
00359    for ( i = 1 ; i <= m ; i++ )
00360    {
00361    s  = -s;
00362    sa = sa + s;
00363    }
00364    dtime(TimeArray);
00365    T[5] = T[1] * TimeArray[1];
00366    if ( T[5] < 0.0 ) T[5] = 0.0;
00367 
00368    sc   = (double)m;
00369 
00370    u = sa;                                         /*********************/
00371    v = 0.0;                                        /* Loop 3.           */
00372    w = 0.0;                                        /*********************/
00373    x = 0.0;
00374 
00375    dtime(TimeArray);
00376    for ( i = 1 ; i <= m ; i++)
00377    {
00378    s  = -s;
00379    sa = sa + s;
00380    u  = u + two;
00381    x  = x +(s - u);
00382    v  = v - s * u;
00383    w  = w + s / u;
00384    }
00385    dtime(TimeArray);
00386    T[6] = T[1] * TimeArray[1];
00387 
   /* m is recomputed here from the loop results sa, x and sc.            */
   /* NOTE(review): this looks intended to reproduce the same loop count  */
   /* while making it depend on the computed values -- confirm.           */
00388    T[7] = ( T[6] - T[5] ) / 7.0;                   /*********************/
00389    m  = (long)( sa * x  / sc );                    /*  PI Results       */
00390    sa = four * w / five;                           /*********************/
00391    sb = sa + five / v;
00392    sc = 31.25;
00393    piprg = sb - sc / (v * v * v);
00394    pierr = piprg - piref;
00395    T[8]  = one  / T[7];
00396                           /*********************/
00397                           /*   DO NOT REMOVE   */
00398                           /*   THIS PRINTOUT!  */
00399                           /*********************/
00400    printf("     2   %13.4le  %10.4lf  %10.4lf    Taylor Series                    [ 3+, 2-, 1*, 1/]\n",pierr,T[6]-T[5],T[8]);
00401 
00402     /*******************************************************/
00403     /* Module 3.  Calculate integral of sin(x) from 0.0 to */
00404     /*            PI/3.0 using Trapezoidal Method. Result  */
00405     /*            is 0.5. There are 17 double precision    */
00406     /*            operations per loop (6 +, 2 -, 9 *, 0 /) */
00407     /*            included in the timing.                  */
00408     /*            35.3% +, 11.8% -, 52.9% *, and 00.0% /   */
00409     /*******************************************************/
00410 
00411    x = piref / ( three * (double)m );              /*********************/
00412    s = 0.0;                                        /*  Loop 4.          */
00413    v = 0.0;                                        /*********************/
00414 
00415    dtime(TimeArray);
00416    for( i = 1 ; i <= m-1 ; i++ )
00417    {
00418    v = v + one;
00419    u = v * x;
00420    w = u * u;
00421    s = s + u * ((((((A6*w-A5)*w+A4)*w-A3)*w+A2)*w+A1)*w+one);
00422    }
00423    dtime(TimeArray);
00424    T[9]  = T[1] * TimeArray[1] - nulltime;
00425 
00426    u  = piref / three;
00427    w  = u * u;
00428    sa = u * ((((((A6*w-A5)*w+A4)*w-A3)*w+A2)*w+A1)*w+one);
00429 
00430    T[10] = T[9] / 17.0;                            /*********************/
00431    sa = x * ( sa + two * s ) / two;                /* sin(x) Results.   */
00432    sb = 0.5;                                       /*********************/
00433    sc = sa - sb;
00434    T[11] = one / T[10];
00435                           /*********************/
00436                           /*   DO NOT REMOVE   */
00437                           /*   THIS PRINTOUT!  */
00438                           /*********************/
00439    printf("     3   %13.4le  %10.4lf  %10.4lf    Trapezoidal Sum (sin)            [ 6+, 2-, 9*, 0/]\n",sc,T[9],T[11]);
00440 
00441 /************************************************************/
00442 /* Module 4.  Calculate Integral of cos(x) from 0.0 to PI/3 */
00443 /*            using the Trapezoidal Method. Result is       */
00444 /*            sin(PI/3). There are 15 double precision      */
00445 /*            operations per loop (7 +, 0 -, 8 *, and 0 / ) */
00446 /*            included in the timing.                       */
00447 /*            46.7% +, 00.0% -, 53.3% *, 00.0% /            */
00448 /************************************************************/
   /* Module 3 subtracted the A3 and A5 terms explicitly. Flip their signs */
   /* once here so that every later use of the sin(x) polynomial can add   */
   /* all of its terms.                                                    */
00449    A3 = -A3;
00450    A5 = -A5;
00451    x = piref / ( three * (double)m );              /*********************/
00452    s = 0.0;                                        /*  Loop 5.          */
00453    v = 0.0;                                        /*********************/
00454 
00455    dtime(TimeArray);
00456    for( i = 1 ; i <= m-1 ; i++ )
00457    {
00458    u = (double)i * x;
00459    w = u * u;
00460    s = s + w*(w*(w*(w*(w*(B6*w+B5)+B4)+B3)+B2)+B1)+one;
00461    }
00462    dtime(TimeArray);
00463    T[12]  = T[1] * TimeArray[1] - nulltime;
00464 
00465    u  = piref / three;
00466    w  = u * u;
00467    sa = w*(w*(w*(w*(w*(B6*w+B5)+B4)+B3)+B2)+B1)+one;
00468 
00469    T[13] = T[12] / 15.0;                             /*******************/
00470    sa = x * ( sa + one + two * s ) / two;            /* Module 4 Result */
00471    u  = piref / three;                               /*******************/
00472    w  = u * u;
00473    sb = u * ((((((A6*w+A5)*w+A4)*w+A3)*w+A2)*w+A1)*w+A0);
00474    sc = sa - sb;
00475    T[14] = one / T[13];
00476                           /*********************/
00477                           /*   DO NOT REMOVE   */
00478                           /*   THIS PRINTOUT!  */
00479                           /*********************/
00480    printf("     4   %13.4le  %10.4lf  %10.4lf    Trapezoidal Sum (cos)            [ 7+, 0-, 8*, 0/]\n",sc,T[12],T[14]);
00481 
00482 /************************************************************/
00483 /* Module 5.  Calculate Integral of tan(x) from 0.0 to PI/3 */
00484 /*            using the Trapezoidal Method. Result is       */
00485 /*            -ln(cos(PI/3)). There are 29 double precision */
00486 /*            operations per loop (13 +, 0 -, 15 *, and 1 /)*/
00487 /*            included in the timing.                       */
00488 /*            44.8% +, 00.0% -, 51.7% *, and 03.4% /        */
00489 /************************************************************/
00490 
00491    x = piref / ( three * (double)m );              /*********************/
00492    s = 0.0;                                        /*  Loop 6.          */
00493    v = 0.0;                                        /*********************/
00494 
00495    dtime(TimeArray);
00496    for( i = 1 ; i <= m-1 ; i++ )
00497    {
00498    u = (double)i * x;
00499    w = u * u;
00500    v = u * ((((((A6*w+A5)*w+A4)*w+A3)*w+A2)*w+A1)*w+one);
00501    s = s + v / (w*(w*(w*(w*(w*(B6*w+B5)+B4)+B3)+B2)+B1)+one);
00502    }
00503    dtime(TimeArray);
00504    T[15]  = T[1] * TimeArray[1] - nulltime;
00505 
00506    u  = piref / three;
00507    w  = u * u;
00508    sa = u*((((((A6*w+A5)*w+A4)*w+A3)*w+A2)*w+A1)*w+one);
00509    sb = w*(w*(w*(w*(w*(B6*w+B5)+B4)+B3)+B2)+B1)+one;
00510    sa = sa / sb;
00511 
00512    T[16] = T[15] / 29.0;                             /*******************/
00513    sa = x * ( sa + two * s ) / two;                  /* Module 5 Result */
00514    sb = 0.6931471805599453;                          /*******************/
00515    sc = sa - sb;
00516    T[17] = one / T[16];
00517                           /*********************/
00518                           /*   DO NOT REMOVE   */
00519                           /*   THIS PRINTOUT!  */
00520                           /*********************/
00521    printf("     5   %13.4le  %10.4lf  %10.4lf    Trapezoidal Sum (tan)            [13+, 0-,15*, 1/]\n",sc,T[15],T[17]);
00522 
00523 /************************************************************/
00524 /* Module 6.  Calculate Integral of sin(x)*cos(x) from 0.0  */
00525 /*            to PI/4 using the Trapezoidal Method. Result  */
00526 /*            is sin(PI/4)^2/2. Uses 29 double precision    */
00527 /*            operations per loop (13 +, 0 -, 16 *, and 0 /)*/
00528 /*            included in the timing.                       */
00529 /*            44.8% +, 00.0% -, 55.2% *, and 00.0% /        */
00530 /************************************************************/
00531 
00532    x = piref / ( four * (double)m );               /*********************/
00533    s = 0.0;                                        /*  Loop 7.          */
00534    v = 0.0;                                        /*********************/
00535 
00536    dtime(TimeArray);
00537    for( i = 1 ; i <= m-1 ; i++ )
00538    {
00539    u = (double)i * x;
00540    w = u * u;
00541    v = u * ((((((A6*w+A5)*w+A4)*w+A3)*w+A2)*w+A1)*w+one);
00542    s = s + v*(w*(w*(w*(w*(w*(B6*w+B5)+B4)+B3)+B2)+B1)+one);
00543    }
00544    dtime(TimeArray);
00545    T[18]  = T[1] * TimeArray[1] - nulltime;
00546 
00547    u  = piref / four;
00548    w  = u * u;
00549    sa = u*((((((A6*w+A5)*w+A4)*w+A3)*w+A2)*w+A1)*w+one);
00550    sb = w*(w*(w*(w*(w*(B6*w+B5)+B4)+B3)+B2)+B1)+one;
00551    sa = sa * sb;
00552 
00553    T[19] = T[18] / 29.0;                             /*******************/
00554    sa = x * ( sa + two * s ) / two;                  /* Module 6 Result */
00555    sb = 0.25;                                        /*******************/
00556    sc = sa - sb;
00557    T[20] = one / T[19];
00558                           /*********************/
00559                           /*   DO NOT REMOVE   */
00560                           /*   THIS PRINTOUT!  */
00561                           /*********************/
00562    printf("     6   %13.4le  %10.4lf  %10.4lf    Trapezoidal Sum (sin*cos)        [13+, 0-,16*, 0/]\n",sc,T[18],T[20]);
00563 
00564 
00565 /*******************************************************/
00566 /* Module 7.  Calculate value of the definite integral */
00567 /*            from 0 to sa of 1/(x+1), x/(x*x+1), and  */
00568 /*            x*x/(x*x*x+1) using the Trapezoidal Rule.*/
00569 /*            There are 12 double precision operations */
00570 /*            per loop ( 3 +, 3 -, 3 *, and 3 / ) that */
00571 /*            are included in the timing.              */
00572 /*            25.0% +, 25.0% -, 25.0% *, and 25.0% /   */
00573 /*******************************************************/
00574 
00575                           /*********************/
00576    s = 0.0;                                        /* Loop 8.           */
00577    w = one;                                        /*********************/
00578    sa = 102.3321513995275;
00579    v = sa / (double)m;
00580 
00581    dtime(TimeArray);
00582    for ( i = 1 ; i <= m-1 ; i++)
00583    {
00584    x = (double)i * v;
00585    u = x * x;
00586    s = s - w / ( x + w ) - x / ( u + w ) - u / ( x * u + w );
00587    }
00588    dtime(TimeArray);
00589    T[21] = T[1] * TimeArray[1] - nulltime;
00590                           /*********************/
00591                           /* Module 7 Results  */
00592                           /*********************/
00593    T[22] = T[21] / 12.0;
00594    x  = sa;
00595    u  = x * x;
00596    sa = -w - w / ( x + w ) - x / ( u + w ) - u / ( x * u + w );
00597    sa = 18.0 * v * (sa + two * s );
00598 
   /* As after Module 1, the loop count for Module 8 is rebuilt from the    */
   /* computed result: when sa is close to the -500.2 that the error term   */
   /* sc compares it with, (long)sa is -500 and -2000 * -500 / scale        */
   /* reproduces the calibrated loop count.                                 */
00599    m  = -2000 * (long)sa;
00600    m = (long)( (double)m / scale );
00601 
00602    sc = sa + 500.2;
00603    T[23] = one / T[22];
00604                           /********************/
00605                           /*  DO NOT REMOVE   */
00606                           /*  THIS PRINTOUT!  */
00607                           /********************/
00608    printf("     7   %13.4le  %10.4lf  %10.4lf    Trapezoidal Sum (polynomial)     [ 3+, 3-, 3*, 3/]\n",sc,T[21],T[23]);
00609 
00610 /************************************************************/
00611 /* Module 8.  Calculate Integral of sin(x)*cos(x)*cos(x)    */
00612 /*            from 0 to PI/3 using the Trapezoidal Method.  */
00613 /*            Result is (1-cos(PI/3)^3)/3. There are 30     */
00614 /*            double precision operations per loop included */
00615 /*            in the timing:                                */
00616 /*               13 +,     0 -,    17 *          0 /        */
00617 /*            43.3% +, 00.0% -, 56.7% *, and 00.0% /        */
00618 /************************************************************/
00619 
00620    x = piref / ( three * (double)m );              /*********************/
00621    s = 0.0;                                        /*  Loop 9.          */
00622    v = 0.0;                                        /*********************/
00623 
00624    dtime(TimeArray);
00625    for( i = 1 ; i <= m-1 ; i++ )
00626    {
00627    u = (double)i * x;
00628    w = u * u;
00629    v = w*(w*(w*(w*(w*(B6*w+B5)+B4)+B3)+B2)+B1)+one;
00630    s = s + v*v*u*((((((A6*w+A5)*w+A4)*w+A3)*w+A2)*w+A1)*w+one);
00631    }
00632    dtime(TimeArray);
00633    T[24]  = T[1] * TimeArray[1] - nulltime;
00634 
00635    u  = piref / three;
00636    w  = u * u;
00637    sa = u*((((((A6*w+A5)*w+A4)*w+A3)*w+A2)*w+A1)*w+one);
00638    sb = w*(w*(w*(w*(w*(B6*w+B5)+B4)+B3)+B2)+B1)+one;
00639    sa = sa * sb * sb;
00640 
00641    T[25] = T[24] / 30.0;                             /*******************/
00642    sa = x * ( sa + two * s ) / two;                  /* Module 8 Result */
00643    sb = 0.29166666666666667;                         /*******************/
00644    sc = sa - sb;
00645    T[26] = one / T[25];
00646                           /*********************/
00647                           /*   DO NOT REMOVE   */
00648                           /*   THIS PRINTOUT!  */
00649                           /*********************/
00650    printf("     8   %13.4le  %10.4lf  %10.4lf    Trapezoidal Sum (sin*cos*cos)    [13+, 0-,17*, 0/]\n",sc,T[24],T[26]);
00651 
00652 /**************************************************/
00653 /* MFLOPS(1) output. This is the same weighting   */
00654 /* used for all previous versions of the flops.c  */
00655 /* program. Includes Modules 2 and 3 only.        */
00656 /**************************************************/
   /* Each summary divides the summed module run times by the summed */
   /* operation counts of those modules, then takes the reciprocal.  */
00657    T[27] = ( five * (T[6] - T[5]) + T[9] ) / 52.0;
00658    T[28] = one  / T[27];
00659 
00660 /**************************************************/
00661 /* MFLOPS(2) output. This output does not include */
00662 /* Module 2, but it still does 9.2% FDIV's.       */
00663 /**************************************************/
00664    T[29] = T[2] + T[9] + T[12] + T[15] + T[18];
00665    T[29] = (T[29] + four * T[21]) / 152.0;
00666    T[30] = one / T[29];
00667 
00668 /**************************************************/
00669 /* MFLOPS(3) output. This output does not include */
00670 /* Module 2, but it still does 3.4% FDIV's.       */
00671 /**************************************************/
00672    T[31] = T[2] + T[9] + T[12] + T[15] + T[18];
00673    T[31] = (T[31] + T[21] + T[24]) / 146.0;
00674    T[32] = one / T[31];
00675 
00676 /**************************************************/
00677 /* MFLOPS(4) output. This output does not include */
00678 /* Module 2, and it does NO FDIV's.               */
00679 /**************************************************/
00680    T[33] = (T[9] + T[12] + T[18] + T[24]) / 91.0;
00681    T[34] = one / T[33];
00682 
00683 
00684    printf("\n");
00685    printf("Averaging various groups above\n");
00686    printf("\n");
00687    printf("   Iterations      = %10ld\n",m);
00688    printf("   NullTime (usec) = %10.4lf\n",nulltime);
00689    printf("   MFLOPS(1)       = %10.4lf    [generic 2,3 only]\n",T[28]);
00690    printf("   MFLOPS(2)       = %10.4lf    [9.2%% fp divisions]\n",T[30]);
00691    printf("   MFLOPS(3)       = %10.4lf    [3.4%% fp divisions]\n",T[32]);
00692    printf("   MFLOPS(4)       = %10.4lf    [0.0%% fp divisions]\n\n",T[34]);
00693 
00694    return 0;
00695 }