$search
/*---------------- Start Aburto's flops.c source code -----------------*/

/*****************************/
/*          flops.c          */
/* Version 2.0,  18 Dec 1992 */
/*         Al Aburto         */
/*      aburto@nosc.mil      */
/*****************************/

/*
   Flops.c is a 'c' program which attempts to estimate your system's
   floating-point 'MFLOPS' rating for the FADD, FSUB, FMUL, and FDIV
   operations based on specific 'instruction mixes' (discussed below).
   The program provides an estimate of PEAK MFLOPS performance by making
   maximal use of register variables with minimal interaction with main
   memory. The execution loops are all small so that they will fit in
   any cache. Flops.c can be used along with Linpack and the Livermore
   kernels (which exercise memory much more extensively) to gain further
   insight into the limits of system performance. The flops.c execution
   modules also include various percent weightings of FDIV's (from 0% to
   25% FDIV's) so that the range of performance can be obtained when
   using FDIV's. FDIV's, being computationally more intensive than
   FADD's or FMUL's, can impact performance considerably on some systems.

   Flops.c consists of 8 independent modules (routines) which, except for
   module 2, conduct numerical integration of various functions. Module
   2 estimates the value of pi based upon the Maclaurin series expansion
   of atan(1). MFLOPS ratings are provided for each module, but the
   program's overall results are summarized by the MFLOPS(1), MFLOPS(2),
   MFLOPS(3), and MFLOPS(4) outputs.

   The MFLOPS(1) result is identical to the result provided by all
   previous versions of flops.c. It is based only upon the results from
   modules 2 and 3. Two problems surfaced in using MFLOPS(1).
   First, it was difficult to completely 'vectorize' the result due to
   the recurrence of the 's' variable in module 2. This problem is
   addressed in the MFLOPS(2) result which does not use module 2, but
   maintains nearly the same weighting of FDIV's (9.2%) as in MFLOPS(1)
   (9.6%). The second problem with MFLOPS(1) centers around the
   percentage of FDIV's (9.6%) which was viewed as too high for an
   important class of problems. This concern is addressed in the
   MFLOPS(3) result, which lowers the weighting to 3.4% FDIV's, and in
   the MFLOPS(4) result, where NO FDIV's are conducted at all.

   The number of floating-point instructions per iteration (loop) is
   given below for each module executed:

   MODULE   FADD   FSUB   FMUL   FDIV   TOTAL  Comment
     1        7      0      6      1      14   7.1%  FDIV's
     2        3      2      1      1       7   difficult to vectorize.
     3        6      2      9      0      17   0.0%  FDIV's
     4        7      0      8      0      15   0.0%  FDIV's
     5       13      0     15      1      29   3.4%  FDIV's
     6       13      0     16      0      29   0.0%  FDIV's
     7        3      3      3      3      12   25.0% FDIV's
     8       13      0     17      0      30   0.0%  FDIV's

   A*2+3     21     12     14      5      52   A=5, MFLOPS(1), Same as
            40.4%  23.1%  26.9%   9.6%         previous versions of the
                                               flops.c program. Includes
                                               only Modules 2 and 3, does
                                               9.6% FDIV's, and is not
                                               easily vectorizable.

   1+3+4     58     14     66     14     152   A=4, MFLOPS(2), New output
   +5+6+    38.2%   9.2%  43.4%   9.2%         does not include Module 2,
   A*7                                         but does 9.2% FDIV's.

   1+3+4     62      5     74      5     146   A=0, MFLOPS(3), New output
   +5+6+    42.9%   3.4%  50.7%   3.4%         does not include Module 2,
   7+8                                         but does 3.4% FDIV's.

   3+4+6     39      2     50      0      91   A=0, MFLOPS(4), New output
   +8       42.9%   2.2%  54.9%   0.0%         does not include Module 2,
                                               and does NO FDIV's.

   NOTE: Various timer routines are included as indicated below. The
         timer routines, with some comments, are attached at the end
         of the main program.

   NOTE: Please do not remove any of the printouts.
00097 00098 EXAMPLE COMPILATION: 00099 UNIX based systems 00100 cc -DUNIX -O flops.c -o flops 00101 cc -DUNIX -DROPT flops.c -o flops 00102 cc -DUNIX -fast -O4 flops.c -o flops 00103 . 00104 . 00105 . 00106 etc. 00107 00108 Al Aburto 00109 aburto@nosc.mil 00110 */ 00111 /***************************************************************************** 00112 ** Includes 00113 *****************************************************************************/ 00114 00115 #include <cstdio> 00116 #include <cmath> 00117 #include <ecl/config.hpp> 00118 #include <ecl/command_line.hpp> 00119 #include <ecl/threads/priority.hpp> 00120 00121 /***************************************************************************** 00122 ** Using 00123 *****************************************************************************/ 00124 00125 using ecl::CmdLine; 00126 using ecl::StandardException; 00127 00128 /***************************************************************************** 00129 ** dtime() : Uses Posix .1 calls to evaluate the time. 
00130 *****************************************************************************/ 00131 00132 #if defined(ECL_IS_WIN32) 00133 #include <windows.h> 00134 00135 int dtime(double *p) 00136 { 00137 double q; 00138 00139 q = p[2]; 00140 00141 p[2] = (double)GetTickCount() * 1.0e-03; 00142 p[1] = p[2] - q; 00143 00144 return 0; 00145 } 00146 #elif defined(ECL_IS_POSIX) 00147 #include <unistd.h> 00148 #include <limits.h> 00149 #include <sys/times.h> 00150 00151 int dtime(double *p) 00152 { 00153 static struct tms tms; 00154 00155 double q; 00156 times(&tms); 00157 q = p[2]; 00158 p[2] = (double)tms.tms_utime / (double)_SC_CLK_TCK; 00159 p[1] = p[2] - q; 00160 return 0; 00161 } 00162 #endif 00163 00164 00165 /***************************************************************************** 00166 ** Main program 00167 *****************************************************************************/ 00168 int main(int argc, char** argv) 00169 { 00170 try { 00171 ecl::set_priority(ecl::RealTimePriority4); 00172 } catch ( StandardException &e ) { 00173 // dont worry about it. 00174 } 00175 00176 CmdLine cmd("Benchmarks the speed of computation (mflops) on this machine."); 00177 cmd.parse(argc,argv); 00178 00179 /***************************************************************************** 00180 ** Variables 00181 *****************************************************************************/ 00182 00183 double nulltime, TimeArray[3]; /* Variables needed for 'dtime()'. */ 00184 double TLimit; /* Threshold to determine Number of */ 00185 /* Loops to run. Fixed at 15.0 seconds.*/ 00186 00187 double T[36]; /* Global Array used to hold timing */ 00188 /* results and other information. 
*/ 00189 00190 double sa,sb,sc,one,two,three; 00191 // double sd; 00192 double four,five,piref,piprg; 00193 double scale,pierr; 00194 00195 double A0 = 1.0; 00196 double A1 = -0.1666666666671334; 00197 double A2 = 0.833333333809067E-2; 00198 double A3 = 0.198412715551283E-3; 00199 double A4 = 0.27557589750762E-5; 00200 double A5 = 0.2507059876207E-7; 00201 double A6 = 0.164105986683E-9; 00202 00203 // double B0 = 1.0; 00204 double B1 = -0.4999999999982; 00205 double B2 = 0.4166666664651E-1; 00206 double B3 = -0.1388888805755E-2; 00207 double B4 = 0.24801428034E-4; 00208 double B5 = -0.2754213324E-6; 00209 double B6 = 0.20189405E-8; 00210 00211 // double C0 = 1.0; 00212 // double C1 = 0.99999999668; 00213 // double C2 = 0.49999995173; 00214 // double C3 = 0.16666704243; 00215 // double C4 = 0.4166685027E-1; 00216 // double C5 = 0.832672635E-2; 00217 // double C6 = 0.140836136E-2; 00218 // double C7 = 0.17358267E-3; 00219 // double C8 = 0.3931683E-4; 00220 00221 double D1 = 0.3999999946405E-1; 00222 double D2 = 0.96E-3; 00223 double D3 = 0.1233153E-5; 00224 00225 double E2 = 0.48E-3; 00226 double E3 = 0.411051E-6; 00227 00228 double s,u,v,w,x; 00229 00230 long loops, NLimit; 00231 register long i, m, n; 00232 00233 printf("\n"); 00234 printf(" FLOPS C Program (Double Precision), V2.0 18 Dec 1992\n\n"); 00235 00236 /****************************/ 00237 loops = 15625; /* Initial number of loops. */ 00238 /* DO NOT CHANGE! */ 00239 /****************************/ 00240 00241 /****************************************************/ 00242 /* Set Variable Values. */ 00243 /* T[1] references all timing results relative to */ 00244 /* one million loops. */ 00245 /* */ 00246 /* The program will execute from 31250 to 512000000 */ 00247 /* loops based on a runtime of Module 1 of at least */ 00248 /* TLimit = 15.0 seconds. That is, a runtime of 15 */ 00249 /* seconds for Module 1 is used to determine the */ 00250 /* number of loops to execute. 
*/ 00251 /* */ 00252 /* No more than NLimit = 512000000 loops are allowed*/ 00253 /****************************************************/ 00254 00255 T[1] = 1.0E+06/(double)loops; 00256 00257 TLimit = 15.0; 00258 NLimit = 512000000; 00259 00260 piref = 3.14159265358979324; 00261 one = 1.0; 00262 two = 2.0; 00263 three = 3.0; 00264 four = 4.0; 00265 five = 5.0; 00266 scale = one; 00267 00268 printf(" Module Error RunTime MFLOPS Math Calculation Operations\n"); 00269 printf(" (usec)\n"); 00270 /*************************/ 00271 /* Initialize the timer. */ 00272 /*************************/ 00273 00274 dtime(TimeArray); 00275 dtime(TimeArray); 00276 00277 /*******************************************************/ 00278 /* Module 1. Calculate integral of df(x)/f(x) defined */ 00279 /* below. Result is ln(f(1)). There are 14 */ 00280 /* double precision operations per loop */ 00281 /* ( 7 +, 0 -, 6 *, 1 / ) that are included */ 00282 /* in the timing. */ 00283 /* 50.0% +, 00.0% -, 42.9% *, and 07.1% / */ 00284 /*******************************************************/ 00285 n = loops; 00286 sa = 0.0; 00287 00288 s = 0.0; // Initialising to remove warning 00289 x = 0.0; 00290 00291 while ( sa < TLimit ) 00292 { 00293 n = 2 * n; 00294 x = one / (double)n; /*********************/ 00295 s = 0.0; /* Loop 1. */ 00296 v = 0.0; /*********************/ 00297 w = one; 00298 00299 dtime(TimeArray); 00300 for( i = 1 ; i <= n-1 ; i++ ) 00301 { 00302 v = v + w; 00303 u = v * x; 00304 s = s + (D1+u*(D2+u*D3))/(w+u*(D1+u*(E2+u*E3))); 00305 } 00306 dtime(TimeArray); 00307 sa = TimeArray[1]; 00308 00309 if ( n == NLimit ) break; 00310 /* printf(" %10ld %12.5lf\n",n,sa); */ 00311 } 00312 00313 scale = 1.0E+06 / (double)n; 00314 T[1] = scale; 00315 00316 /****************************************/ 00317 /* Estimate nulltime ('for' loop time). 
*/ 00318 /****************************************/ 00319 dtime(TimeArray); 00320 for( i = 1 ; i <= n-1 ; i++ ) 00321 { 00322 } 00323 dtime(TimeArray); 00324 nulltime = T[1] * TimeArray[1]; 00325 if ( nulltime < 0.0 ) nulltime = 0.0; 00326 00327 T[2] = T[1] * sa - nulltime; 00328 00329 sa = (D1+D2+D3)/(one+D1+E2+E3); 00330 sb = D1; 00331 00332 T[3] = T[2] / 14.0; /*********************/ 00333 sa = x * ( sa + sb + two * s ) / two; /* Module 1 Results. */ 00334 sb = one / sa; /*********************/ 00335 n = (long)( (double)( 40000 * (long)sb ) / scale ); 00336 sc = sb - 25.2; 00337 T[4] = one / T[3]; 00338 /********************/ 00339 /* DO NOT REMOVE */ 00340 /* THIS PRINTOUT! */ 00341 /********************/ 00342 printf(" 1 %13.4le %10.4lf %10.4lf Integration [ 7+, 0-, 6*, 1/]\n",sc,T[2],T[4]); 00343 00344 m = n; 00345 00346 /*******************************************************/ 00347 /* Module 2. Calculate value of PI from Taylor Series */ 00348 /* expansion of atan(1.0). There are 7 */ 00349 /* double precision operations per loop */ 00350 /* ( 3 +, 2 -, 1 *, 1 / ) that are included */ 00351 /* in the timing. */ 00352 /* 42.9% +, 28.6% -, 14.3% *, and 14.3% / */ 00353 /*******************************************************/ 00354 00355 s = -five; /********************/ 00356 sa = -one; /* Loop 2. */ 00357 /********************/ 00358 dtime(TimeArray); 00359 for ( i = 1 ; i <= m ; i++ ) 00360 { 00361 s = -s; 00362 sa = sa + s; 00363 } 00364 dtime(TimeArray); 00365 T[5] = T[1] * TimeArray[1]; 00366 if ( T[5] < 0.0 ) T[5] = 0.0; 00367 00368 sc = (double)m; 00369 00370 u = sa; /*********************/ 00371 v = 0.0; /* Loop 3. 
*/ 00372 w = 0.0; /*********************/ 00373 x = 0.0; 00374 00375 dtime(TimeArray); 00376 for ( i = 1 ; i <= m ; i++) 00377 { 00378 s = -s; 00379 sa = sa + s; 00380 u = u + two; 00381 x = x +(s - u); 00382 v = v - s * u; 00383 w = w + s / u; 00384 } 00385 dtime(TimeArray); 00386 T[6] = T[1] * TimeArray[1]; 00387 00388 T[7] = ( T[6] - T[5] ) / 7.0; /*********************/ 00389 m = (long)( sa * x / sc ); /* PI Results */ 00390 sa = four * w / five; /*********************/ 00391 sb = sa + five / v; 00392 sc = 31.25; 00393 piprg = sb - sc / (v * v * v); 00394 pierr = piprg - piref; 00395 T[8] = one / T[7]; 00396 /*********************/ 00397 /* DO NOT REMOVE */ 00398 /* THIS PRINTOUT! */ 00399 /*********************/ 00400 printf(" 2 %13.4le %10.4lf %10.4lf Taylor Series [ 3+, 2-, 1*, 1/]\n",pierr,T[6]-T[5],T[8]); 00401 00402 /*******************************************************/ 00403 /* Module 3. Calculate integral of sin(x) from 0.0 to */ 00404 /* PI/3.0 using Trapazoidal Method. Result */ 00405 /* is 0.5. There are 17 double precision */ 00406 /* operations per loop (6 +, 2 -, 9 *, 0 /) */ 00407 /* included in the timing. */ 00408 /* 35.3% +, 11.8% -, 52.9% *, and 00.0% / */ 00409 /*******************************************************/ 00410 00411 x = piref / ( three * (double)m ); /*********************/ 00412 s = 0.0; /* Loop 4. */ 00413 v = 0.0; /*********************/ 00414 00415 dtime(TimeArray); 00416 for( i = 1 ; i <= m-1 ; i++ ) 00417 { 00418 v = v + one; 00419 u = v * x; 00420 w = u * u; 00421 s = s + u * ((((((A6*w-A5)*w+A4)*w-A3)*w+A2)*w+A1)*w+one); 00422 } 00423 dtime(TimeArray); 00424 T[9] = T[1] * TimeArray[1] - nulltime; 00425 00426 u = piref / three; 00427 w = u * u; 00428 sa = u * ((((((A6*w-A5)*w+A4)*w-A3)*w+A2)*w+A1)*w+one); 00429 00430 T[10] = T[9] / 17.0; /*********************/ 00431 sa = x * ( sa + two * s ) / two; /* sin(x) Results. 
*/ 00432 sb = 0.5; /*********************/ 00433 sc = sa - sb; 00434 T[11] = one / T[10]; 00435 /*********************/ 00436 /* DO NOT REMOVE */ 00437 /* THIS PRINTOUT! */ 00438 /*********************/ 00439 printf(" 3 %13.4le %10.4lf %10.4lf Trapezoidal Sum (sin) [ 6+, 2-, 9*, 0/]\n",sc,T[9],T[11]); 00440 00441 /************************************************************/ 00442 /* Module 4. Calculate Integral of cos(x) from 0.0 to PI/3 */ 00443 /* using the Trapazoidal Method. Result is */ 00444 /* sin(PI/3). There are 15 double precision */ 00445 /* operations per loop (7 +, 0 -, 8 *, and 0 / ) */ 00446 /* included in the timing. */ 00447 /* 50.0% +, 00.0% -, 50.0% *, 00.0% / */ 00448 /************************************************************/ 00449 A3 = -A3; 00450 A5 = -A5; 00451 x = piref / ( three * (double)m ); /*********************/ 00452 s = 0.0; /* Loop 5. */ 00453 v = 0.0; /*********************/ 00454 00455 dtime(TimeArray); 00456 for( i = 1 ; i <= m-1 ; i++ ) 00457 { 00458 u = (double)i * x; 00459 w = u * u; 00460 s = s + w*(w*(w*(w*(w*(B6*w+B5)+B4)+B3)+B2)+B1)+one; 00461 } 00462 dtime(TimeArray); 00463 T[12] = T[1] * TimeArray[1] - nulltime; 00464 00465 u = piref / three; 00466 w = u * u; 00467 sa = w*(w*(w*(w*(w*(B6*w+B5)+B4)+B3)+B2)+B1)+one; 00468 00469 T[13] = T[12] / 15.0; /*******************/ 00470 sa = x * ( sa + one + two * s ) / two; /* Module 4 Result */ 00471 u = piref / three; /*******************/ 00472 w = u * u; 00473 sb = u * ((((((A6*w+A5)*w+A4)*w+A3)*w+A2)*w+A1)*w+A0); 00474 sc = sa - sb; 00475 T[14] = one / T[13]; 00476 /*********************/ 00477 /* DO NOT REMOVE */ 00478 /* THIS PRINTOUT! */ 00479 /*********************/ 00480 printf(" 4 %13.4le %10.4lf %10.4lf Trapezoidal Sum (cos) [ 7+, 0-, 8*, 0/]\n",sc,T[12],T[14]); 00481 00482 /************************************************************/ 00483 /* Module 5. Calculate Integral of tan(x) from 0.0 to PI/3 */ 00484 /* using the Trapazoidal Method. 
Result is */ 00485 /* ln(cos(PI/3)). There are 29 double precision */ 00486 /* operations per loop (13 +, 0 -, 15 *, and 1 /)*/ 00487 /* included in the timing. */ 00488 /* 46.7% +, 00.0% -, 50.0% *, and 03.3% / */ 00489 /************************************************************/ 00490 00491 x = piref / ( three * (double)m ); /*********************/ 00492 s = 0.0; /* Loop 6. */ 00493 v = 0.0; /*********************/ 00494 00495 dtime(TimeArray); 00496 for( i = 1 ; i <= m-1 ; i++ ) 00497 { 00498 u = (double)i * x; 00499 w = u * u; 00500 v = u * ((((((A6*w+A5)*w+A4)*w+A3)*w+A2)*w+A1)*w+one); 00501 s = s + v / (w*(w*(w*(w*(w*(B6*w+B5)+B4)+B3)+B2)+B1)+one); 00502 } 00503 dtime(TimeArray); 00504 T[15] = T[1] * TimeArray[1] - nulltime; 00505 00506 u = piref / three; 00507 w = u * u; 00508 sa = u*((((((A6*w+A5)*w+A4)*w+A3)*w+A2)*w+A1)*w+one); 00509 sb = w*(w*(w*(w*(w*(B6*w+B5)+B4)+B3)+B2)+B1)+one; 00510 sa = sa / sb; 00511 00512 T[16] = T[15] / 29.0; /*******************/ 00513 sa = x * ( sa + two * s ) / two; /* Module 5 Result */ 00514 sb = 0.6931471805599453; /*******************/ 00515 sc = sa - sb; 00516 T[17] = one / T[16]; 00517 /*********************/ 00518 /* DO NOT REMOVE */ 00519 /* THIS PRINTOUT! */ 00520 /*********************/ 00521 printf(" 5 %13.4le %10.4lf %10.4lf Trapezoidal Sum (tan) [13+, 0-,15*, 1/]\n",sc,T[15],T[17]); 00522 00523 /************************************************************/ 00524 /* Module 6. Calculate Integral of sin(x)*cos(x) from 0.0 */ 00525 /* to PI/4 using the Trapazoidal Method. Result */ 00526 /* is sin(PI/4)^2. There are 29 double precision */ 00527 /* operations per loop (13 +, 0 -, 16 *, and 0 /)*/ 00528 /* included in the timing. */ 00529 /* 46.7% +, 00.0% -, 53.3% *, and 00.0% / */ 00530 /************************************************************/ 00531 00532 x = piref / ( four * (double)m ); /*********************/ 00533 s = 0.0; /* Loop 7. 
*/ 00534 v = 0.0; /*********************/ 00535 00536 dtime(TimeArray); 00537 for( i = 1 ; i <= m-1 ; i++ ) 00538 { 00539 u = (double)i * x; 00540 w = u * u; 00541 v = u * ((((((A6*w+A5)*w+A4)*w+A3)*w+A2)*w+A1)*w+one); 00542 s = s + v*(w*(w*(w*(w*(w*(B6*w+B5)+B4)+B3)+B2)+B1)+one); 00543 } 00544 dtime(TimeArray); 00545 T[18] = T[1] * TimeArray[1] - nulltime; 00546 00547 u = piref / four; 00548 w = u * u; 00549 sa = u*((((((A6*w+A5)*w+A4)*w+A3)*w+A2)*w+A1)*w+one); 00550 sb = w*(w*(w*(w*(w*(B6*w+B5)+B4)+B3)+B2)+B1)+one; 00551 sa = sa * sb; 00552 00553 T[19] = T[18] / 29.0; /*******************/ 00554 sa = x * ( sa + two * s ) / two; /* Module 6 Result */ 00555 sb = 0.25; /*******************/ 00556 sc = sa - sb; 00557 T[20] = one / T[19]; 00558 /*********************/ 00559 /* DO NOT REMOVE */ 00560 /* THIS PRINTOUT! */ 00561 /*********************/ 00562 printf(" 6 %13.4le %10.4lf %10.4lf Trapezoidal Sum (sin*cos) [13+, 0-,16*, 0/]\n",sc,T[18],T[20]); 00563 00564 00565 /*******************************************************/ 00566 /* Module 7. Calculate value of the definite integral */ 00567 /* from 0 to sa of 1/(x+1), x/(x*x+1), and */ 00568 /* x*x/(x*x*x+1) using the Trapizoidal Rule.*/ 00569 /* There are 12 double precision operations */ 00570 /* per loop ( 3 +, 3 -, 3 *, and 3 / ) that */ 00571 /* are included in the timing. */ 00572 /* 25.0% +, 25.0% -, 25.0% *, and 25.0% / */ 00573 /*******************************************************/ 00574 00575 /*********************/ 00576 s = 0.0; /* Loop 8. 
*/ 00577 w = one; /*********************/ 00578 sa = 102.3321513995275; 00579 v = sa / (double)m; 00580 00581 dtime(TimeArray); 00582 for ( i = 1 ; i <= m-1 ; i++) 00583 { 00584 x = (double)i * v; 00585 u = x * x; 00586 s = s - w / ( x + w ) - x / ( u + w ) - u / ( x * u + w ); 00587 } 00588 dtime(TimeArray); 00589 T[21] = T[1] * TimeArray[1] - nulltime; 00590 /*********************/ 00591 /* Module 7 Results */ 00592 /*********************/ 00593 T[22] = T[21] / 12.0; 00594 x = sa; 00595 u = x * x; 00596 sa = -w - w / ( x + w ) - x / ( u + w ) - u / ( x * u + w ); 00597 sa = 18.0 * v * (sa + two * s ); 00598 00599 m = -2000 * (long)sa; 00600 m = (long)( (double)m / scale ); 00601 00602 sc = sa + 500.2; 00603 T[23] = one / T[22]; 00604 /********************/ 00605 /* DO NOT REMOVE */ 00606 /* THIS PRINTOUT! */ 00607 /********************/ 00608 printf(" 7 %13.4le %10.4lf %10.4lf Trapezoidal Sum (polynomial) [ 3+, 3-, 3*, 3/]\n",sc,T[21],T[23]); 00609 00610 /************************************************************/ 00611 /* Module 8. Calculate Integral of sin(x)*cos(x)*cos(x) */ 00612 /* from 0 to PI/3 using the Trapazoidal Method. */ 00613 /* Result is (1-cos(PI/3)^3)/3. There are 30 */ 00614 /* double precision operations per loop included */ 00615 /* in the timing: */ 00616 /* 13 +, 0 -, 17 * 0 / */ 00617 /* 46.7% +, 00.0% -, 53.3% *, and 00.0% / */ 00618 /************************************************************/ 00619 00620 x = piref / ( three * (double)m ); /*********************/ 00621 s = 0.0; /* Loop 9. 
*/ 00622 v = 0.0; /*********************/ 00623 00624 dtime(TimeArray); 00625 for( i = 1 ; i <= m-1 ; i++ ) 00626 { 00627 u = (double)i * x; 00628 w = u * u; 00629 v = w*(w*(w*(w*(w*(B6*w+B5)+B4)+B3)+B2)+B1)+one; 00630 s = s + v*v*u*((((((A6*w+A5)*w+A4)*w+A3)*w+A2)*w+A1)*w+one); 00631 } 00632 dtime(TimeArray); 00633 T[24] = T[1] * TimeArray[1] - nulltime; 00634 00635 u = piref / three; 00636 w = u * u; 00637 sa = u*((((((A6*w+A5)*w+A4)*w+A3)*w+A2)*w+A1)*w+one); 00638 sb = w*(w*(w*(w*(w*(B6*w+B5)+B4)+B3)+B2)+B1)+one; 00639 sa = sa * sb * sb; 00640 00641 T[25] = T[24] / 30.0; /*******************/ 00642 sa = x * ( sa + two * s ) / two; /* Module 8 Result */ 00643 sb = 0.29166666666666667; /*******************/ 00644 sc = sa - sb; 00645 T[26] = one / T[25]; 00646 /*********************/ 00647 /* DO NOT REMOVE */ 00648 /* THIS PRINTOUT! */ 00649 /*********************/ 00650 printf(" 8 %13.4le %10.4lf %10.4lf Trapezoidal Sum (sin*cos*cos) [13+, 0-,17*, 0/]\n",sc,T[24],T[26]); 00651 00652 /**************************************************/ 00653 /* MFLOPS(1) output. This is the same weighting */ 00654 /* used for all previous versions of the flops.c */ 00655 /* program. Includes Modules 2 and 3 only. */ 00656 /**************************************************/ 00657 T[27] = ( five * (T[6] - T[5]) + T[9] ) / 52.0; 00658 T[28] = one / T[27]; 00659 00660 /**************************************************/ 00661 /* MFLOPS(2) output. This output does not include */ 00662 /* Module 2, but it still does 9.2% FDIV's. */ 00663 /**************************************************/ 00664 T[29] = T[2] + T[9] + T[12] + T[15] + T[18]; 00665 T[29] = (T[29] + four * T[21]) / 152.0; 00666 T[30] = one / T[29]; 00667 00668 /**************************************************/ 00669 /* MFLOPS(3) output. This output does not include */ 00670 /* Module 2, but it still does 3.4% FDIV's. 
*/ 00671 /**************************************************/ 00672 T[31] = T[2] + T[9] + T[12] + T[15] + T[18]; 00673 T[31] = (T[31] + T[21] + T[24]) / 146.0; 00674 T[32] = one / T[31]; 00675 00676 /**************************************************/ 00677 /* MFLOPS(4) output. This output does not include */ 00678 /* Module 2, and it does NO FDIV's. */ 00679 /**************************************************/ 00680 T[33] = (T[9] + T[12] + T[18] + T[24]) / 91.0; 00681 T[34] = one / T[33]; 00682 00683 00684 printf("\n"); 00685 printf("Averaging various groups above\n"); 00686 printf("\n"); 00687 printf(" Iterations = %10ld\n",m); 00688 printf(" NullTime (usec) = %10.4lf\n",nulltime); 00689 printf(" MFLOPS(1) = %10.4lf [generic 2,3 only]\n",T[28]); 00690 printf(" MFLOPS(2) = %10.4lf [9.2%% fp divisions]\n",T[30]); 00691 printf(" MFLOPS(3) = %10.4lf [3.4%% fp divisions]\n",T[32]); 00692 printf(" MFLOPS(4) = %10.4lf [0.0%% fp divisions]\n\n",T[34]); 00693 00694 return 0; 00695 }