|
kusano |
7d535a |
#include <stdio.h>
|
|
kusano |
7d535a |
#include <stdlib.h>
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*
 * mysub - deliberately empty routine.
 *
 * Accepts a length and two vectors and does nothing with them.  main()
 * calls it once after all timing loops with the arrays it has been
 * updating.
 * NOTE(review): presumably this exists so the compiler must treat the
 * arrays as live and cannot discard the timed loops -- confirm.
 *
 *   n  number of elements in x and y (unused)
 *   x  first vector (unused, never dereferenced)
 *   y  second vector (unused, never dereferenced)
 */
void mysub(int n, double *x, double *y)
{
    /* Explicitly mark every argument as intentionally unused. */
    (void)n;
    (void)x;
    (void)y;
}
|
|
kusano |
7d535a |
|
|
kusano |
7d535a |
/*
 * Timing test for SuperLU_timer_().
 *
 * The program
 *   1. calibrates: repeats a block of DAXPY-style updates (y += alpha*x),
 *      multiplying the repetition count by 10 until the timer reports a
 *      positive elapsed time, then prints the time and the Gflop rate;
 *   2. times ITS updates with the timer called only before and after the
 *      outer loop;
 *   3. times ITS updates again with one extra timer call per outer
 *      iteration;
 *   4. reports the difference between (3) and (2) as the average cost of
 *      one timer call, in milliseconds and in equivalent floating point
 *      operations.
 *
 * SuperLU_timer_() is only declared here; its definition lives outside
 * this file.  The printed labels treat differences of its return values
 * as seconds.
 *
 * Returns 0.
 */
int main(void)
{
    /* Parameters */
#define NMAX 1000
#define ITS 100000

    int i, j, iters;
    double alpha, avg, t1, t2, tnotim;
    double x[NMAX], y[NMAX];
    double SuperLU_timer_(void);

    /* Initialize X and Y */
    for (i = 0; i < NMAX; ++i) {
        x[i] = 1.0 / (double)(i + 1);
        y[i] = (double)(NMAX - i) / (double)NMAX;
    }
    alpha = 0.315;

    /* Time DAXPY operations, growing the workload until the timer can
       resolve it. */
    iters = ITS;
    tnotim = 0.0;
    while (tnotim <= 0.0) {
        t1 = SuperLU_timer_();
        for (j = 0; j < iters; ++j) {
            for (i = 0; i < NMAX; ++i) {
                y[i] += alpha * x[i];
            }
            /* Flip the sign so y stays bounded across iterations. */
            alpha = -alpha;
        }
        t2 = SuperLU_timer_();
        tnotim = t2 - t1;
        if (tnotim > 0.) {
            /* One multiply and one add per element, scaled to Gflop. */
            double ops = 2.0 * iters * NMAX * 1e-9;
            printf("Time for %d DAXPYs = %10.3g seconds\n",
                   iters, tnotim);
            printf("DAXPY performance rate = %10.3g Gflops\n", ops / tnotim);
        } else {
            /* Make sure we do not keep trying forever: once the workload
               is this large and the elapsed time is still not positive,
               report it and stop calibrating.  Continuing would repeat
               the message endlessly and eventually overflow iters. */
            if (iters > 100000000) {
                printf("*** Error: Time for operations was zero.\n"
                       "\tThe timer may not be working correctly.\n");
                break;
            }
            iters *= 10;
        }
    }

    /* Force gcc not to optimize away the previous loop (DCS) */
    printf("y[0]=%g\n", y[0]);

    /* Time ITS DAXPY operations with SuperLU_timer_() called only
       outside the outer loop; this is the baseline. */
    t1 = SuperLU_timer_();
    for (j = 0; j < ITS; ++j) {
        for (i = 0; i < NMAX; ++i) {
            y[i] += alpha * x[i];
        }
        alpha = -alpha;
    }
    t2 = SuperLU_timer_();
    tnotim = t2 - t1;

    /* Time ITS DAXPY operations with SuperLU_timer_() called inside
       the outer loop; the last call also provides the end time. */
    t1 = SuperLU_timer_();
    for (j = 0; j < ITS; ++j) {
        for (i = 0; i < NMAX; ++i) {
            y[i] += alpha * x[i];
        }
        alpha = -alpha;
        t2 = SuperLU_timer_();
    }

    /* Compute the time in milliseconds used by an average call to
       SuperLU_timer_(). */
    printf("Including DSECND, time = %10.3g seconds\n", t2 - t1);
    avg = ((t2 - t1) - tnotim) * 1000. / (double)ITS;
    printf("Average time for DSECND = %10.3g milliseconds\n", avg);

    /* Compute the equivalent number of floating point operations used
       by an average call to the timer: seconds per call (avg / 1000)
       times the flop rate of the baseline loop, which performed
       2 * ITS * NMAX operations in tnotim seconds. */
    if (tnotim > 0.) {
        printf("Equivalent floating point ops = %10.3g ops\n",
               (avg / 1000.) * (2.0 * ITS * NMAX) / tnotim);
    }

    mysub(NMAX, x, y);
    return 0;
}
|
|
kusano |
7d535a |
|