Appended to this message is a fairly portable C translation of stream.f.
On our hp735 with "cc +P +O3 -J +Om1 -Wl,-a,archive", I get these results:
Timing calibration ; time = 760.00 usec.
Increase the size of the arrays if this is < 300
and your clock precision is =< 1/100 second.
---------------------------------------------------
Function Rate (MB/s) RMS time Min time Max time
Assignment: 69.837 247.083 240.000 260.000
Scaling : 69.837 246.049 240.000 250.000
Summing : 71.832 351.013 350.000 360.000
SAXPYing : 73.945 350.143 340.000 370.000
The code is also available for anon ftp from
neurocog.lrdc.pitt.edu:pub/cstream.c
/*
* Program: Stream
* Programmer: John D. McCalpin
* Revision: 2.0, September 30,1991
*
* This program measures memory transfer rates in MB/s for simple
* computational kernels coded in Fortran. These numbers reveal the
* quality of code generation for simple uncacheable kernels as well
* as showing the cost of floating-point operations relative to memory
* accesses.
*
* INSTRUCTIONS:
* 1) (fortran-specific, omitted.)
* 2) Stream requires a good bit of memory to run.
* Adjust the parameter 'N' (the first #define following this comment)
* to give a 'timing calibration' of at least 20 clicks.
* This will provide rate estimates that should be good to
* about 5% precision.
* 3) Compile the code with full optimization. Many compilers
* generate unreasonably bad code before the optimizer tightens
* things up. If the results are unreasonably good, on the
* other hand, the optimizer might be too smart for me!
* 4) Mail the results to mccalpin@perelandra.cms.udel.edu
* Be sure to include:
* a) computer hardware model number and software revision
* b) the compiler flags
* c) all of the output from the test case.
*
* Thanks!
*
* This version was ported from the fortran by Mark Hahn, hahn+@pitt.edu.
*/
/* Number of elements in each of the three benchmark arrays. */
#define N (1023*1024)
/* Number of times the full set of timed kernels is repeated. */
#define NTIMES 10
/*
 * Feature-test macros, defined ahead of the #include lines that follow.
 * NOTE(review): presumably needed so the system headers expose the POSIX
 * timing interfaces on HP-UX -- confirm against the target's headers.
 */
#define _HPUX_SOURCE 1
#define _POSIX_SOURCE 1
#define _XOPEN_SOURCE 1
#define _INCLUDE_POSIX_SOURCE 1
#include <float.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <time.h>
#include <sys/times.h>
#include <unistd.h>
/*
 * Two-argument minimum / maximum. Both macros are defined only when MIN is
 * not already defined (MAX is not checked separately). Each argument is
 * expanded twice, so an argument with side effects would be evaluated more
 * than once.
 */
#ifndef MIN
#define MIN(x,y) ((x)<(y)?(x):(y))
#define MAX(x,y) ((x)>(y)?(x):(y))
#endif
/* CPU-time snapshot recorded by mtimeStart(); mtime() measures from it. */
struct tms tmsStart;

/*
 * mtimeStart -- record the process's current CPU times as the reference
 * point for the next call to mtime().
 */
void mtimeStart(void) {
    times(&tmsStart);
}

/*
 * mtime -- CPU time (user + system) charged to this process since the most
 * recent mtimeStart(), expressed in milliseconds.
 *
 * The tick rate is queried at run time with sysconf(_SC_CLK_TCK) instead of
 * the obsolete CLK_TCK macro, which many current <time.h> implementations
 * no longer provide.
 *
 * Returns 0 if the tick rate cannot be determined.
 */
float mtime(void) {
    struct tms now;
    long ticksPerSec;
    double ticks;

    /* Sample the clock first so the sysconf() lookup is not part of the
     * measured interval. */
    times(&now);

    ticksPerSec = sysconf(_SC_CLK_TCK);
    if (ticksPerSec <= 0) {
        return 0.0f;
    }

    ticks = (double) (now.tms_stime - tmsStart.tms_stime)
          + (double) (now.tms_utime - tmsStart.tms_utime);

    /* ticks / ticksPerSec is seconds; the factor 1e3 converts to msec. */
    return (float) (1e3 * ticks / (double) ticksPerSec);
}
/* Element type of the benchmark arrays; main() sizes its byte counts from sizeof(real). */
typedef double real;
/* The three arrays used by the benchmark: the timed kernels read a and b and store into c. */
static real a[N],b[N],c[N];
/*
 * main -- run the benchmark and print the results.
 *
 * Initializes the arrays (timing that pass as a calibration figure), then
 * times four kernels -- copy, scale, sum and a[j]+3*b[j] -- NTIMES times
 * each, and finally prints one line per kernel with the transfer rate
 * derived from the fastest run plus the RMS, minimum and maximum times.
 * All times are the milliseconds reported by mtime().
 *
 * Returns 0.
 */
int main(void) {
    int j, k;
    /* elapsed[kernel][repetition], in msec. Deliberately not called
     * "times", which would shadow the POSIX times() function. */
    float elapsed[4][NTIMES];
    static float rmstime[4] = {0};
    static float mintime[4] = {FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX};
    static float maxtime[4] = {0};
    static const char *const label[4] = {"Assignment:",
                                         "Scaling :",
                                         "Summing :",
                                         "SAXPYing :"};
    /* Bytes moved per pass: copy and scale touch two arrays, the other
     * two kernels touch three. */
    static float bytes[4] = {2 * sizeof(real) * N,
                             2 * sizeof(real) * N,
                             3 * sizeof(real) * N,
                             3 * sizeof(real) * N};
    int timerTooCoarse = 0;

    /* --- SETUP --- determine precision and check timing --- */
    mtimeStart();
    for (j = 0; j < N; j++) {
        a[j] = 1.0;
        b[j] = 2.0;
        c[j] = 0.0;
    }
    /* mtime() yields milliseconds (1e3 * seconds), and the MB/s formula
     * below relies on that, so the unit printed here is msec. */
    printf("Timing calibration ; time = %.2f msec.\n", mtime());
    printf("Increase the size of the arrays if this is < 300\n"
           "and your clock precision is =< 1/100 second.\n");
    printf("---------------------------------------------------\n");

    /* --- MAIN LOOP --- repeat test cases NTIMES times --- */
    /* NOTE(review): c[] is never read after these loops, so an aggressive
     * optimizer may be entitled to discard them (see instruction 3 in the
     * file header) -- check that the reported times are plausible. */
    for (k = 0; k < NTIMES; k++) {
        mtimeStart();
        for (j = 0; j < N; j++) {
            c[j] = a[j];
        }
        elapsed[0][k] = mtime();

        mtimeStart();
        for (j = 0; j < N; j++) {
            c[j] = 3.0e0 * a[j];
        }
        elapsed[1][k] = mtime();

        mtimeStart();
        for (j = 0; j < N; j++) {
            c[j] = a[j] + b[j];
        }
        elapsed[2][k] = mtime();

        mtimeStart();
        for (j = 0; j < N; j++) {
            c[j] = a[j] + 3.0e0 * b[j];
        }
        elapsed[3][k] = mtime();
    }

    /* --- SUMMARY --- */
    for (k = 0; k < NTIMES; k++) {
        for (j = 0; j < 4; j++) {
            float t = elapsed[j][k];

            rmstime[j] = rmstime[j] + (t * t);
            mintime[j] = MIN(mintime[j], t);
            maxtime[j] = MAX(maxtime[j], t);
        }
    }

    printf("Function Rate (MB/s) RMS time Min time Max time\n");
    for (j = 0; j < 4; j++) {
        double rate;

        rmstime[j] = sqrt(rmstime[j] / (float) NTIMES);

        /* bytes per msec divided by 1e3 is MB/s. A minimum time of zero
         * means the clock did not tick during the fastest pass; report
         * 0 instead of dividing by zero. */
        if (mintime[j] > 0) {
            rate = bytes[j] / mintime[j] / 1e3;
        } else {
            rate = 0.0;
            timerTooCoarse = 1;
        }

        printf("%s%11.3f %11.3f %11.3f %11.3f\n",
               label[j],
               rate,
               rmstime[j],
               mintime[j],
               maxtime[j]);
    }

    if (timerTooCoarse) {
        fprintf(stderr,
                "Warning: a minimum time of 0 was measured; the timer is too "
                "coarse for N=%d.\n"
                "Rates printed as 0.000 are not meaningful; increase N.\n",
                (int) N);
    }
    return 0;
}
regards, mark hahn.
-- this space intentionally left non-blank. hahn@neurocog.lrdc.pitt.edu
This archive was generated by hypermail 2b29 : Tue Apr 18 2000 - 05:23:03 CDT