Hi John,
Here are the results for the XP1000. The submission was long delayed, as
you can see from the date; that was my fault.
Thanks!
- john
Script started on Tue Mar 2 08:16:45 1999
% ls *ftp*
stream_d.f_as_at_ftp_site_22may97
% diff *ftp* tryit.f
53c53,54
< PARAMETER (n=2000000,offset=0,ndim=n+offset,ntimes=10)
---
> PARAMETER (n = 1000020,offset=0)
> PARAMETER (ndim=n+offset,ntimes=10)
334a336,359
> END
>
> DOUBLE PRECISION FUNCTION SECOND
> *
> * Assume we will be called often enough so that rpcc counter
> * doesn't overflow!
> *
> INTEGER*8 TICK, PREV_TICK
> DOUBLE PRECISION RPCC_DELTA, PREV_SEC
> INTEGER*8 FIXED_RPCC
> EXTERNAL FIXED_RPCC, RPCC_DELTA
>
> DATA PREV_TICK /0/
>
> TICK = FIXED_RPCC()
> IF (PREV_TICK .EQ. 0) THEN
> SECOND = 0D0
> ELSE
> SECOND = PREV_SEC + RPCC_DELTA (%VAL(PREV_TICK), %VAL(TICK))
> END IF
>
> PREV_SEC = SECOND
> PREV_TICK = TICK
> RETURN
% cat rpcc.c
/* rpcc.c j.henning hacked from example by d.grunwald*/
#include <c_asm.h>
#include <sys/sysinfo.h>
#include <machine/hal_sysinfo.h>
#include <unistd.h>
#include <sys/resource.h>
#include <sys/types.h>
static double scale = -1;
static pid_t mypid = -1;
/* fix_rpcc adds the two halves of the register (bias & offset), since some tools return it w/o this fixup */
/*
 * Fold a raw RPCC register value into a single cycle count.
 *
 * rpcc:    raw 64-bit register value; bits 63..32 are taken as the
 *          offset half and bits 31..0 as the cycle-counter half.
 * returns: (offset + cycles) reduced modulo 2^32.
 *
 * The counter is only 32 bits wide, so the carry out of the addition is
 * discarded.  Keeping it would produce 33-bit values, which the 32-bit
 * wrap-around handling in rpcc_delta does not expect.
 *
 * Requires a 64-bit unsigned long (the shift by 32 relies on it).
 */
unsigned long fix_rpcc(unsigned long rpcc)
{
    const unsigned long low32 = 0xffffffffUL;
    unsigned long offset = (rpcc >> 32) & low32;
    unsigned long cycles = rpcc & low32;

    return (offset + cycles) & low32;
}

/* Compute time in *seconds* (note that Grunwald's original version did microseconds) given two fixedup RPCCs */
/*
 * Elapsed time, in seconds, between two fixed-up RPCC readings.
 *
 * ul_start: fixed-up cycle count taken first.
 * ul_stop:  fixed-up cycle count taken second.
 * returns:  ((ul_stop - ul_start) modulo 2^32) * seconds-per-cycle.
 *
 * The cycle counter is 32 bits wide, so the tick distance is computed
 * modulo 2^32.  This handles a single counter wrap exactly: the wrapped
 * distance is 2^32 - ul_start + ul_stop, not 0xffffffff - ul_start +
 * ul_stop.  Intervals of 2^32 cycles or more cannot be measured.
 *
 * On the first call the file-level `scale` is initialised from the CPU
 * clock rate reported by getsysinfo(GSI_CPU_INFO); later calls reuse it.
 */
double rpcc_delta(unsigned long ul_start, unsigned long ul_stop)
{
    unsigned long ticks;

    if (scale == -1) {
        struct cpu_info cpubuf;
        double freq;
        int err;

        err = getsysinfo(GSI_CPU_INFO, &cpubuf, sizeof(cpubuf));

        /*
         * Fall back to 300 MHz unless the call filled in the buffer with
         * a usable rate.  NOTE(review): getsysinfo is documented to
         * return 0 when the requested item is unavailable, in which case
         * cpubuf is not written; a zero rate would make scale infinite.
         */
        if (err <= 0 || cpubuf.mhz <= 0) {
            freq = 300;
        } else {
            freq = cpubuf.mhz;
        }
        freq *= 1000000;    /* MHz -> Hz */

        /* scale is seconds per cycle: 1 over the clock frequency */
        scale = 1 / freq;
    }

    /* Unsigned subtraction followed by the mask gives the distance mod 2^32. */
    ticks = (ul_stop - ul_start) & 0xffffffffUL;

    return ticks * scale;
}
unsigned long fixed_rpcc(void) { unsigned long i;
i = asm("rpcc %v0"); return fix_rpcc(i); }
% cc -c rpcc.c
% f77 -non_shared -fast -O5 -unroll 8 -arch ev6 -assume nounderscore -V rpcc.o tryit.f
% grep COMPILER: tryit.l
COMPILER: DIGITAL Fortran 77 V5.2-171-428BH
% ./a.out
----------------------------------------------
Double precision appears to have 16 digits of accuracy
Assuming 8 bytes per DOUBLE PRECISION word
----------------------------------------------
Array size = 1000020
Offset = 0
The total memory requirement is 22 MB
You are running each test 10 times
The *best* time for each test is used
----------------------------------------------------
Your clock granularity appears to be less than one microsecond
Your clock granularity/precision appears to be 1 microseconds
The tests below will each take a time on the order of 12706 microseconds
(= 12706 clock ticks)
Increase the size of the arrays if this shows that
you are not getting at least 20 clock ticks per test.
----------------------------------------------------
WARNING -- The above is only a rough guideline.
For best results, please be sure you know the
precision of your system timer.
----------------------------------------------------
Function Rate (MB/s) RMS time Min time Max time
Copy: 900.2357 0.0179 0.0178 0.0192
Scale: 971.1877 0.0166 0.0165 0.0172
Add: 917.2598 0.0262 0.0262 0.0265
Triad: 967.7494 0.0248 0.0248 0.0250
Sum of a is = 1.153323847295668E+018
Sum of b is = 2.306647694567821E+017
Sum of c is = 3.075530259436257E+017
% exit
% script done on Tue Mar 2 08:17:29 1999
This archive was generated by hypermail 2b29 : Tue Apr 18 2000 - 05:23:08 CDT