* Program: Stream
* Programmer: John D. McCalpin
* Revision: 2.0, September 30,1991
*
* CM5 Data Parallel version by Rob Womersely 19 April, 1993
*
* This program measures memory transfer rates in MB/s for simple
* computational kernels coded in Fortran. These numbers reveal the
* quality of code generation for simple uncacheable kernels as well
* as showing the cost of floating-point operations relative to memory
* accesses.
*
* INSTRUCTIONS:
* 1) Stream requires a cpu timing function called second().
* A sample is shown below. This is unfortunately rather
* system dependent. It helps to know the granularity of the
* timing. The code below assumes that the granularity is
* 1/100 seconds.
* 2) Stream requires a good bit of memory to run.
* Adjust the Parameter 'N' in the second line of the main
* program to give a 'timing calibration' of at least 20 clicks.
* This will provide rate estimates that should be good to
* about 5% precision.
* 3) Compile the code with full optimization. Many compilers
* generate unreasonably bad code before the optimizer tightens
* things up. If the results are unreasonably good, on the
* other hand, the optimizer might be too smart for me!
* 4) Mail the results to mccalpin@perelandra.cms.udel.edu
* Be sure to include:
* a) computer hardware model number and software revision
* b) the compiler flags
* c) all of the output from the test case.
*
* Thanks!
*
* STREAM main program, CM Fortran data-parallel version.
*
* Times four whole-array kernels NTIMES times each with the CM
* timers:
*     1) c = a              (assignment)
*     2) c = 3.0 * a        (scaling)
*     3) c = a + b          (summing)
*     4) c = a + 3.0 * b    (SAXPY-like)
* and prints, for every kernel, the transfer rate in MB/s derived
* from the minimum time together with the RMS, minimum and maximum
* times over all repetitions.
*
* Source layout is Fortran fixed form: statements start in column 7,
* statement labels sit in columns 1-5 and a '$' in column 6 marks a
* continuation line.
      PROGRAM stream
C     .. Parameters ..
C     n      - length of each of the three test vectors
C     ntimes - number of times every kernel is repeated
      INTEGER, PARAMETER :: n = 5000000, ntimes = 20
C     ..
C     .. Local Scalars ..
      REAL rate, t
      INTEGER j, k, nbpw, nvu
C     ..
C     .. Local Arrays ..
      REAL, ARRAY(n) :: a, b, c
C     CM Fortran layout directive for the three test vectors
C     (NOTE(review): exact meaning of :news should be confirmed
C     against the CM Fortran manual).
CMF$ LAYOUT a(:news), b(:news), c(:news)
      REAL, ARRAY(4) :: maxtime,mintime,rmstime
      REAL, ARRAY(4, ntimes) :: times
      INTEGER bytes(4)
      CHARACTER label(4)*12
C     ..
C     .. External Functions ..
      INTEGER CMF_number_of_processors
      DOUBLE PRECISION CM_timer_read_cm_busy
      DOUBLE PRECISION CM_timer_read_elapsed
      EXTERNAL CMF_number_of_processors
      EXTERNAL CM_timer_read_cm_busy
      EXTERNAL CM_timer_read_elapsed
      INTEGER realsize
      EXTERNAL realsize
C     ..
C     .. Intrinsic Functions ..
      INTRINSIC float,sqrt
C     ..
C     .. Data statements ..
      DATA label/' Assignment:',' Scaling :',' Summing :',
     $     ' SAXPYing :'/
C     bytes(j) multiplies n*nbpw in the rate formula below, i.e. it
C     is the number of array words counted per element for kernel j.
      DATA bytes/2,2,3,3/
C     ..
*     --- SETUP --- determine precision and check timing ---
      PRINT *,'STREAM: Measure memory transfer rates in MB/s'
      PRINT *,'for simple computational kernels in Fortran'
      PRINT *
      PRINT *,'CALL CMF_describe_array(a)'
      CALL CMF_describe_array(a)
      nvu = CMF_number_of_processors()
C     I5 rather than I2: an I2 field prints '**' as soon as the
C     count reaches 100 (for instance 128 vector units).
      WRITE(*,'(/1x,A,I5,A,I5,A/)') 'CM5 with partition of ',nvu/4,
     $     ' processors ( ',nvu,' vector units )'
C     nbpw = bytes per default REAL word as estimated by realsize
      nbpw = realsize()
C     Calibration: time the initialisation of the three vectors
C     with timer 0 and report the CM busy time.
      CALL CM_timer_clear(0)
      CALL CM_timer_start(0)
      a = 1.0
      b = 2.0
      c = 0.0
      CALL CM_timer_stop(0)
      t = CM_timer_read_cm_busy(0)
      PRINT *
      PRINT *,'Vector length = ', n
      PRINT *,'Timing calibration: Time = ',t*100,' hundredths',
     $     ' of a second'
      PRINT *,'Increase the size of the arrays if this is < 30'
      PRINT *,'and your clock precision is = < 1/100 second'
*     --- MAIN LOOP --- repeat test cases NTIMES times ---
C     Kernel i uses its own timer i; the elapsed time of repetition
C     k is stored in times(i,k).
      DO 60 k = 1, ntimes
C        Kernel 1: assignment
         CALL CM_timer_clear(1)
         CALL CM_timer_start(1)
         c = a
         CALL CM_timer_stop(1)
         t = CM_timer_read_elapsed(1)
         times(1,k) = t
C        Kernel 2: scaling
         CALL CM_timer_clear(2)
         CALL CM_timer_start(2)
         c = 3.0 * a
         CALL CM_timer_stop(2)
         t = CM_timer_read_elapsed(2)
         times(2,k) = t
C        Kernel 3: summing
         CALL CM_timer_clear(3)
         CALL CM_timer_start(3)
         c = a + b
         CALL CM_timer_stop(3)
         t = CM_timer_read_elapsed(3)
         times(3,k) = t
C        Kernel 4: SAXPY-like update
         CALL CM_timer_clear(4)
         CALL CM_timer_start(4)
         c = a + 3.0 * b
         CALL CM_timer_stop(4)
         t = CM_timer_read_elapsed(4)
         times(4,k) = t
   60 CONTINUE
*     --- SUMMARY ---
C     Reduce over the repetition axis (DIM=2) for every kernel.
      rmstime = SUM(times**2, DIM=2)
      rmstime = SQRT( rmstime/float(ntimes) )
      mintime = MINVAL(times, DIM=2)
      maxtime = MAXVAL(times, DIM=2)
      WRITE (*,FMT=9000)
      DO 90 j = 1,4
C        The byte count is formed in floating point so that a larger
C        n cannot overflow a default INTEGER, and a zero minimum
C        time (timer too coarse) reports a rate of 0 instead of
C        dividing by zero.
         IF (mintime(j).GT.0.0) THEN
            rate = float(n)*float(bytes(j)*nbpw)/mintime(j)/1.0e6
         ELSE
            rate = 0.0
         END IF
         WRITE (*,FMT=9010) label(j),rate,rmstime(j),mintime(j),
     $        maxtime(j)
   90 CONTINUE
 9000 FORMAT (/1x,57('-'),/,' Function :',1x,
     $       'Rate (MB/s) RMS time Min time Max time')
 9010 FORMAT (a,4(f10.4,2x))
      END
*-------------------------------------
* INTEGER FUNCTION realsize()
*
* A semi-portable way to determine the precision of default REAL
* in Fortran.
* Here used to guess how many bytes of storage a real number occupies.
*
* Result: 4 when default REAL appears to hold at most 8 decimal
* digits, 8 otherwise.  Three probes are tried in turn:
*   1) compare a DOUBLE PRECISION pi with acos(-1.0e0),
*   2) find the first j for which 1.0d0 + 10**(-j), stored in a
*      default REAL, compares equal to 1.0e0,
*   3) compare sqrt(1.0d0) with sqrt(1.0e0).
* When no probe is conclusive the user is asked for the size; the
* question is repeated until 4 or 8 is entered, and the program
* stops if the answer cannot be read at all.
*
* Source layout is Fortran fixed form: statements start in column 7,
* labels sit in columns 1-5, '$' in column 6 continues a line.
*
      INTEGER FUNCTION realsize()
C     .. Local Scalars ..
      DOUBLE PRECISION pi
      REAL diff,picalc,result,test
      INTEGER ios,j,ndigits
C     ..
C     .. Local Arrays ..
      DOUBLE PRECISION ref(30)
C     ..
C     .. External Subroutines ..
      EXTERNAL dummy
C     ..
C     .. Intrinsic Functions ..
      INTRINSIC abs,acos,log10,sqrt
C     ..
C     Test #1 - compare double precision pi to acos(-1.0e0)
      pi = 3.14159265358979323846264338327950288d0
      picalc = acos(-1.0e0)
      diff = abs(picalc-pi)
      IF (diff.NE.0.0) THEN
C        The size of the discrepancy gives the digit count.
         ndigits = -log10(abs(diff)) + 0.5
         GO TO 50
      END IF
      PRINT *,'Test #1 Failed = picalc=piexact'
      PRINT *,'Apparently Single=Double Precision'
      PRINT *,'Proceeding to Test #2'
      PRINT *,' '
C     Test #2 - compare single(1.0d0+delta) to 1.0e0
      DO 20 j = 1,30
         ref(j) = 1.0d0 + 10.0d0** (-j)
   20 CONTINUE
      DO 30 j = 1,30
         test = ref(j)
         ndigits = j
C        result is never used; the call presumably keeps the
C        optimizer from eliminating or widening test (TODO confirm).
         CALL dummy(test,result)
         IF (test.EQ.1.0e0) GO TO 50
   30 CONTINUE
      PRINT *,'Test #2 failed - Precision appears to exceed 30 digits'
      PRINT *,'Proceeding to Test #3'
C     Test #3 - abs(sqrt(1.0d0)-sqrt(1.0e0))
      diff = abs(sqrt(1.0d0)-sqrt(1.0e0))
      IF (diff.EQ.0.0) THEN
         PRINT *,'Test Failed - sqrt(1.0e0)=sqrt(1.0d0)'
         PRINT *,'Apparently Single=Double Precision'
         PRINT *,'Giving up'
         GO TO 60
      END IF
      ndigits = -log10(abs(diff)) + 0.5
C     A probe succeeded: report the digit count and map it to a size.
   50 WRITE (*,FMT='(a)') '--------------------------------------'
      WRITE (*,FMT='(1x,a,i2,a)') 'Single precision appears to have ',
     $     ndigits,' digits of accuracy'
      IF (ndigits.LE.8) THEN
         realsize = 4
      ELSE
         realsize = 8
      END IF
      WRITE (*,FMT='(1x,a,i1,a)') 'Assuming ',realsize,
     $     ' bytes per default REAL word'
      WRITE (*,FMT='(a)') '--------------------------------------'
      RETURN
C     Every probe failed: fall back to asking the user.
   60 PRINT *,'Hmmmm. I am unable to determine the size of a REAL'
   70 PRINT *,'Please enter the number of Bytes per REAL number : '
      READ (*,FMT=*,IOSTAT=ios) realsize
      IF (ios.NE.0) THEN
C        End of file or unreadable input: asking again cannot help.
         PRINT *,'Unable to read the number of Bytes per REAL number'
         STOP
      END IF
      IF (realsize.NE.4 .AND. realsize.NE.8) THEN
         PRINT *,'Your answer ',realsize,' does not make sense!'
         PRINT *,'Try again!'
C        Ask again until a sensible size is given.
         GO TO 70
      END IF
      PRINT *,'You have manually entered a size of ',realsize,
     $     ' bytes per REAL number'
      WRITE (*,FMT='(a)') '--------------------------------------'
      END
* SUBROUTINE dummy(q,r)
*
* Sets r to cos(q).  q is only read, r is only written.
* realsize calls this between storing a value in a REAL and testing
* it, and never uses r; presumably the call exists to keep an
* optimizer from eliminating that store (TODO confirm).
*
* Source layout is Fortran fixed form: statements start in column 7.
      SUBROUTINE dummy(q,r)
C     .. Scalar Arguments ..
      REAL q,r
C     ..
C     .. Intrinsic Functions ..
      INTRINSIC cos
C     ..
      r = cos(q)
      END
* This archive was generated by hypermail 2b29 : Tue Apr 18 2000 - 05:23:02 CDT