* Program: Stream
* Programmer: John D. McCalpin
* Revision: 2.0, September 30,1991
*
* CM5 Data Parallel version by Rob Womersely 19 April, 1993
*
* This program measures memory transfer rates in MB/s for simple
* computational kernels coded in Fortran. These numbers reveal the
* quality of code generation for simple uncacheable kernels as well
* as showing the cost of floating-point operations relative to memory
* accesses.
*
* INSTRUCTIONS:
* 1) Stream requires a cpu timing function called second().
* A sample is shown below. This is unfortunately rather
* system dependent. It helps to know the granularity of the
* timing. The code below assumes that the granularity is
* 1/100 seconds.
* 2) Stream requires a good bit of memory to run.
* Adjust the Parameter 'N' in the second line of the main
* program to give a 'timing calibration' of at least 20 clicks.
* This will provide rate estimates that should be good to
* about 5% precision.
* 3) Compile the code with full optimization. Many compilers
* generate unreasonably bad code before the optimizer tightens
* things up. If the results are unreasonably good, on the
* other hand, the optimizer might be too smart for me!
* 4) Mail the results to mccalpin@perelandra.cms.udel.edu
* Be sure to include:
* a) computer hardware model number and software revision
* b) the compiler flags
* c) all of the output from the test case.
*
* Thanks!
*
* Main program: times four whole-array kernels (copy, scale, sum,
* triad) NTIMES times each with the CM timers and prints, for every
* kernel, the transfer rate in MB/s plus RMS, minimum and maximum
* elapsed time.
*
* Layout note: this is fixed-form source.  Statements start in
* column 7, statement labels live in columns 1-5 and a '$' in
* column 6 marks a continuation line.
      PROGRAM stream
      IMPLICIT NONE
C     .. Parameters ..
C     n      - length of each test vector
C     ntimes - number of timed repetitions of every kernel
      INTEGER, PARAMETER :: n = 5000000, ntimes = 20
C     ..
C     .. Local Scalars ..
      DOUBLE PRECISION t
      INTEGER j, k, nbpw, nvu
C     ..
C     .. Local Arrays ..
      DOUBLE PRECISION, ARRAY(n) :: a, b, c
CMF$ LAYOUT a(:news), b(:news), c(:news)
      DOUBLE PRECISION, ARRAY(4) :: maxtime,mintime,rmstime
      DOUBLE PRECISION, ARRAY(4, ntimes) :: times
C     bytes(j) - words counted per vector element for kernel j
C     label(j) - text printed in front of the results of kernel j
      INTEGER bytes(4)
      CHARACTER label(4)*12
C     ..
C     .. External Functions ..
      INTEGER CMF_number_of_processors
      DOUBLE PRECISION CM_timer_read_cm_busy, CM_timer_read_cm_idle
      DOUBLE PRECISION CM_timer_read_elapsed
      EXTERNAL CMF_number_of_processors
      EXTERNAL CM_timer_read_cm_busy, CM_timer_read_cm_idle
      EXTERNAL CM_timer_read_elapsed
      INTEGER realsize
      EXTERNAL realsize
C     ..
C     .. Intrinsic Functions ..
      INTRINSIC dble,sqrt
C     ..
C     .. Data statements ..
      DATA label/' Assignment:',' Scaling :',' Summing :',
     $ ' SAXPYing :'/
      DATA bytes/2,2,3,3/
C     ..
* --- SETUP --- determine precision and check timing ---
      PRINT *,'STREAM: Measure memory transfer rates in MB/s'
      PRINT *,'for simple computational kernels in Fortran'
      PRINT *
      PRINT *,'CALL CMF_describe_array(a)'
      CALL CMF_describe_array(a)
      PRINT *
      nvu = CMF_number_of_processors()
C     I6 fields: the former I2 fields printed '**' as soon as the
C     partition had 100 or more vector units.
      WRITE(*,'(/1x,A,I6,A,I6,A/)') 'CM5 with partition of ',nvu/4,
     $ ' processors ( ',nvu,' vector units )'
      nbpw = realsize()
C     Time the initialisation of the three vectors; the result is
C     printed as the timing calibration.
      CALL CM_timer_clear(0)
      CALL CM_timer_start(0)
      a = 1.0D0
      b = 2.0D0
      c = 0.0D0
      CALL CM_timer_stop(0)
      t = CM_timer_read_elapsed(0)
      PRINT *
      PRINT *,'Vector length = ', n
      PRINT *,'Timing calibration: Time = ',t*100,' hundredths',
     $ ' of a second'
      PRINT *,'Increase the size of the arrays if this is < 30'
      PRINT *,'and your clock precision is =< 1/100 second'
* --- MAIN LOOP --- repeat test cases NTIMES times ---
      DO k = 1, ntimes
C         Kernel 1: assignment
          CALL CM_timer_clear(1)
          CALL CM_timer_start(1)
          c = a
          CALL CM_timer_stop(1)
          times(1,k) = CM_timer_read_elapsed(1)
C         Kernel 2: scaling
          CALL CM_timer_clear(2)
          CALL CM_timer_start(2)
          c = 3.0D0 * a
          CALL CM_timer_stop(2)
          times(2,k) = CM_timer_read_elapsed(2)
C         Kernel 3: summing
          CALL CM_timer_clear(3)
          CALL CM_timer_start(3)
          c = a + b
          CALL CM_timer_stop(3)
          times(3,k) = CM_timer_read_elapsed(3)
C         Kernel 4: SAXPY-like update
          CALL CM_timer_clear(4)
          CALL CM_timer_start(4)
          c = a + 3.0D0 * b
          CALL CM_timer_stop(4)
          times(4,k) = CM_timer_read_elapsed(4)
      END DO
* --- SUMMARY ---
C     Reduce over the repetition index (dimension 2 of times).
      rmstime = SUM(times**2, DIM=2)
      rmstime = SQRT( rmstime/dble(ntimes) )
      mintime = MINVAL(times, DIM=2)
      maxtime = MAXVAL(times, DIM=2)
      WRITE (*,FMT=9000)
      DO j = 1,4
C         The byte count is formed in DOUBLE PRECISION so that a
C         larger n cannot overflow default INTEGER arithmetic.
          WRITE (*,FMT=9010) label(j),
     $      dble(n)*dble(bytes(j)*nbpw)/mintime(j)/1.0D6,
     $      rmstime(j),mintime(j),maxtime(j)
      END DO
 9000 FORMAT (/1x, 57('-'),/,' Function :',1x,
     $ 'Rate (MB/s) RMS time Min time Max time')
 9010 FORMAT (a,4(f10.4,2x))
      END
*-------------------------------------
* INTEGER FUNCTION realsize()
*
* A semi-portable way to estimate the precision of DOUBLE PRECISION
* in Fortran.  Here used to guess how many bytes of storage a
* DOUBLE PRECISION number occupies.
*
* Method: find the first j in 1..30 for which 1.0d0 + 10**(-j)
* compares equal to 1.0d0.  That j is reported as the number of
* digits; 8 or fewer digits is taken to mean 4-byte words, more
* than 8 digits to mean 8-byte words.  If no such j is found the
* user is asked for the size, and is asked again until the answer
* is 4 or 8.
*
* Returns: 4 or 8 (bytes per DOUBLE PRECISION word).
* Side effects: writes a report to standard output; may read from
* standard input and stops the program if that read fails.
*
      INTEGER FUNCTION realsize()
      IMPLICIT NONE
C     .. Local Scalars ..
      DOUBLE PRECISION result,test
      INTEGER ios,j,ndigits
C     ..
C     .. Local Arrays ..
      DOUBLE PRECISION ref(30)
C     ..
C     .. External Subroutines ..
      EXTERNAL dummy
C     ..
C     Build the reference values 1 + 10**(-j).
      DO 20 j = 1,30
          ref(j) = 1.0d0 + 10.0d0**(-j)
   20 CONTINUE
C     Search for the first reference value that equals 1 exactly.
      DO 30 j = 1,30
          test = ref(j)
          ndigits = j
C         NOTE(review): result is never used; the call presumably
C         exists to stop the compiler from simplifying the test
C         below - confirm before removing it.
          CALL dummy(test,result)
          IF (test.EQ.1.0D0) GO TO 50
   30 CONTINUE
      GO TO 60
C     --- automatic detection succeeded ---
   50 WRITE (*,FMT='(a)') ' --------------------------------------'
      WRITE (*,FMT='(1x,a,i2,a)') 'Double precision appears to have ',
     $ ndigits,' digits of accuracy'
      IF (ndigits.LE.8) THEN
          realsize = 4
      ELSE
          realsize = 8
      END IF
      WRITE (*,FMT='(1x,a,i1,a)') 'Assuming ',realsize,
     $ ' bytes per DOUBLEPRECISION word'
      WRITE (*,FMT='(a)') ' --------------------------------------'
      RETURN
C     --- automatic detection failed: ask the user ---
   60 PRINT *,' Hmmmm. I am unable to determine the size of a',
     $ ' DOUBLEPRECISION number'
C     Label 70 is the re-prompt point; the loop only ends with a
C     valid answer (4 or 8) or a failed read.
   70 PRINT *,' Please enter the number of Bytes per DOUBLEPRECISION',
     $ ' number : '
      READ (*,FMT=*,IOSTAT=ios) realsize
      IF (ios.NE.0) THEN
C         End of file or unreadable input: retrying could loop
C         forever, so give up with an explicit message.
          PRINT *,' Unable to read a size from standard input'
          STOP
      END IF
      IF (realsize.NE.4 .AND. realsize.NE.8) THEN
          PRINT *,' Your answer ',realsize,' does not make sense!'
          PRINT *,' Try again!'
          GO TO 70
      END IF
      PRINT *,'You have manually entered a size of ',realsize,
     $ ' bytes per DOUBLEPRECISION number'
      WRITE (*,FMT='(a)') '--------------------------------------'
      END
*-------------------------------------
* SUBROUTINE dummy(q,r)
*
* Stores cos(q) in r.  realsize calls this routine between
* computing a value and comparing it; the value returned in r is
* not used there.
*
* Arguments:
*   q  (input)   DOUBLE PRECISION  argument of the cosine
*   r  (output)  DOUBLE PRECISION  receives cos(q)
*
* NOTE(review): no INTENT attributes are given on purpose - the
* routine looks like an opaque call meant to hide q from the
* optimizer; confirm before tightening the interface.
*
      SUBROUTINE dummy(q,r)
      IMPLICIT NONE
C     .. Scalar Arguments ..
      DOUBLE PRECISION q,r
C     ..
C     .. Intrinsic Functions ..
      INTRINSIC cos
C     ..
      r = cos(q)
      RETURN
      END
* This archive was generated by hypermail 2b29 : Tue Apr 18 2000 - 05:23:02 CDT