CM5 stream_s.fcm

From: rsw@hydra.maths.unsw.EDU.AU
Date: Thu Apr 29 1993 - 20:34:57 CDT

Next message: rsw@hydra.maths.unsw.EDU.AU: "CM5 Stream_d results"
Previous message: rsw@hydra.maths.unsw.EDU.AU: "CM5 stream_d.fcm"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]

* Program: Stream
* Programmer: John D. McCalpin
* Revision: 2.0, September 30,1991
*
* CM5 Data Parallel version by Rob Womersley 19 April, 1993
*
* This program measures memory transfer rates in MB/s for simple
* computational kernels coded in Fortran. These numbers reveal the
* quality of code generation for simple uncacheable kernels as well
* as showing the cost of floating-point operations relative to memory
* accesses.
*
* INSTRUCTIONS:
* 1) Stream requires a cpu timing function called second().
* A sample is shown below. This is unfortunately rather
* system dependent. It helps to know the granularity of the
* timing. The code below assumes that the granularity is
* 1/100 seconds.
* (Note: this CM5 version does not call second(); it times
* each kernel with the CM_timer_* routines instead.)
* 2) Stream requires a good bit of memory to run.
* Adjust the Parameter 'N' in the second line of the main
* program to give a 'timing calibration' of at least 20 clicks.
* This will provide rate estimates that should be good to
* about 5% precision.
* 3) Compile the code with full optimization. Many compilers
* generate unreasonably bad code before the optimizer tightens
* things up. If the results are unreasonably good, on the
* other hand, the optimizer might be too smart for me!
* 4) Mail the results to mccalpin@perelandra.cms.udel.edu
* Be sure to include:
* a) computer hardware model number and software revision
* b) the compiler flags
* c) all of the output from the test case.
*
* Thanks!
*
      PROGRAM stream
C
C     STREAM memory-bandwidth benchmark, CM Fortran version.
C
C     Times four whole-array kernels on arrays a, b, c of length n
C     (copy, scale, sum, and a + 3*b), each repeated ntimes times,
C     and prints for every kernel the best transfer rate in MB/s
C     together with the RMS, minimum and maximum elapsed time.
C
C     The source is fixed form: statements start in column 7,
C     labels sit in columns 1-5 and column 6 marks a continuation.
C
C .. Parameters ..
C     n      - length of each of the three work arrays
C     ntimes - number of repetitions of the set of four kernels
      INTEGER, PARAMETER :: n = 5000000, ntimes = 20
C ..
C .. Local Scalars ..
C     t    - scratch for the most recent timer reading
C     nbpw - bytes per default REAL, as reported by realsize()
C     nvu  - value returned by CMF_number_of_processors()
      REAL t
      INTEGER j, k, nbpw, nvu
C ..
C .. Local Arrays ..
      REAL, ARRAY(n) :: a, b, c
CMF$ LAYOUT a(:news), b(:news), c(:news)

C     times(i,k) is the elapsed time of kernel i in repetition k;
C     the other three arrays hold per-kernel reductions of it.
      REAL, ARRAY(4) :: maxtime,mintime,rmstime
      REAL, ARRAY(4, ntimes) :: times

      INTEGER bytes(4)
      CHARACTER label(4)*12
C ..
C .. External Functions ..
      INTEGER CMF_number_of_processors
      DOUBLE PRECISION CM_timer_read_cm_busy, CM_timer_read_cm_idle
      DOUBLE PRECISION CM_timer_read_elapsed
      EXTERNAL CMF_number_of_processors
      EXTERNAL CM_timer_read_cm_busy, CM_timer_read_cm_idle
      EXTERNAL CM_timer_read_elapsed

      INTEGER realsize
      EXTERNAL realsize
C ..
C .. Intrinsic Functions ..
      INTRINSIC float,max,min,sqrt
C ..
C .. Data statements ..
C     Each label is padded to the declared 12 characters so that the
C     colons line up in the report.
      DATA label/' Assignment:',' Scaling   :',' Summing   :',
     $     ' SAXPYing  :'/
C     Number of arrays each kernel touches per element:
C     c=a and c=3*a use two, c=a+b and c=a+3*b use three.
      DATA bytes/2,2,3,3/
C ..

* --- SETUP --- determine precision and check timing ---

      PRINT *,'STREAM: Measure memory transfer rates in MB/s'
      PRINT *,'for simple computational kernels in Fortran'
      PRINT *
      PRINT *,'CALL CMF_describe_array(a)'
      CALL CMF_describe_array(a)

C     The report below treats the returned count as vector units
C     and divides by 4 to get processors.
C     NOTE(review): confirm that is what the routine returns here.
      nvu = CMF_number_of_processors()
C     I5 rather than I2: a two-digit field prints asterisks as soon
C     as the count reaches 100.
      WRITE(*,'(/1x,A,I5,A,I5,A/)') 'CM5 with partition of ',nvu/4,
     $     ' processors ( ',nvu,' vector units )'

      nbpw = realsize()

C     Timing calibration: time the initial fill of the three arrays
C     with timer 0 and report its busy time in hundredths of a second.
      CALL CM_timer_clear(0)
      CALL CM_timer_start(0)
      a = 1.0
      b = 2.0
      c = 0.0
      CALL CM_timer_stop(0)
      t = CM_timer_read_cm_busy(0)
      PRINT *
      PRINT *,'Vector length = ', n
      PRINT *,'Timing calibration: Time = ',t*100,' hundredths',
     $     ' of a second'
      PRINT *,'Increase the size of the arrays if this is < 30'
      PRINT *,'and your clock precision is = < 1/100 second'

* --- MAIN LOOP --- repeat test cases NTIMES times ---
C     Each kernel has its own timer (1-4), cleared before every run
C     so that the elapsed reading covers that single run only.
      DO 60 k = 1, ntimes

C         Kernel 1: copy
          CALL CM_timer_clear(1)
          CALL CM_timer_start(1)
          c = a
          CALL CM_timer_stop(1)
          t = CM_timer_read_elapsed(1)
          times(1,k) = t

C         Kernel 2: scale
          CALL CM_timer_clear(2)
          CALL CM_timer_start(2)
          c = 3.0 * a
          CALL CM_timer_stop(2)
          t = CM_timer_read_elapsed(2)
          times(2,k) = t

C         Kernel 3: sum
          CALL CM_timer_clear(3)
          CALL CM_timer_start(3)
          c = a + b
          CALL CM_timer_stop(3)
          t = CM_timer_read_elapsed(3)
          times(3,k) = t

C         Kernel 4: a + 3*b
          CALL CM_timer_clear(4)
          CALL CM_timer_start(4)
          c = a + 3.0 * b
          CALL CM_timer_stop(4)
          t = CM_timer_read_elapsed(4)
          times(4,k) = t

   60 CONTINUE

* --- SUMMARY ---
C     Reduce over the repetition axis (dimension 2) of times.
      rmstime = SUM(times**2, DIM=2)
      rmstime = SQRT( rmstime/float(ntimes) )
      mintime = MINVAL(times, DIM=2)
      maxtime = MAXVAL(times, DIM=2)
      WRITE (*,FMT=9000)
      DO 90 j = 1,4
C         Rate = bytes moved / best time, in units of 1.0e6 bytes/s.
C         The byte count is formed in REAL so that a larger n cannot
C         overflow default INTEGER arithmetic.
          WRITE (*,FMT=9010) label(j),
     $        float(n)*float(bytes(j)*nbpw)/mintime(j)/1.0e6,
     $        rmstime(j),mintime(j),maxtime(j)
   90 CONTINUE

C     The header text is padded to 57 characters, the same width as
C     the rule of dashes printed above it.
 9000 FORMAT (/1x,57('-'),/,' Function   :',1x,
     $       'Rate (MB/s)  RMS time   Min time   Max time')
 9010 FORMAT (a,4(f10.4,2x))
      END

*-------------------------------------
* INTEGER FUNCTION realsize()
*
* A semi-portable way to determine the precision of default REAL
* in Fortran.
* Here used to guess how many bytes of storage a real number occupies.
*
* The FUNCTION statement and everything after it follow fixed-form
* layout (statement text begins in column 7).
      INTEGER FUNCTION realsize()
C
C     Estimate how many bytes a default REAL occupies.
C
C     Result: 4 when default REAL appears to hold at most 8 decimal
C     digits, 8 otherwise.  If none of the tests below can decide,
C     the user is asked for the value and is re-prompted until the
C     answer is 4 or 8.
C
C     Side effects: prints a short report on standard output and,
C     in the fallback case, reads from standard input.

C Test #1 - compare double precision pi to acos(-1.0e0)

C .. Local Scalars ..
      DOUBLE PRECISION pi
      REAL diff,picalc,result,test
      INTEGER j,ndigits
C ..
C .. Local Arrays ..
      DOUBLE PRECISION ref(30)
C ..
C .. External Subroutines ..
      EXTERNAL dummy
C ..
C .. Intrinsic Functions ..
      INTRINSIC abs,acos,log10,sqrt
C ..
      pi = 3.14159265358979323846264338327950288d0
      picalc = acos(-1.0e0)
      diff = abs(picalc-pi)
C     The comparison with exactly zero is deliberate: a zero
C     difference means the REAL value of pi matched the DOUBLE one.
      IF (diff.EQ.0.0) THEN
          PRINT *,'Test #1 Failed = picalc=piexact'
          PRINT *,'Apparently Single=Double Precision'
          PRINT *,'Proceeding to Test #2'
          PRINT *,' '
          GO TO 10
      ELSE
C         Number of matching decimal digits, rounded to nearest.
          ndigits = -log10(abs(diff)) + 0.5
          GO TO 50
      END IF

C Test #2 - compare single(1.0d0+delta) to 1.0e0

   10 DO 20 j = 1,30
          ref(j) = 1.0d0 + 10.0d0** (-j)
   20 CONTINUE

C     Find the first j for which 1 + 10**(-j), stored in a REAL,
C     is indistinguishable from 1.
      DO 30 j = 1,30
          test = ref(j)
          ndigits = j
C         NOTE(review): the call presumably exists to keep the
C         compiler from optimising the store to test away; its
C         result is not used.
          CALL dummy(test,result)
          IF (test.EQ.1.0e0) THEN
              GO TO 50
          END IF
   30 CONTINUE
      PRINT *,'Test #2 failed - Precision appears to exceed 30 digits'
      PRINT *,'Proceeding to Test #3'

C Test #3 - abs(sqrt(1.0d0)-sqrt(1.0e0))
C     NOTE(review): sqrt(1.0) is presumably exactly 1 in every
C     precision, so this difference looks like it is always zero
C     and the test can never succeed - confirm whether a different
C     argument was intended.

      diff = abs(sqrt(1.0d0)-sqrt(1.0e0))
      IF (diff.EQ.0.0) THEN
          PRINT *,'Test Failed - sqrt(1.0e0)=sqrt(1.0d0)'
          PRINT *,'Apparently Single=Double Precision'
          PRINT *,'Giving up'
          GO TO 60
      ELSE
          ndigits = -log10(abs(diff)) + 0.5
          GO TO 50
      END IF

C     A test produced a digit count: map it to a byte size.
   50 WRITE (*,FMT='(a)') '--------------------------------------'
      WRITE (*,FMT='(1x,a,i2,a)') 'Single precision appears to have ',
     $     ndigits,' digits of accuracy'
      IF (ndigits.LE.8) THEN
          realsize = 4
      ELSE
          realsize = 8
      END IF
      WRITE (*,FMT='(1x,a,i1,a)') 'Assuming ',realsize,
     $     ' bytes per default REAL word'
      WRITE (*,FMT='(a)') '--------------------------------------'
      RETURN

C     Fallback: no test could decide, so ask the user.  The prompt
C     at label 70 is repeated until the answer is 4 or 8, so that an
C     unusable size is never returned to the caller.
   60 PRINT *,'Hmmmm. I am unable to determine the size of a REAL'
   70 PRINT *,'Please enter the number of Bytes per REAL number : '
      READ (*,FMT=*) realsize
      IF (realsize.NE.4 .AND. realsize.NE.8) THEN
          PRINT *,'Your answer ',realsize,' does not make sense!'
          PRINT *,'Try again!'
          GO TO 70
      END IF
      PRINT *,'You have manually entered a size of ',realsize,
     $     ' bytes per REAL number'
      WRITE (*,FMT='(a)') '--------------------------------------'
      END

      SUBROUTINE dummy(q,r)
C
C     Store the cosine of q in r.
C
C     q - input value, in radians as required by COS
C     r - receives COS(q)
C
C     NOTE(review): realsize() calls this between storing and
C     testing a REAL value and ignores r; presumably the call only
C     exists to stop the compiler optimising that store away.
C
      REAL :: q, r

      r = COS(q)
      END

Next message: rsw@hydra.maths.unsw.EDU.AU: "CM5 Stream_d results"
Previous message: rsw@hydra.maths.unsw.EDU.AU: "CM5 stream_d.fcm"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]

This archive was generated by hypermail 2b29 : Tue Apr 18 2000 - 05:23:02 CDT