Modified Stream prog.

From: Charles Grassl (cmg@ferrari.cray.com)
Date: Mon Jul 24 1995 - 14:03:42 CDT

Next message: Charles Grassl: "STREAM for 512 PE CRAY T3D"
Previous message: Charles Grassl: "Re: Stream results for T90"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]

John;

Below is a copy of the STREAM program which uses different starting points for
each array.

Charles Grassl
Cray Research, Inc.

* Program: Stream
* Programmer: John D. McCalpin
* Revision: 2.0, September 30,1991
*
* This program measures memory transfer rates in MB/s for simple
* computational kernels coded in Fortran. These numbers reveal the
* quality of code generation for simple uncacheable kernels as well
* as showing the cost of floating-point operations relative to memory
* accesses.
*
* INSTRUCTIONS:
* 1) Stream requires a cpu timing function called second().
* A sample is shown below. This is unfortunately rather
* system dependent. It helps to know the granularity of the
* timing. The code below assumes that the granularity is
* 1/100 seconds.
* 2) Stream requires a good bit of memory to run.
* Adjust the Parameter 'N' in the second line of the main
* program to give a 'timing calibration' of at least 20 clicks.
* This will provide rate estimates that should be good to
* about 5% precision.
* 3) Compile the code with full optimization. Many compilers
* generate unreasonably bad code before the optimizer tightens
* things up. If the results are unreasonable good, on the
* other hand, the optimizer might be too smart for me!
* 4) Mail the results to mccalpin@perelandra.cms.udel.edu
* Be sure to include:
* a) computer hardware model number and software revision
* b) the compiler flags
* c) all of the output from the test case.
*
* Thanks!
*
        program Stream
        parameter (N = 4 001 *1024, NTIMES = 2)
        integer ipad,aoff,boff,coff
        parameter (ipad = 20248 )
        real a(N+ipad),b(N+ipad),c(N+ipad),times(4,NTIMES)
        common /abccom/a,b,c
        real rmstime(4),mintime(4),maxtime(4)
        character*11 label(4)
        real second
        integer realsize,nbpw,bytes(4)
        external second,realsize
        data rmstime/4*0.0/,mintime/4*1.0e+36/,maxtime/4*0.0/
        data label/'Assignment:','Scaling: ','Summing: ',
     $ 'SAXPYing: '/
        data bytes/2,2,3,3/
        data aoff,boff,coff/0,0,0/
        etime()=0.001*timef()

* --- SETUP --- determine precision and check timing ---

write(6,9020) ncpu(),N/1024,ipad

write(6,9030) (label(j),j=1,4)

nbpw = realsize()

        t = etime()
        do 10 j=1,N
            a(j) = 1.0
            b(j) = 2.0
            c(j) = 0.0
   10 continue
        t = etime()-t

c print *,'Timing calibration ; time = ',t*100,' hundredths',
c $ ' of a second'
c print *,'Increase the size of the arrays if this is <30 ',
c $ ' and your clock precision is =<1/100 second'
c print *,'---------------------------------------------------'

        do coff=8,32,8
        do boff=8,32,8
        do aoff=8,32,8
* --- MAIN LOOP --- repeat test cases NTIMES times ---
        do 1000 k=1,NTIMES

            t = etime()
            do 20 j=1,N
                c(coff+j) = a(aoff+j)
   20 continue
            t = etime()-t
            times(1,k) = t

            t = etime()
            do 30 j=1,N
                c(coff+j) = 3.0e0*a(aoff+j)
   30 continue
            t = etime()-t
            times(2,k) = t

            t = etime()
            do 40 j=1,N
                c(coff+j) = a(aoff+j)+b(boff+j)
   40 continue
            t = etime()-t
            times(3,k) = t

            t = etime()
            do 50 j=1,N
                c(coff+j) = a(aoff+j)+3.0e0*b(boff+j)
   50 continue
            t = etime()-t
            times(4,k) = t
            call dummysub(a,b,c,n)
1000 continue

* --- SUMMARY ---
        do 300 k=1,NTIMES
            do 200 j=1,4
                rmstime(j) = rmstime(j) + times(j,k)**2
                mintime(j) = min( mintime(j), times(j,k) )
                maxtime(j) = max( maxtime(j), times(j,k) )
  200 continue
  300 continue

write(6,9040) aoff,boff,coff,
. (N*bytes(j)*nbpw/mintime(j)/1.0e6,j=1,4)

c write (*,9000)
c do 320 j=1,4
c rmstime(j) = sqrt(rmstime(j)/float(NTIMES))
c write (*,9010) label(j),N*bytes(j)*nbpw/mintime(j)/1.0e6,
c $ rmstime(j),mintime(j),maxtime(j)
c 320 continue
        end do
        end do
        end do

9000 format (' Function',5x,
$ 'Rate (MB/s) RMS time Min time Max time')
9010 format (1x,a11,4(2x,f11.4))

9020 format(/' STREAM benchmark'
     . /' ----------------'
     . /' Number of CPUs: ',i8
     . /' Array size: ',i8,' Kwords'
     . /' Array padding: ',i8,' Words'/)

9030 format (/1x,' Aoff Boff Coff',2x,4a11
. /1x,61('-'))
9040 format (1x,3i5,2x,4f11.3)

stop
end

*-------------------------------------
* Sample timing routine
* This code works on Sun and Silicon Graphics machines.
* real function second(t0)
* real dummy(2)
* second = etime(dummy)
* end
* Sample timing routine
* This code works on IBM RS/6000 machines
cray real function second(t0)
cray second = mclock()*0.01
cray end

*-------------------------------------
* INTEGER FUNCTION realsize()
*
* A semi-portable way to determine the precision of default REAL
* in Fortran.
* Here used to guess how many bytes of storage a real number occupies.
*
        integer function realsize()
        double precision ref(30)
        real test
        double precision pi
        logical silent
        data silent/.true./

C Test #1 - compare double precision pi to acos(-1.0e0)

        pi = 3.14159 26535 89793 23846 26433 83279 50288 d0
        picalc = acos(-1.0e0)
        diff = abs(picalc-pi)
        if (diff.eq.0.0) then
           if (.not. silent) then
             print *,'Test #1 Failed = picalc=piexact'
            print *,'Apparently Single=Double Precision'
            print *,'Proceeding to Test #2'
            print *,' '
           end if
            goto 200
        else
            ndigits = -log10(abs(diff))+0.5
            goto 1000
        endif

C Test #2 - compare single(1.0d0+delta) to 1.0e0

  200 do 10 j=1,30
            ref(j) = 1.0d0+10.0d0**(-j)
   10 continue

        do 20 j=1,30
            test = ref(j)
            ndigits = j
            call dummy(test,result)
            if (test.eq.1.0e0) then
                goto 1000
            endif
   20 continue
        if (.not. silent) then
        print *,'Test #2 failed - Precision appears to exceed 30 digits'
        print *,'Proceeding to Test #3'
        end if
        goto 300

C Test #3 - abs(sqrt(1.0d0)-sqrt(1.0e0))

  300 diff = abs(sqrt(1.0d0)-sqrt(1.0e0))
        if (diff.eq.0.0) then
           if (.not. silent) then
            print *,'Test Failed - sqrt(1.0e0)=sqrt(1.0d0)'
            print *,'Apparently Single=Double Precision'
            print *,'Giving up'
           end if
            goto 400
        else
            ndigits = -log10(abs(diff))+0.5
            goto 1000
        endif

1000 continue
        if (.not. silent) then
        write (*,'(a)') '--------------------------------------'
        write (*,'(1x,a,i2,a)') 'Single precision appears to have ',
     $ ndigits,' digits of accuracy'
        write (*,'(1x,a,i1,a)') 'Assuming ',realsize,
     $ ' bytes per default REAL word'
        write (*,'(a)') '--------------------------------------'
        end if
        if (ndigits.le.8) then
            realsize = 4
        else
            realsize = 8
        endif
        return

  400 print *,'Hmmmm. I am unable to determine the size of a REAL'
          print *,'Please enter the number of Bytes per REAL number : '
          read (*,*) realsize
        if (realsize.ne.4.and.realsize.ne.8) then
            print *,'Your answer ',realsize,' does not make sense!'
            print *,'Try again!'
              print *,'Please enter the number of Bytes per ',
     $ 'REAL number : '
              read (*,*) realsize
        endif
        print *,'You have manually entered a size of ',realsize,
     $ ' bytes per REAL number'
        write (*,'(a)') '--------------------------------------'
        end

        subroutine dummy(q,r)
        r = cos(q)
        return
        end
        subroutine dummysub(a,b,c,n)
        return
        end

Next message: Charles Grassl: "STREAM for 512 PE CRAY T3D"
Previous message: Charles Grassl: "Re: Stream results for T90"
Messages sorted by: [ date ] [ thread ] [ subject ] [ author ]

This archive was generated by hypermail 2b29 : Tue Apr 18 2000 - 05:23:05 CDT