1#!/bin/bash
2
3########################
4# Function definitions #
5########################
6
# Load the shared helpers that live next to this script. The functions used
# below but not defined in this file (measure_runtime, avgstddev,
# print_runtime_ratio, get_cache_size, log2) are presumably provided by it.
# "$0" is quoted so that a script path containing whitespace still resolves.
source "$(dirname "$0")/measurement-functions"
8
#######################################
# Benchmark one SPLASH-2 program: time it natively with 1, 2 and 4 threads,
# then print the cost of several Valgrind tool configurations relative to
# the native 1-thread and 4-thread results.
#
# measure_runtime, avgstddev and print_runtime_ratio are not defined in this
# file; they are expected to be in scope when this function is called.
#
# Globals (read):
#   psep       - separator placed between "-p" and the thread count. It is
#                expanded unquoted on purpose: psep=' ' yields two words
#                ("-p" "4"), an empty or unset psep yields one ("-p4").
#   test_args  - extra argument appended after the -p option; it is always
#                passed as exactly one word, even when empty or unset.
#   VG         - Valgrind launcher handed to print_runtime_ratio.
# Globals (set only for the duration of each helper call):
#   test_output - per-run output file name (or /dev/null).
# Arguments:
#   $1   - program to run; also the prefix of the per-run output file names.
#   $2.. - options for that program, placed before the -p option.
# Outputs:
#   One "Average time ... VSZ ..." line per native run on stdout, followed by
#   whatever print_runtime_ratio prints and a terminating blank line.
# Exits:
#   Calls "exit 1" when avgstddev produces more than four fields, or when no
#   temporary file can be created.
#######################################
function run_test {
  # Everything is local so that no measurement state leaks into the caller.
  # Callees still see these variables through bash's dynamic scoping.
  local tmp p avg stddev vsz vszdev rest
  local -a native1 native4

  # mktemp gives an unpredictable, exclusively created file instead of a
  # guessable /tmp/<name>.<pid> path.
  tmp=$(mktemp "${TMPDIR:-/tmp}/test-timing.XXXXXX") || {
    echo "Internal error (cannot create temporary file)"
    exit 1
  }

  # Native runs with 1, 2 and 4 threads.
  for p in 1 2 4; do
    avg='' stddev='' vsz='' vszdev='' rest=''
    # shellcheck disable=SC2086  # -p${psep}${p} must undergo word splitting
    test_output="${1}-p${p}.out" measure_runtime "$@" -p${psep}${p} "${test_args}" \
      | avgstddev > "$tmp"
    read -r avg stddev vsz vszdev rest < "$tmp"
    echo "Average time: ${avg} +/- ${stddev} seconds." \
         " VSZ: ${vsz} +/- ${vszdev} KB"

    # Exactly four fields are expected; anything beyond that ends up in rest.
    if [ "${rest}" != "" ]; then
      rm -f "$tmp"
      echo "Internal error ($rest)"
      exit 1
    fi

    # Only the 1- and 4-thread results serve as baselines further down.
    case "$p" in
      1) native1=("$avg" "$stddev" "$vsz" "$vszdev") ;;
      4) native4=("$avg" "$stddev" "$vsz" "$vszdev") ;;
    esac
  done
  rm -f "$tmp"

  # shellcheck disable=SC2086  # same intentional splitting of -p${psep}${p}
  {
    p=1
    test_output="/dev/null" \
    print_runtime_ratio "${native1[@]}" "$VG" --tool=none "$@" -p${psep}${p} "${test_args}"

    # All remaining runs use 4 threads and the native 4-thread baseline.
    p=4
    test_output="/dev/null" \
    print_runtime_ratio "${native4[@]}" "$VG" --tool=none "$@" -p${psep}${p} "${test_args}"

    test_output="${1}-drd-with-stack-var-4.out" \
    print_runtime_ratio "${native4[@]}" \
      "$VG" --tool=drd --first-race-only=yes --check-stack-var=yes \
      --drd-stats=yes "$@" -p${psep}${p} "${test_args}"

    test_output="${1}-drd-without-stack-var-4.out" \
    print_runtime_ratio "${native4[@]}" \
      "$VG" --tool=drd --first-race-only=yes --check-stack-var=no \
      --drd-stats=yes "$@" -p${psep}${p} "${test_args}"

    test_output="${1}-helgrind-4-none.out" \
    print_runtime_ratio "${native4[@]}" "$VG" --tool=helgrind --history-level=none "$@" -p${psep}${p} "${test_args}"

    test_output="${1}-helgrind-4-approx.out" \
    print_runtime_ratio "${native4[@]}" "$VG" --tool=helgrind --history-level=approx "$@" -p${psep}${p} "${test_args}"

    test_output="${1}-helgrind-4-full.out" \
    print_runtime_ratio "${native4[@]}" "$VG" --tool=helgrind --history-level=full "$@" -p${psep}${p} "${test_args}"
  }

  echo ''
}
85
86
87########################
88# Script body          #
89########################
90
# Absolute path of the directory holding this script. "$0" is quoted so that
# a path containing whitespace is handed to dirname as a single argument.
DRD_SCRIPTS_DIR="$(dirname "$0")"
if [ "${DRD_SCRIPTS_DIR:0:1}" != "/" ]; then
  # Relative invocation: anchor it at the current working directory.
  DRD_SCRIPTS_DIR="$PWD/$DRD_SCRIPTS_DIR"
fi

# The SPLASH-2 tree is expected one level above the scripts directory.
SPLASH2="${DRD_SCRIPTS_DIR}/../splash2"
if [ ! -e "${SPLASH2}" ]; then
  echo "Error: splash2 directory not found (${SPLASH2})."
  exit 1
fi

# Honour a caller-supplied VG; otherwise fall back to the in-tree launcher.
VG="${VG:-${DRD_SCRIPTS_DIR}/../../vg-in-place}"

if [ ! -e "$VG" ]; then
  echo "Could not find $VG."
  exit 1
fi
110
111######################################################################################################################
# Meaning of the different columns:
113#  1. SPLASH2 test name.
114#  2. Execution time in seconds for native run with argument -p1.
115#  3. Virtual memory size in KB for the native run with argument -p1.
116#  4. Execution time in seconds for native run with argument -p2.
117#  5. Virtual memory size in KB for the native run with argument -p2.
118#  6. Execution time in seconds for native run with argument -p4.
119#  7. Virtual memory size in KB for the native run with argument -p4.
120#  8. Execution time ratio for --tool=none -p1 versus -p1.
121#  9. Virtual memory size ratio for --tool=none -p1 versus -p1.
122# 10. Execution time ratio for --tool=none -p4 versus -p4.
123# 11. Virtual memory size ratio for --tool=none -p4 versus -p4.
124# 12. Execution time ratio for --tool=drd --check-stack-var=yes -p4 versus -p4.
125# 13. Virtual memory size ratio for --tool=drd --check-stack-var=yes -p4 versus -p4.
126# 14. Execution time ratio for --tool=drd --check-stack-var=no -p4 versus -p4.
127# 15. Virtual memory size ratio for --tool=drd --check-stack-var=no -p4 versus -p4.
128# 16. Execution time ratio for --tool=helgrind --history-level=none -p4 versus -p4.
129# 17. Virtual memory size ratio for --tool=helgrind --history-level=none -p4 versus -p4.
130# 18. Execution time ratio for --tool=helgrind --history-level=approx -p4 versus -p4.
131# 19. Virtual memory size ratio for --tool=helgrind --history-level=approx -p4 versus -p4.
132# 20. Execution time ratio for --tool=helgrind --history-level=full -p4 versus -p4.
133# 21. Virtual memory size ratio for --tool=helgrind --history-level=full -p4 versus -p4.
134# 22. Execution time ratio for Intel Thread Checker -p4 versus -p4.
# 23. Execution time ratio for Intel Thread Checker -p4+f (see the "ITC -p4+f" table column) versus -p4.
136#
137# Notes:
138# - Both Helgrind and DRD use a granularity of one byte for data race detection.
139# - Helgrind does detect data races on stack variables. DRD only detects
140#   data races on stack variables with --check-stack-var=yes.
# - The ITC tests have been run on a 4-way 2.5 GHz Pentium 4 workstation, most
#   likely running a 32-bit OS. Not yet clear to me: which OS? Which
#   granularity does ITC use? And which m4 macros have been used by ITC as
#   implementation of the synchronization primitives?
145#
146#     1                    2     3      4      5      6     7     8    9   10   11   12  13     14  15    16    17  18    19  20    21   22   23
147################################################################################################################################################
148# Results:                native       native       native       none      none       DRD        DRD      HG        HG        HG         ITC ITC
149#                         -p1          -p2          -p4           -p1       -p4       -p4       -p4+f     -p4       -p4       -p4      -p4 -p4+f
150# ..............................................................................................................................................
151# Cholesky                0.11  12016  0.06  22016  0.55  41328 10.3  4.92  1.7 2.14   15 2.61    8 2.61   10  3.96  10  3.96  15  6.14  239  82
152# FFT                     0.02   6692  0.02  14888  0.02  31621 17.0  8.01 20.0 2.48  114 3.15   64 3.28   81  4.52  81  4.52 116  5.56   90  41
153# LU, contiguous          0.08   4100  0.05  12304  0.06  28712 11.1 12.44 18.5 2.64  104 3.18   70 3.18   87  4.84  89  4.84 118  5.55  428 128
154# Ocean, contiguous       0.23  16848  0.19  25384  0.23  42528  6.3  3.78  8.3 2.11   87 2.82   62 4.02   71  3.75  71  3.75 195  5.96   90  28
155# Radix                   0.21  15136  0.14  23336  0.15  39728 12.6  4.10 22.3 2.19   61 2.87   41 2.94   52  4.03  52  4.03  85  6.13  222  56
156# Raytrace                0.63 207104  0.49 215296  0.49 231680  8.9  1.23 12.9 1.20  385 1.38   86 2.10  158  3.70 160  3.70 222  4.15  172  53
157# Water-n2                0.18  10696  0.09  27072  0.11  59832 12.5  5.46 26.7 1.80 3092 3.03  263 3.06   92  3.28  92  3.28  92  3.55  189  39
158# Water-sp                0.20   4444  0.15  13536  0.10  30269 10.6 11.56 27.0 2.52  405 3.29   69 3.42   95  4.59  95  4.59  97  4.73  183  34
159# ..............................................................................................................................................
160# geometric mean          0.14  13024  0.10  25669  0.14  47655 10.8  5.26 13.5 2.08  161 2.71   59 3.03   66  4.05  66  4.05  95  5.13  180  51
161# ..............................................................................................................................................
162# Hardware: dual-core Intel Core2 Duo E6750, 2.66 GHz, 4 MB L2 cache, 2 GB RAM.                                                        
163# Software: openSUSE 11.0 (64-bit edition), runlevel 3, kernel 2.6.30.1, gcc 4.3.1, 32 bit SPLASH-2 executables, valgrind trunk r10648.
164################################################################################################################################################
165
166####
167# Notes:
168# - The ITC performance numbers in the above table originate from table 1 in
169#   the following paper:
170#   Paul Sack, Brian E. Bliss, Zhiqiang Ma, Paul Petersen, Josep Torrellas,
171#   Accurate and efficient filtering for the Intel thread checker race
172#   detector, Proceedings of the 1st workshop on Architectural and system
173#   support for improving software dependability, San Jose, California,
174#   2006. Pages: 34 - 41.
175# - The input parameters for benchmarks below originate from table 1 in the
176#   following paper:
177#   The SPLASH-2 programs: characterization and methodological considerations
178#   Woo, S.C.; Ohara, M.; Torrie, E.; Singh, J.P.; Gupta, A.
179#   1995. Proceedings of the 22nd Annual International Symposium on Computer
180#   Architecture, 22-24 Jun 1995, Page(s): 24 - 36.
181#   ftp://www-flash.stanford.edu/pub/splash2/splash2_isca95.ps.Z
182####
183
# Cache size as reported by get_cache_size, and its base-2 logarithm as
# computed by log2 (both helpers are defined outside this file). They are
# used to derive benchmark parameters below.
cache_size=$(get_cache_size)
log2_cache_size=$(log2 ${cache_size})

# Cholesky
(
  # Abort this benchmark (only the subshell exits) if the inputs directory is
  # missing, instead of unpacking and running from the wrong directory.
  cd "${SPLASH2}/codes/kernels/cholesky/inputs" || exit 1
  # Match "*.Z" only: for a name ending in "Z" but not ".Z", ${f%.Z} would
  # equal $f and the redirection would truncate the input file.
  for f in *.Z
  do
    # Skip the literal pattern that is left over when nothing matches.
    [ -e "$f" ] || continue
    gzip -cd <"$f" >"${f%.Z}"
  done
  test_args=tk15.O run_test ../CHOLESKY -C$((cache_size))
)
196
# The program paths below are quoted because SPLASH2 is derived from $PWD and
# may therefore contain whitespace.

# FFT
run_test "${SPLASH2}/codes/kernels/fft/FFT" -t -l$((log2_cache_size/2)) -m16

# LU, contiguous blocks.
run_test "${SPLASH2}/codes/kernels/lu/contiguous_blocks/LU" -n512

# LU, non-contiguous blocks.
#run_test "${SPLASH2}/codes/kernels/lu/non_contiguous_blocks/LU" -n512

# Ocean
run_test "${SPLASH2}/codes/apps/ocean/contiguous_partitions/OCEAN" -n258
#run_test "${SPLASH2}/codes/apps/ocean/non_contiguous_partitions/OCEAN" -n258

# Radiosity. Runs fine on a 32-bit OS, but deadlocks on a 64-bit OS. Not clear to me why.
# The command substitution is quoted so that empty output from uname does not
# turn the test into a syntax error.
if [ "$(uname -p)" = "i686" ]; then
  psep=' ' run_test "${SPLASH2}/codes/apps/radiosity/RADIOSITY" -batch -room -ae 5000.0 -en 0.050 -bf 0.10
fi

# Radix
run_test "${SPLASH2}/codes/kernels/radix/RADIX" -n$((2**20)) -r1024
217
# Raytrace
(
  # The cd must succeed before anything is deleted: without this check a
  # missing directory would make the rm below act on the caller's directory.
  cd "${SPLASH2}/codes/apps/raytrace/inputs" || exit 1
  # Remove previously unpacked inputs; "--" protects against names that
  # start with a dash.
  rm -f -- *.env *.geo *.rl
  # Match "*.Z" only: for a name ending in "Z" but not ".Z", ${f%.Z} would
  # equal $f and the redirection would truncate the input file.
  for f in *.Z
  do
    # Skip the literal pattern that is left over when nothing matches.
    [ -e "$f" ] || continue
    gzip -cd <"$f" >"${f%.Z}"
  done
  # RAYTRACE is started from the parent of the inputs directory.
  cd .. || exit 1
  test_args=inputs/car.env psep='' run_test ./RAYTRACE -m64
)
229
# Both Water benchmarks run inside their own directory with test_input set to
# the same input file. Each subshell exits early if its directory is missing,
# so the benchmark is skipped rather than started from the wrong place.

# Water-n2
(
  cd "${SPLASH2}/codes/apps/water-nsquared" || exit 1
  test_input="${DRD_SCRIPTS_DIR}/run-splash2-water-input" psep=' ' run_test ./WATER-NSQUARED
)

# Water-sp
(
  cd "${SPLASH2}/codes/apps/water-spatial" || exit 1
  test_input="${DRD_SCRIPTS_DIR}/run-splash2-water-input" psep=' ' run_test ./WATER-SPATIAL
)
241
242
243
244# Local variables:
245# compile-command: "./run-splash2"
246# End:
247