// Tests shadow memory correctness for 16-byte/32-byte/etc. vector
// loads/stores.  Requires vector_copy() and VECTOR_BYTES to be
// defined by the file that includes this one.

#ifndef VECTOR_BYTES
#error "VECTOR_BYTES must be defined"
#endif

#include <assert.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "tests/malloc.h"
#include "memcheck/memcheck.h"

// What we're actually testing is vector_copy, which must already be
// defined at this point by the file that includes this one.
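
// For illustration only (not part of this test): on x86 with SSE2 the
// including file might supply something along these lines for a
// 16-byte vector.  The exact instructions are up to the includer;
// presumably each copy is one full-width vector load plus one store.
//
//    #include <emmintrin.h>
//    #define VECTOR_BYTES 16
//    static __attribute__((noinline))
//    void vector_copy ( void* dst, const void* src )
//    {
//       __m128i v = _mm_loadu_si128((const __m128i*)src);  // 16-byte load
//       _mm_storeu_si128((__m128i*)dst, v);                 // 16-byte store
//    }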

// All the sizes here are in *bytes*, not bits.

typedef unsigned char        U1;
typedef unsigned short       U2;
typedef unsigned int         U4;
typedef unsigned long long   U8;
typedef unsigned long int    UWord;

typedef unsigned char        Bool;
#define  True   ((Bool)1)
#define  False  ((Bool)0)

#define CFENCE __asm__ __volatile__("":::"cc","memory")

static __attribute__((noinline)) const char* get_endianness ( void )
{
   volatile U4 w32 = 0x88776655;
   volatile U1* p = (U1*)&w32;
   if (p[0] == 0x55) {
      assert(p[3] == 0x88);
      return "little";
   }
   if (p[0] == 0x88) {
      assert(p[3] == 0x55);
      return "big";
   }
   assert(0);
   return NULL; /* not reached; keeps the compiler happy */
}

static inline U4 randomU4 ( void )
{
   static U4 n = 0;
   /* From "Numerical Recipes in C" 2nd Edition */
   n = 1664525UL * n + 1013904223UL;
   return n;
}

static inline U1 randomU1 ( void )
{
   return 0xFF & (randomU4() >> 13);
}

#define N_BYTES  80000
#define N_EVENTS (N_BYTES * 2)

// Return x, but with its definedness (V) bits set to be its own value
// bits: 1 bits of x become undefined, 0 bits stay defined.
static inline U1 self_shadow ( U1 x )
{
   U1 res = 0xFF;
   (void) VALGRIND_MAKE_MEM_UNDEFINED(&res, 1);
   res &= x;
   return res;
}
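
// For example (illustrative, relying on Memcheck's rule that ANDing
// with a defined zero bit gives a defined zero): self_shadow(0xF0)
// returns the value 0xF0 whose V bits are also 0xF0 -- the top four
// bits undefined, the bottom four defined -- because each 0 bit of x
// forces a defined 0, while each 1 bit of x keeps the undefined bit
// from the 0xFF.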

static inline U1 get_shadow ( U1 x )
{
   U1 res = 0;
   U4 r = VALGRIND_GET_VBITS(&x, &res, 1);
   // r is 0 when not running under Valgrind and 1 when the V bits
   // were fetched successfully.
   assert(r == 1 || r == 0);
   return res;
}

static inline U1 make_def ( U1 x )
{
   U1 y = x;
   (void) VALGRIND_MAKE_MEM_DEFINED(&y, 1);
   return y;
}

static inline U1 make_undef ( U1 x )
{
   U1 y = x;
   (void) VALGRIND_MAKE_MEM_UNDEFINED(&y, 1);
   return y;
}

static void make_noaccess ( U1* dst )
{
  (void) VALGRIND_MAKE_MEM_NOACCESS(dst, 1);
}

// All 32 cases of this switch are identical: it simply routes every
// value of arg1 & 31 through the same compiler-fenced call to fn.
static void apply ( void(*fn)(U4,Bool), U4 arg1, Bool arg2 )
{
   switch (arg1 & (32-1)) {
      case 0: CFENCE; fn(arg1, arg2); CFENCE; break;
      case 1: CFENCE; fn(arg1, arg2); CFENCE; break;
      case 2: CFENCE; fn(arg1, arg2); CFENCE; break;
      case 3: CFENCE; fn(arg1, arg2); CFENCE; break;
      case 4: CFENCE; fn(arg1, arg2); CFENCE; break;
      case 5: CFENCE; fn(arg1, arg2); CFENCE; break;
      case 6: CFENCE; fn(arg1, arg2); CFENCE; break;
      case 7: CFENCE; fn(arg1, arg2); CFENCE; break;
      case 8: CFENCE; fn(arg1, arg2); CFENCE; break;
      case 9: CFENCE; fn(arg1, arg2); CFENCE; break;
      case 10: CFENCE; fn(arg1, arg2); CFENCE; break;
      case 11: CFENCE; fn(arg1, arg2); CFENCE; break;
      case 12: CFENCE; fn(arg1, arg2); CFENCE; break;
      case 13: CFENCE; fn(arg1, arg2); CFENCE; break;
      case 14: CFENCE; fn(arg1, arg2); CFENCE; break;
      case 15: CFENCE; fn(arg1, arg2); CFENCE; break;
      case 16: CFENCE; fn(arg1, arg2); CFENCE; break;
      case 17: CFENCE; fn(arg1, arg2); CFENCE; break;
      case 18: CFENCE; fn(arg1, arg2); CFENCE; break;
      case 19: CFENCE; fn(arg1, arg2); CFENCE; break;
      case 20: CFENCE; fn(arg1, arg2); CFENCE; break;
      case 21: CFENCE; fn(arg1, arg2); CFENCE; break;
      case 22: CFENCE; fn(arg1, arg2); CFENCE; break;
      case 23: CFENCE; fn(arg1, arg2); CFENCE; break;
      case 24: CFENCE; fn(arg1, arg2); CFENCE; break;
      case 25: CFENCE; fn(arg1, arg2); CFENCE; break;
      case 26: CFENCE; fn(arg1, arg2); CFENCE; break;
      case 27: CFENCE; fn(arg1, arg2); CFENCE; break;
      case 28: CFENCE; fn(arg1, arg2); CFENCE; break;
      case 29: CFENCE; fn(arg1, arg2); CFENCE; break;
      case 30: CFENCE; fn(arg1, arg2); CFENCE; break;
      case 31: CFENCE; fn(arg1, arg2); CFENCE; break;
      default: CFENCE; fn(arg1, arg2); CFENCE; break;
   }
}

  // Try doing some partial-loads-ok/not-ok testing.
  /* Test cases:
     - load, aligned, all no-access
         ==> addr err
     - load, aligned, 1 to VECTOR_BYTES-1 initial bytes accessible,
             then at least one inaccessible byte,
             then remaining bytes in any state.
         ==> if PLO then no error, but returned V bits are undefined
                for the inaccessible bytes
             else
                error; and V bits are defined for the inaccessible bytes

     All of the above, but non-aligned:
        -- all return an addressing error
  */
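
  /* For example (illustrative only), with VECTOR_BYTES == 16 and
     nInitialValid == 5, do_partial_load_case below leaves the 16 bytes
     at |block| in this state just before the vector load:

        index:  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
        state:  D  D  D  D  D  N  U  D  N  U  D  N  U  D  N  U

     where D = accessible and defined, U = accessible but undefined,
     and N = no-access.  Byte nInitialValid (here 5) is always the
     first no-access byte. */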

static void do_partial_load_case ( U4 nInitialValid, Bool aligned )
{
     fprintf(stderr,
       "------ PL %s case with %u leading acc+def bytes ------\n\n",
             aligned ? "Aligned" : "Unaligned", nInitialValid);

     void *temp;
     if (posix_memalign(&temp, VECTOR_BYTES, 64) != 0)
         abort();
     U1* block = temp;
     U4 j;
     for (j = 0; j < 64; j++) block[j] = 0;

     if (!aligned) block++;

     // Make the block have this pattern:
     // block[0 .. i-1]  accessible and defined
     // block[i .. VECTOR_BYTES-1]   repeating NOACCESS, UNDEF, DEF
     // hence block[i], at the very least, is always NOACCESS
     U4 i = nInitialValid;
     for (j = i; j < VECTOR_BYTES; j++) {
        switch ((j-i) % 3) {
           case 0: make_noaccess(&block[j]); break;
           case 1: block[j] = make_undef(block[j]); break;
           case 2: /* already acc and def */ break;
        }
     }

     // Do the access, possibly generating an error, and show the
     // resulting V bits
     U1 dst[VECTOR_BYTES];
     vector_copy(&dst[0], block);

     U1 dst_vbits[VECTOR_BYTES];
     U4 r = VALGRIND_GET_VBITS(&dst[0], &dst_vbits[0], VECTOR_BYTES);
     assert(r == 1 || r == 0);

     fprintf(stderr, "\n");
     for (j = 0; j < VECTOR_BYTES; j++) {
        fprintf(stderr, "%c", dst_vbits[j] == 0 ? 'd'
                              : dst_vbits[j] == 0xFF ? 'U' : '?');
     }
     fprintf(stderr, "\n\n");

     // Also let's use the resulting value, to check we get an undef
     // error
     U1 sum = 0;
     for (j = 0; j < VECTOR_BYTES; j++)
        sum ^= dst[j];

     if (sum == 42) {
        CFENCE; fprintf(stderr, "%s", ""); CFENCE;
     } else {
        CFENCE; fprintf(stderr, "%s", ""); CFENCE;
     }

     fprintf(stderr, "\n");

     if (!aligned) block--;
     free(block);
}

int main ( void )
{
  fprintf(stderr, "sh-mem-vec%d: config: %s-endian, %d-bit word size\n",
          VECTOR_BYTES * 8, get_endianness(), (int)(8 * sizeof(void*)));

  U4 i;
  void *temp;
  if (posix_memalign(&temp, VECTOR_BYTES, N_BYTES) != 0)
      abort();
  U1* buf = temp;

  // Fill |buf| with bytes, so that zero bits have a zero shadow
  // (are defined) and one bits have a one shadow (are undefined)
  for (i = 0; i < N_BYTES/2; i++) {
     buf[i] = self_shadow( (i & (1<<5)) ? 0x00 : 0xFF );
  }
  for (     ;  i < N_BYTES; i++) {
     buf[i] = self_shadow( randomU1() );
  }

  // Randomly copy the data around.  Once every 8 srcs/dsts, force
  // the src or dst to be aligned.  Once every 64, force both to be
  // aligned.  So as to give the fast (aligned) paths some checking.
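  // (Aligning an index just clears its low bits: with VECTOR_BYTES
  // == 16, for example, "si &= ~(VECTOR_BYTES-1)" turns si == 0x1239
  // into 0x1230, a 16-byte boundary.)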
  const U4 n_copies = N_EVENTS;
  U4 n_d_aligned = 0;
  U4 n_s_aligned = 0;
  U4 n_both_aligned = 0;
  U4 n_fails = 0;

  for (i = 0; i < n_copies; i++) {
     U4 si = randomU4() % (N_BYTES-VECTOR_BYTES);
     U4 di = randomU4() % (N_BYTES-VECTOR_BYTES);
     if (0 == (randomU1() & 7)) si &= ~(VECTOR_BYTES-1);
     if (0 == (randomU1() & 7)) di &= ~(VECTOR_BYTES-1);
     if (0 == (randomU1() & 63)) {
        di &= ~(VECTOR_BYTES-1); si &= ~(VECTOR_BYTES-1);
     }

     void* dst = &buf[di];
     void* src = &buf[si];

     if (0 == (((UWord)src) & (VECTOR_BYTES-1))) n_s_aligned++;
     if (0 == (((UWord)dst) & (VECTOR_BYTES-1))) n_d_aligned++;
     if (0 == (((UWord)src) & (VECTOR_BYTES-1))
         && 0 == (((UWord)dst) & (VECTOR_BYTES-1)))
        n_both_aligned++;

     vector_copy(dst, src);
  }

  U4 freq[256];
  for (i = 0; i < 256; i++)
     freq[i] = 0;

  for (i = 0; i < N_BYTES; i++) {
     //if (i > 0 && 0 == (i & 0x0F)) fprintf(stderr, "\n");
     U1 v_actual = make_def(buf[i]);
     U1 v_shadow = get_shadow(buf[i]);
     if (v_actual != v_shadow) n_fails++;
     //fprintf(stderr, "%02x:%02x ", (U4)v_actual, (U4)v_shadow);
     freq[(U4)v_actual]++;
  }

  fprintf(stderr, "\n");
  U4 totFreq = 0;
  for (i = 0; i < 256; i++) {
     totFreq += freq[i];
     if (i > 0 && (0 == (i % 16))) fprintf(stderr, "\n");
     fprintf(stderr, "%5u ", freq[i]);
  }
  assert(totFreq == N_BYTES);

  fprintf(stderr, "\n\n");
  fprintf(stderr, "%u copies, %u d_aligned, %u s_aligned, %u both_aligned\n",
         n_copies, n_d_aligned, n_s_aligned, n_both_aligned);
  fprintf(stderr, "%u %s\n", n_fails, n_fails == 0 ? "failures" : "FAILURES");

  // Check that we can detect underruns of the block.
  fprintf(stderr, "\nExpect 2 x no error\n" );
  vector_copy( &buf[100], &buf[0] );
  vector_copy( &buf[0],   &buf[100] );

  fprintf(stderr, "\nExpect 2 x error\n\n" );
  vector_copy( &buf[100], &buf[-1]  ); // invalid rd
  vector_copy( &buf[-1],  &buf[100] ); // invalid wr

  // and overruns ..
  fprintf(stderr, "\nExpect 2 x no error\n" );
  vector_copy( &buf[200],            &buf[N_BYTES-VECTOR_BYTES + 0] );
  vector_copy( &buf[N_BYTES-VECTOR_BYTES + 0], &buf[200]            );

  fprintf(stderr, "\nExpect 2 x error\n\n" );
  vector_copy( &buf[200],            &buf[N_BYTES-VECTOR_BYTES + 1] );
  vector_copy( &buf[N_BYTES-VECTOR_BYTES + 1], &buf[200]            );

  free(buf);
  fprintf(stderr, "\n");

  for (i = 0; i < VECTOR_BYTES; i++)
     apply( do_partial_load_case, i, True/*aligned*/ );

  for (i = 0; i < VECTOR_BYTES; i++)
     apply( do_partial_load_case, i, False/*not aligned*/ );

  return 0;
}