14d00f9827528d41ec354fa60482091c07fc93d7asewardj
2e731e774ef1d12d7b57480e1d70dcc07f116b5d6sewardj// Tests shadow memory correctness for 16-byte/32-byte/etc. vector
3e731e774ef1d12d7b57480e1d70dcc07f116b5d6sewardj// loads/stores. Requires vector_copy() and VECTOR_BYTES to be
4e731e774ef1d12d7b57480e1d70dcc07f116b5d6sewardj// specified somehow.
5e731e774ef1d12d7b57480e1d70dcc07f116b5d6sewardj
6e731e774ef1d12d7b57480e1d70dcc07f116b5d6sewardj#ifndef VECTOR_BYTES
7e731e774ef1d12d7b57480e1d70dcc07f116b5d6sewardj#error "VECTOR_BYTES must be defined"
8e731e774ef1d12d7b57480e1d70dcc07f116b5d6sewardj#endif
94d00f9827528d41ec354fa60482091c07fc93d7asewardj
104d00f9827528d41ec354fa60482091c07fc93d7asewardj#include <assert.h>
114d00f9827528d41ec354fa60482091c07fc93d7asewardj#include <stdlib.h>
124d00f9827528d41ec354fa60482091c07fc93d7asewardj#include <stdio.h>
134d00f9827528d41ec354fa60482091c07fc93d7asewardj#include <string.h>
144d00f9827528d41ec354fa60482091c07fc93d7asewardj#include "tests/malloc.h"
154d00f9827528d41ec354fa60482091c07fc93d7asewardj#include "memcheck/memcheck.h"
164d00f9827528d41ec354fa60482091c07fc93d7asewardj
174d00f9827528d41ec354fa60482091c07fc93d7asewardj// What we're actually testing
18e731e774ef1d12d7b57480e1d70dcc07f116b5d6sewardj// .. is vector_copy, which should be defined before this point
194d00f9827528d41ec354fa60482091c07fc93d7asewardj
204d00f9827528d41ec354fa60482091c07fc93d7asewardj// All the sizes here are in *bytes*, not bits.
214d00f9827528d41ec354fa60482091c07fc93d7asewardj
224d00f9827528d41ec354fa60482091c07fc93d7asewardjtypedef unsigned char        U1;
234d00f9827528d41ec354fa60482091c07fc93d7asewardjtypedef unsigned short       U2;
244d00f9827528d41ec354fa60482091c07fc93d7asewardjtypedef unsigned int         U4;
254d00f9827528d41ec354fa60482091c07fc93d7asewardjtypedef unsigned long long   U8;
264d00f9827528d41ec354fa60482091c07fc93d7asewardjtypedef unsigned long int    UWord;
274d00f9827528d41ec354fa60482091c07fc93d7asewardj
284d00f9827528d41ec354fa60482091c07fc93d7asewardjtypedef unsigned char        Bool;
294d00f9827528d41ec354fa60482091c07fc93d7asewardj#define  True   ((Bool)1)
304d00f9827528d41ec354fa60482091c07fc93d7asewardj#define  False  ((Bool)0)
314d00f9827528d41ec354fa60482091c07fc93d7asewardj
324d00f9827528d41ec354fa60482091c07fc93d7asewardj#define CFENCE __asm__ __volatile__("":::"cc","memory")
334d00f9827528d41ec354fa60482091c07fc93d7asewardj
344d00f9827528d41ec354fa60482091c07fc93d7asewardjstatic __attribute__((noinline)) const char* get_endianness ( void )
354d00f9827528d41ec354fa60482091c07fc93d7asewardj{
364d00f9827528d41ec354fa60482091c07fc93d7asewardj   volatile U4 w32 = 0x88776655;
374d00f9827528d41ec354fa60482091c07fc93d7asewardj   volatile U1* p = (U1*)&w32;
384d00f9827528d41ec354fa60482091c07fc93d7asewardj   if (p[0] == 0x55) {
394d00f9827528d41ec354fa60482091c07fc93d7asewardj      assert(p[3] == 0x88);
404d00f9827528d41ec354fa60482091c07fc93d7asewardj      return "little";
414d00f9827528d41ec354fa60482091c07fc93d7asewardj   }
424d00f9827528d41ec354fa60482091c07fc93d7asewardj   if (p[0] == 0x88) {
434d00f9827528d41ec354fa60482091c07fc93d7asewardj      assert(p[3] == 0x55);
444d00f9827528d41ec354fa60482091c07fc93d7asewardj      return "big";
454d00f9827528d41ec354fa60482091c07fc93d7asewardj   }
464d00f9827528d41ec354fa60482091c07fc93d7asewardj   assert(0);
474d00f9827528d41ec354fa60482091c07fc93d7asewardj}
484d00f9827528d41ec354fa60482091c07fc93d7asewardj
494d00f9827528d41ec354fa60482091c07fc93d7asewardjstatic inline U4 randomU4 ( void )
504d00f9827528d41ec354fa60482091c07fc93d7asewardj{
514d00f9827528d41ec354fa60482091c07fc93d7asewardj   static U4 n = 0;
524d00f9827528d41ec354fa60482091c07fc93d7asewardj   /* From "Numerical Recipes in C" 2nd Edition */
534d00f9827528d41ec354fa60482091c07fc93d7asewardj   n = 1664525UL * n + 1013904223UL;
544d00f9827528d41ec354fa60482091c07fc93d7asewardj   return n;
554d00f9827528d41ec354fa60482091c07fc93d7asewardj}
564d00f9827528d41ec354fa60482091c07fc93d7asewardj
574d00f9827528d41ec354fa60482091c07fc93d7asewardjstatic inline U1 randomU1 ( void )
584d00f9827528d41ec354fa60482091c07fc93d7asewardj{
594d00f9827528d41ec354fa60482091c07fc93d7asewardj   return 0xFF & (randomU4() >> 13);
604d00f9827528d41ec354fa60482091c07fc93d7asewardj}
614d00f9827528d41ec354fa60482091c07fc93d7asewardj
624d00f9827528d41ec354fa60482091c07fc93d7asewardj#define N_BYTES  80000
634d00f9827528d41ec354fa60482091c07fc93d7asewardj#define N_EVENTS (N_BYTES * 2)
644d00f9827528d41ec354fa60482091c07fc93d7asewardj
654d00f9827528d41ec354fa60482091c07fc93d7asewardj// Return x, but with its definedness bits set to be its own value bits
664d00f9827528d41ec354fa60482091c07fc93d7asewardjstatic inline U1 self_shadow ( U1 x )
674d00f9827528d41ec354fa60482091c07fc93d7asewardj{
684d00f9827528d41ec354fa60482091c07fc93d7asewardj   U1 res = 0xFF;
6906bc722457ffe12e056d2f40d0d2f5c8711b541fflorian   (void) VALGRIND_MAKE_MEM_UNDEFINED(&res, 1);
704d00f9827528d41ec354fa60482091c07fc93d7asewardj   res &= x;
714d00f9827528d41ec354fa60482091c07fc93d7asewardj   return res;
724d00f9827528d41ec354fa60482091c07fc93d7asewardj}
734d00f9827528d41ec354fa60482091c07fc93d7asewardj
744d00f9827528d41ec354fa60482091c07fc93d7asewardjstatic inline U1 get_shadow ( U1 x )
754d00f9827528d41ec354fa60482091c07fc93d7asewardj{
764d00f9827528d41ec354fa60482091c07fc93d7asewardj   U1 res = 0;
774d00f9827528d41ec354fa60482091c07fc93d7asewardj   U4 r = VALGRIND_GET_VBITS(&x, &res, 1);
784d00f9827528d41ec354fa60482091c07fc93d7asewardj   assert(r == 1 || r == 0);
794d00f9827528d41ec354fa60482091c07fc93d7asewardj   return res;
804d00f9827528d41ec354fa60482091c07fc93d7asewardj}
814d00f9827528d41ec354fa60482091c07fc93d7asewardj
824d00f9827528d41ec354fa60482091c07fc93d7asewardjstatic inline U1 make_def ( U1 x )
834d00f9827528d41ec354fa60482091c07fc93d7asewardj{
844d00f9827528d41ec354fa60482091c07fc93d7asewardj   U1 y = x;
8506bc722457ffe12e056d2f40d0d2f5c8711b541fflorian   (void) VALGRIND_MAKE_MEM_DEFINED(&y, 1);
864d00f9827528d41ec354fa60482091c07fc93d7asewardj   return y;
874d00f9827528d41ec354fa60482091c07fc93d7asewardj}
884d00f9827528d41ec354fa60482091c07fc93d7asewardj
894d00f9827528d41ec354fa60482091c07fc93d7asewardjstatic inline U1 make_undef ( U1 x )
904d00f9827528d41ec354fa60482091c07fc93d7asewardj{
914d00f9827528d41ec354fa60482091c07fc93d7asewardj   U1 y = x;
9206bc722457ffe12e056d2f40d0d2f5c8711b541fflorian   (void) VALGRIND_MAKE_MEM_UNDEFINED(&y, 1);
934d00f9827528d41ec354fa60482091c07fc93d7asewardj   return y;
944d00f9827528d41ec354fa60482091c07fc93d7asewardj}
954d00f9827528d41ec354fa60482091c07fc93d7asewardj
964d00f9827528d41ec354fa60482091c07fc93d7asewardjstatic void make_noaccess ( U1* dst )
974d00f9827528d41ec354fa60482091c07fc93d7asewardj{
9806bc722457ffe12e056d2f40d0d2f5c8711b541fflorian  (void) VALGRIND_MAKE_MEM_NOACCESS(dst, 1);
994d00f9827528d41ec354fa60482091c07fc93d7asewardj}
1004d00f9827528d41ec354fa60482091c07fc93d7asewardj
1014d00f9827528d41ec354fa60482091c07fc93d7asewardjstatic void apply ( void(*fn)(U4,Bool), U4 arg1, Bool arg2 )
1024d00f9827528d41ec354fa60482091c07fc93d7asewardj{
1034d00f9827528d41ec354fa60482091c07fc93d7asewardj   switch (arg1 & (32-1)) {
1044d00f9827528d41ec354fa60482091c07fc93d7asewardj      case 0: CFENCE; fn(arg1, arg2); CFENCE; break;
1054d00f9827528d41ec354fa60482091c07fc93d7asewardj      case 1: CFENCE; fn(arg1, arg2); CFENCE; break;
1064d00f9827528d41ec354fa60482091c07fc93d7asewardj      case 2: CFENCE; fn(arg1, arg2); CFENCE; break;
1074d00f9827528d41ec354fa60482091c07fc93d7asewardj      case 3: CFENCE; fn(arg1, arg2); CFENCE; break;
1084d00f9827528d41ec354fa60482091c07fc93d7asewardj      case 4: CFENCE; fn(arg1, arg2); CFENCE; break;
1094d00f9827528d41ec354fa60482091c07fc93d7asewardj      case 5: CFENCE; fn(arg1, arg2); CFENCE; break;
1104d00f9827528d41ec354fa60482091c07fc93d7asewardj      case 6: CFENCE; fn(arg1, arg2); CFENCE; break;
1114d00f9827528d41ec354fa60482091c07fc93d7asewardj      case 7: CFENCE; fn(arg1, arg2); CFENCE; break;
1124d00f9827528d41ec354fa60482091c07fc93d7asewardj      case 8: CFENCE; fn(arg1, arg2); CFENCE; break;
1134d00f9827528d41ec354fa60482091c07fc93d7asewardj      case 9: CFENCE; fn(arg1, arg2); CFENCE; break;
1144d00f9827528d41ec354fa60482091c07fc93d7asewardj      case 10: CFENCE; fn(arg1, arg2); CFENCE; break;
1154d00f9827528d41ec354fa60482091c07fc93d7asewardj      case 11: CFENCE; fn(arg1, arg2); CFENCE; break;
1164d00f9827528d41ec354fa60482091c07fc93d7asewardj      case 12: CFENCE; fn(arg1, arg2); CFENCE; break;
1174d00f9827528d41ec354fa60482091c07fc93d7asewardj      case 13: CFENCE; fn(arg1, arg2); CFENCE; break;
1184d00f9827528d41ec354fa60482091c07fc93d7asewardj      case 14: CFENCE; fn(arg1, arg2); CFENCE; break;
1194d00f9827528d41ec354fa60482091c07fc93d7asewardj      case 15: CFENCE; fn(arg1, arg2); CFENCE; break;
1204d00f9827528d41ec354fa60482091c07fc93d7asewardj      case 16: CFENCE; fn(arg1, arg2); CFENCE; break;
1214d00f9827528d41ec354fa60482091c07fc93d7asewardj      case 17: CFENCE; fn(arg1, arg2); CFENCE; break;
1224d00f9827528d41ec354fa60482091c07fc93d7asewardj      case 18: CFENCE; fn(arg1, arg2); CFENCE; break;
1234d00f9827528d41ec354fa60482091c07fc93d7asewardj      case 19: CFENCE; fn(arg1, arg2); CFENCE; break;
1244d00f9827528d41ec354fa60482091c07fc93d7asewardj      case 20: CFENCE; fn(arg1, arg2); CFENCE; break;
1254d00f9827528d41ec354fa60482091c07fc93d7asewardj      case 21: CFENCE; fn(arg1, arg2); CFENCE; break;
1264d00f9827528d41ec354fa60482091c07fc93d7asewardj      case 22: CFENCE; fn(arg1, arg2); CFENCE; break;
1274d00f9827528d41ec354fa60482091c07fc93d7asewardj      case 23: CFENCE; fn(arg1, arg2); CFENCE; break;
1284d00f9827528d41ec354fa60482091c07fc93d7asewardj      case 24: CFENCE; fn(arg1, arg2); CFENCE; break;
1294d00f9827528d41ec354fa60482091c07fc93d7asewardj      case 25: CFENCE; fn(arg1, arg2); CFENCE; break;
1304d00f9827528d41ec354fa60482091c07fc93d7asewardj      case 26: CFENCE; fn(arg1, arg2); CFENCE; break;
1314d00f9827528d41ec354fa60482091c07fc93d7asewardj      case 27: CFENCE; fn(arg1, arg2); CFENCE; break;
1324d00f9827528d41ec354fa60482091c07fc93d7asewardj      case 28: CFENCE; fn(arg1, arg2); CFENCE; break;
1334d00f9827528d41ec354fa60482091c07fc93d7asewardj      case 29: CFENCE; fn(arg1, arg2); CFENCE; break;
1344d00f9827528d41ec354fa60482091c07fc93d7asewardj      case 30: CFENCE; fn(arg1, arg2); CFENCE; break;
1354d00f9827528d41ec354fa60482091c07fc93d7asewardj      case 31: CFENCE; fn(arg1, arg2); CFENCE; break;
1364d00f9827528d41ec354fa60482091c07fc93d7asewardj      default: CFENCE; fn(arg1, arg2); CFENCE; break;
1374d00f9827528d41ec354fa60482091c07fc93d7asewardj   }
1384d00f9827528d41ec354fa60482091c07fc93d7asewardj}
1394d00f9827528d41ec354fa60482091c07fc93d7asewardj
  // Partial-loads-ok (PLO) / not-ok testing.
  /* Test cases:
     - load, aligned, all no-access
         ==> addressing error
     - load, aligned, 1 to VECTOR_BYTES-1 initial bytes accessible,
             then at least one inaccessible byte,
             then remaining bytes in any state.
         ==> if PLO then no error, but the returned V bits are undefined
                for the inaccessible bytes
             else
                error; and the V bits are defined for the inaccessible
                bytes

     All of the above, but non-aligned:
        -- all return an addressing error
  */
1554d00f9827528d41ec354fa60482091c07fc93d7asewardj
1564d00f9827528d41ec354fa60482091c07fc93d7asewardjstatic void do_partial_load_case ( U4 nInitialValid, Bool aligned )
1574d00f9827528d41ec354fa60482091c07fc93d7asewardj{
1584d00f9827528d41ec354fa60482091c07fc93d7asewardj     fprintf(stderr,
1594d00f9827528d41ec354fa60482091c07fc93d7asewardj       "------ PL %s case with %u leading acc+def bytes ------\n\n",
1604d00f9827528d41ec354fa60482091c07fc93d7asewardj             aligned ? "Aligned" : "Unaligned", nInitialValid);
1614d00f9827528d41ec354fa60482091c07fc93d7asewardj
162e731e774ef1d12d7b57480e1d70dcc07f116b5d6sewardj     void *temp;
163e731e774ef1d12d7b57480e1d70dcc07f116b5d6sewardj     if (posix_memalign(&temp, VECTOR_BYTES, 64) != 0)
164e731e774ef1d12d7b57480e1d70dcc07f116b5d6sewardj         abort();
165e731e774ef1d12d7b57480e1d70dcc07f116b5d6sewardj     U1* block = temp;
1664d00f9827528d41ec354fa60482091c07fc93d7asewardj     U4 j;
1674d00f9827528d41ec354fa60482091c07fc93d7asewardj     for (j = 0; j < 64; j++) block[j] = 0;
1684d00f9827528d41ec354fa60482091c07fc93d7asewardj
1694d00f9827528d41ec354fa60482091c07fc93d7asewardj     if (!aligned) block++;
1704d00f9827528d41ec354fa60482091c07fc93d7asewardj
1714d00f9827528d41ec354fa60482091c07fc93d7asewardj     // Make the block have this pattern:
1724d00f9827528d41ec354fa60482091c07fc93d7asewardj     // block[0 .. i-1]  accessible and defined
173e731e774ef1d12d7b57480e1d70dcc07f116b5d6sewardj     // block[i .. VECTOR_BYTES-1]   repeating NOACCESS, UNDEF, DEF
1744d00f9827528d41ec354fa60482091c07fc93d7asewardj     // hence block[i], at the very least, is always NOACCESS
1754d00f9827528d41ec354fa60482091c07fc93d7asewardj     U4 i = nInitialValid;
176e731e774ef1d12d7b57480e1d70dcc07f116b5d6sewardj     for (j = i; j < VECTOR_BYTES; j++) {
1774d00f9827528d41ec354fa60482091c07fc93d7asewardj        switch ((j-i) % 3) {
1784d00f9827528d41ec354fa60482091c07fc93d7asewardj           case 0: make_noaccess(&block[j]); break;
1794d00f9827528d41ec354fa60482091c07fc93d7asewardj           case 1: block[j] = make_undef(block[j]); break;
1804d00f9827528d41ec354fa60482091c07fc93d7asewardj           case 2: /* already acc and def */ break;
1814d00f9827528d41ec354fa60482091c07fc93d7asewardj        }
1824d00f9827528d41ec354fa60482091c07fc93d7asewardj     }
1834d00f9827528d41ec354fa60482091c07fc93d7asewardj
1844d00f9827528d41ec354fa60482091c07fc93d7asewardj     // Do the access, possibly generating an error, and show the
1854d00f9827528d41ec354fa60482091c07fc93d7asewardj     // resulting V bits
186e731e774ef1d12d7b57480e1d70dcc07f116b5d6sewardj     U1 dst[VECTOR_BYTES];
187e731e774ef1d12d7b57480e1d70dcc07f116b5d6sewardj     vector_copy(&dst[0], block);
1884d00f9827528d41ec354fa60482091c07fc93d7asewardj
189e731e774ef1d12d7b57480e1d70dcc07f116b5d6sewardj     U1 dst_vbits[VECTOR_BYTES];
190e731e774ef1d12d7b57480e1d70dcc07f116b5d6sewardj     U4 r = VALGRIND_GET_VBITS(&dst[0], &dst_vbits[0], VECTOR_BYTES);
1914d00f9827528d41ec354fa60482091c07fc93d7asewardj     assert(r == 1 || r == 0);
1924d00f9827528d41ec354fa60482091c07fc93d7asewardj
1934d00f9827528d41ec354fa60482091c07fc93d7asewardj     fprintf(stderr, "\n");
194e731e774ef1d12d7b57480e1d70dcc07f116b5d6sewardj     for (j = 0; j < VECTOR_BYTES; j++) {
1954d00f9827528d41ec354fa60482091c07fc93d7asewardj        fprintf(stderr, "%c", dst_vbits[j] == 0 ? 'd'
1964d00f9827528d41ec354fa60482091c07fc93d7asewardj                              : dst_vbits[j] == 0xFF ? 'U' : '?');
1974d00f9827528d41ec354fa60482091c07fc93d7asewardj     }
1984d00f9827528d41ec354fa60482091c07fc93d7asewardj     fprintf(stderr, "\n\n");
1994d00f9827528d41ec354fa60482091c07fc93d7asewardj
2004d00f9827528d41ec354fa60482091c07fc93d7asewardj     // Also let's use the resulting value, to check we get an undef
2014d00f9827528d41ec354fa60482091c07fc93d7asewardj     // error
2024d00f9827528d41ec354fa60482091c07fc93d7asewardj     U1 sum = 0;
203e731e774ef1d12d7b57480e1d70dcc07f116b5d6sewardj     for (j = 0; j < VECTOR_BYTES; j++)
2044d00f9827528d41ec354fa60482091c07fc93d7asewardj        sum ^= dst[j];
2054d00f9827528d41ec354fa60482091c07fc93d7asewardj
2064d00f9827528d41ec354fa60482091c07fc93d7asewardj     if (sum == 42) {
2074d00f9827528d41ec354fa60482091c07fc93d7asewardj        CFENCE; fprintf(stderr, "%s", ""); CFENCE;
2084d00f9827528d41ec354fa60482091c07fc93d7asewardj     } else {
2094d00f9827528d41ec354fa60482091c07fc93d7asewardj        CFENCE; fprintf(stderr, "%s", ""); CFENCE;
2104d00f9827528d41ec354fa60482091c07fc93d7asewardj     }
2114d00f9827528d41ec354fa60482091c07fc93d7asewardj
2124d00f9827528d41ec354fa60482091c07fc93d7asewardj     fprintf(stderr, "\n");
2134d00f9827528d41ec354fa60482091c07fc93d7asewardj
2144d00f9827528d41ec354fa60482091c07fc93d7asewardj     if (!aligned) block--;
2154d00f9827528d41ec354fa60482091c07fc93d7asewardj     free(block);
2164d00f9827528d41ec354fa60482091c07fc93d7asewardj}
2174d00f9827528d41ec354fa60482091c07fc93d7asewardj
2184d00f9827528d41ec354fa60482091c07fc93d7asewardjint main ( void )
2194d00f9827528d41ec354fa60482091c07fc93d7asewardj{
220e731e774ef1d12d7b57480e1d70dcc07f116b5d6sewardj  fprintf(stderr, "sh-mem-vec%d: config: %s-endian, %d-bit word size\n",
221e731e774ef1d12d7b57480e1d70dcc07f116b5d6sewardj          VECTOR_BYTES * 8, get_endianness(), (int)(8 * sizeof(void*)));
2224d00f9827528d41ec354fa60482091c07fc93d7asewardj
2234d00f9827528d41ec354fa60482091c07fc93d7asewardj  U4 i;
224e731e774ef1d12d7b57480e1d70dcc07f116b5d6sewardj  void *temp;
225e731e774ef1d12d7b57480e1d70dcc07f116b5d6sewardj  if (posix_memalign(&temp, VECTOR_BYTES, N_BYTES) != 0)
226e731e774ef1d12d7b57480e1d70dcc07f116b5d6sewardj      abort();
227e731e774ef1d12d7b57480e1d70dcc07f116b5d6sewardj  U1* buf = temp;
2284d00f9827528d41ec354fa60482091c07fc93d7asewardj
2294d00f9827528d41ec354fa60482091c07fc93d7asewardj  // Fill |buf| with bytes, so that zero bits have a zero shadow
2304d00f9827528d41ec354fa60482091c07fc93d7asewardj  // (are defined) and one bits have a one shadow (are undefined)
2314d00f9827528d41ec354fa60482091c07fc93d7asewardj  for (i = 0; i < N_BYTES/2; i++) {
2324d00f9827528d41ec354fa60482091c07fc93d7asewardj     buf[i] = self_shadow( (i & (1<<5)) ? 0x00 : 0xFF );
2334d00f9827528d41ec354fa60482091c07fc93d7asewardj  }
2344d00f9827528d41ec354fa60482091c07fc93d7asewardj  for (     ;  i < N_BYTES; i++) {
2354d00f9827528d41ec354fa60482091c07fc93d7asewardj     buf[i] = self_shadow( randomU1() );
2364d00f9827528d41ec354fa60482091c07fc93d7asewardj  }
2374d00f9827528d41ec354fa60482091c07fc93d7asewardj
2384d00f9827528d41ec354fa60482091c07fc93d7asewardj  // Randomly copy the data around.  Once every 8 srcs/dsts, force
2394d00f9827528d41ec354fa60482091c07fc93d7asewardj  // the src or dst to be aligned.  Once every 64, force both to be
2404d00f9827528d41ec354fa60482091c07fc93d7asewardj  // aligned.  So as to give the fast (aligned) paths some checking.
2414d00f9827528d41ec354fa60482091c07fc93d7asewardj  const U4 n_copies = N_EVENTS;
2424d00f9827528d41ec354fa60482091c07fc93d7asewardj  U4 n_d_aligned = 0;
2434d00f9827528d41ec354fa60482091c07fc93d7asewardj  U4 n_s_aligned = 0;
2444d00f9827528d41ec354fa60482091c07fc93d7asewardj  U4 n_both_aligned = 0;
2454d00f9827528d41ec354fa60482091c07fc93d7asewardj  U4 n_fails = 0;
2464d00f9827528d41ec354fa60482091c07fc93d7asewardj
2474d00f9827528d41ec354fa60482091c07fc93d7asewardj  for (i = 0; i < n_copies; i++) {
248e731e774ef1d12d7b57480e1d70dcc07f116b5d6sewardj     U4 si = randomU4() % (N_BYTES-VECTOR_BYTES);
249e731e774ef1d12d7b57480e1d70dcc07f116b5d6sewardj     U4 di = randomU4() % (N_BYTES-VECTOR_BYTES);
250e731e774ef1d12d7b57480e1d70dcc07f116b5d6sewardj     if (0 == (randomU1() & 7)) si &= ~(VECTOR_BYTES-1);
251e731e774ef1d12d7b57480e1d70dcc07f116b5d6sewardj     if (0 == (randomU1() & 7)) di &= ~(VECTOR_BYTES-1);
252e731e774ef1d12d7b57480e1d70dcc07f116b5d6sewardj     if (0 == (randomU1() & 63)) { di &= ~(VECTOR_BYTES-1); si &= ~(VECTOR_BYTES-1); }
2534d00f9827528d41ec354fa60482091c07fc93d7asewardj
2544d00f9827528d41ec354fa60482091c07fc93d7asewardj     void* dst = &buf[di];
2554d00f9827528d41ec354fa60482091c07fc93d7asewardj     void* src = &buf[si];
2564d00f9827528d41ec354fa60482091c07fc93d7asewardj
257e731e774ef1d12d7b57480e1d70dcc07f116b5d6sewardj     if (0 == (((UWord)src) & (VECTOR_BYTES-1))) n_s_aligned++;
258e731e774ef1d12d7b57480e1d70dcc07f116b5d6sewardj     if (0 == (((UWord)dst) & (VECTOR_BYTES-1))) n_d_aligned++;
259e731e774ef1d12d7b57480e1d70dcc07f116b5d6sewardj     if (0 == (((UWord)src) & (VECTOR_BYTES-1)) && 0 == (((UWord)dst) & (VECTOR_BYTES-1)))
2604d00f9827528d41ec354fa60482091c07fc93d7asewardj       n_both_aligned++;
2614d00f9827528d41ec354fa60482091c07fc93d7asewardj
262e731e774ef1d12d7b57480e1d70dcc07f116b5d6sewardj     vector_copy(dst, src);
2634d00f9827528d41ec354fa60482091c07fc93d7asewardj  }
2644d00f9827528d41ec354fa60482091c07fc93d7asewardj
2654d00f9827528d41ec354fa60482091c07fc93d7asewardj  U4 freq[256];
2664d00f9827528d41ec354fa60482091c07fc93d7asewardj  for (i = 0; i < 256; i++)
2674d00f9827528d41ec354fa60482091c07fc93d7asewardj     freq[i] = 0;
2684d00f9827528d41ec354fa60482091c07fc93d7asewardj
2694d00f9827528d41ec354fa60482091c07fc93d7asewardj  for (i = 0; i < N_BYTES; i++) {
2704d00f9827528d41ec354fa60482091c07fc93d7asewardj     //if (i > 0 && 0 == (i & 0x0F)) fprintf(stderr, "\n");
2714d00f9827528d41ec354fa60482091c07fc93d7asewardj     U1 v_actual = make_def(buf[i]);
2724d00f9827528d41ec354fa60482091c07fc93d7asewardj     U1 v_shadow = get_shadow(buf[i]);
2734d00f9827528d41ec354fa60482091c07fc93d7asewardj     if (v_actual != v_shadow) n_fails++;
2744d00f9827528d41ec354fa60482091c07fc93d7asewardj     //fprintf(stderr, "%02x:%02x ", (U4)v_actual, (U4)v_shadow);
2754d00f9827528d41ec354fa60482091c07fc93d7asewardj     freq[(U4)v_actual]++;
2764d00f9827528d41ec354fa60482091c07fc93d7asewardj  }
2774d00f9827528d41ec354fa60482091c07fc93d7asewardj
2784d00f9827528d41ec354fa60482091c07fc93d7asewardj  fprintf(stderr, "\n");
2794d00f9827528d41ec354fa60482091c07fc93d7asewardj  U4 totFreq = 0;
2804d00f9827528d41ec354fa60482091c07fc93d7asewardj  for (i = 0; i < 256; i++) {
2814d00f9827528d41ec354fa60482091c07fc93d7asewardj     totFreq += freq[i];
2824d00f9827528d41ec354fa60482091c07fc93d7asewardj     if (i > 0 && (0 == (i % 16))) fprintf(stderr, "\n");
2834d00f9827528d41ec354fa60482091c07fc93d7asewardj     fprintf(stderr, "%5u ", freq[i]);
2844d00f9827528d41ec354fa60482091c07fc93d7asewardj  }
2854d00f9827528d41ec354fa60482091c07fc93d7asewardj  assert(totFreq == N_BYTES);
2864d00f9827528d41ec354fa60482091c07fc93d7asewardj
2874d00f9827528d41ec354fa60482091c07fc93d7asewardj  fprintf(stderr, "\n\n");
2884d00f9827528d41ec354fa60482091c07fc93d7asewardj  fprintf(stderr, "%u copies, %u d_aligned, %u s_aligned, %u both_aligned\n",
2894d00f9827528d41ec354fa60482091c07fc93d7asewardj         n_copies, n_d_aligned, n_s_aligned, n_both_aligned);
2904d00f9827528d41ec354fa60482091c07fc93d7asewardj  fprintf(stderr, "%u %s\n", n_fails, n_fails == 0 ? "failures" : "FAILURES");
2914d00f9827528d41ec354fa60482091c07fc93d7asewardj
2924d00f9827528d41ec354fa60482091c07fc93d7asewardj  // Check that we can detect underruns of the block.
2934d00f9827528d41ec354fa60482091c07fc93d7asewardj  fprintf(stderr, "\nExpect 2 x no error\n" );
294e731e774ef1d12d7b57480e1d70dcc07f116b5d6sewardj  vector_copy( &buf[100], &buf[0] );
295e731e774ef1d12d7b57480e1d70dcc07f116b5d6sewardj  vector_copy( &buf[0],   &buf[100] );
2964d00f9827528d41ec354fa60482091c07fc93d7asewardj
2974d00f9827528d41ec354fa60482091c07fc93d7asewardj  fprintf(stderr, "\nExpect 2 x error\n\n" );
298e731e774ef1d12d7b57480e1d70dcc07f116b5d6sewardj  vector_copy( &buf[100], &buf[-1]  ); // invalid rd
299e731e774ef1d12d7b57480e1d70dcc07f116b5d6sewardj  vector_copy( &buf[-1],  &buf[100] ); // invalid wr
3004d00f9827528d41ec354fa60482091c07fc93d7asewardj
3014d00f9827528d41ec354fa60482091c07fc93d7asewardj  // and overruns ..
3024d00f9827528d41ec354fa60482091c07fc93d7asewardj  fprintf(stderr, "\nExpect 2 x no error\n" );
303e731e774ef1d12d7b57480e1d70dcc07f116b5d6sewardj  vector_copy( &buf[200],            &buf[N_BYTES-VECTOR_BYTES + 0] );
304e731e774ef1d12d7b57480e1d70dcc07f116b5d6sewardj  vector_copy( &buf[N_BYTES-VECTOR_BYTES + 0], &buf[200]            );
3054d00f9827528d41ec354fa60482091c07fc93d7asewardj
3064d00f9827528d41ec354fa60482091c07fc93d7asewardj  fprintf(stderr, "\nExpect 2 x error\n\n" );
307e731e774ef1d12d7b57480e1d70dcc07f116b5d6sewardj  vector_copy( &buf[200],            &buf[N_BYTES-VECTOR_BYTES + 1] );
308e731e774ef1d12d7b57480e1d70dcc07f116b5d6sewardj  vector_copy( &buf[N_BYTES-VECTOR_BYTES + 1], &buf[200]            );
3094d00f9827528d41ec354fa60482091c07fc93d7asewardj
3104d00f9827528d41ec354fa60482091c07fc93d7asewardj  free(buf);
3114d00f9827528d41ec354fa60482091c07fc93d7asewardj  fprintf(stderr, "\n");
3124d00f9827528d41ec354fa60482091c07fc93d7asewardj
313e731e774ef1d12d7b57480e1d70dcc07f116b5d6sewardj  for (i = 0; i < VECTOR_BYTES; i++)
3144d00f9827528d41ec354fa60482091c07fc93d7asewardj     apply( do_partial_load_case, i, True/*aligned*/ );
3154d00f9827528d41ec354fa60482091c07fc93d7asewardj
316e731e774ef1d12d7b57480e1d70dcc07f116b5d6sewardj  for (i = 0; i < VECTOR_BYTES; i++)
3174d00f9827528d41ec354fa60482091c07fc93d7asewardj     apply( do_partial_load_case, i, False/*not aligned*/ );
3184d00f9827528d41ec354fa60482091c07fc93d7asewardj
3194d00f9827528d41ec354fa60482091c07fc93d7asewardj  return 0;
3204d00f9827528d41ec354fa60482091c07fc93d7asewardj}
321