1// This program is a thorough test of the LOADVn/STOREVn shadow memory
2// operations.
3
4#include <assert.h>
5#include <stdlib.h>
6#include <stdio.h>
7#include <string.h>
8#include "tests/sys_mman.h"
9#include "memcheck/memcheck.h"
10
11// All the sizes here are in *bytes*, not bits.
12
13typedef unsigned char        U1;
14typedef unsigned short       U2;
15typedef unsigned int         U4;
16typedef unsigned long long   U8;
17
18typedef float                F4;
19typedef double               F8;
20
21typedef unsigned long        UWord;
22
23#define PAGE_SIZE 4096ULL
24
25
26// XXX: should check the error cases for SET/GET_VBITS also
27
28// For the byte 'x', build a value of 'size' bytes from that byte, eg:
29//   size 1 --> x
30//   size 2 --> xx
31//   size 4 --> xxxx
32//   size 8 --> xxxxxxxx
33// where the 0 bits are seen by Memcheck as defined, and the 1 bits are
34// seen as undefined (ie. the value of each bit matches its V bit, ie. the
35// resulting value is the same as its metavalue).
36//
37U8 build(int size, U1 byte)
38{
39   int i;
40   U8 mask = 0;
41   U8 shres;
42   U8 res = 0xffffffffffffffffULL, res2;
43   (void)VALGRIND_MAKE_MEM_UNDEFINED(&res, 8);
44   assert(1 == size || 2 == size || 4 == size || 8 == size);
45
46   for (i = 0; i < size; i++) {
47      mask <<= 8;
48      mask |= (U8)byte;
49   }
50
51   res &= mask;
52
53   // res is now considered partially defined, but we know exactly what its
54   // value is (it happens to be the same as its metavalue).
55
56   (void)VALGRIND_GET_VBITS(&res, &shres, 8);
57   res2 = res;
58   (void)VALGRIND_MAKE_MEM_DEFINED(&res2, 8);  // avoid the 'undefined' warning
59   assert(res2 == shres);
60   return res;
61}
62
63U1 make_defined ( U1 x )
64{
65   volatile U1 xx = x;
66   (void)VALGRIND_MAKE_MEM_DEFINED(&xx, 1);
67   return xx;
68}
69
70void check(U1* arr, int n, char* who)
71{
72   int i;
73   U1* shadow = malloc(n);
74   U1 arr_i;
75   U8 sum = 0;
76   (void)VALGRIND_GET_VBITS(arr, shadow, n);
77   for (i = 0; i < n; i++) {
78      arr_i = make_defined(arr[i]);
79      if (arr_i != shadow[i]) {
80          fprintf(stderr, "\n\nFAILURE: %s, byte %d -- "
81                          "is 0x%x, should be 0x%x\n\n",
82                          who, i, shadow[i], arr[i]);
83          exit(1);
84      }
85      sum += (U8)arr_i;
86   }
87   free(shadow);
88   printf("test passed, sum = %llu (%9.5f per byte)\n",
89	  sum, (F8)sum / (F8)n);
90}
91
92static inline U4 randomU4 ( void )
93{
94   static U4 n = 0;
95   /* From "Numerical Recipes in C" 2nd Edition */
96   n = 1664525UL * n + 1013904223UL;
97   return n;
98}
99
100static inline U1 randomU1 ( void )
101{
102   return 0xFF & (randomU4() >> 13);
103}
104
105#define N_BYTES  300000
106#define N_EVENTS (5 * N_BYTES)
107
108
109void do_test_at ( U1* arr )
110{
111   int i;
112
113   U4 mv1 = 0, mv2 = 0, mv4 = 0, mv8 = 0, mv4f = 0, mv8f = 0;
114
115   /* Fill arr with random bytes whose shadows match them. */
116   if (0) printf("-------- arr = %p\n", arr);
117
118   printf("initialising\n");
119   for (i = 0; i < N_BYTES; i++)
120      arr[i] = (U1)build(1, randomU1());
121
122   printf("post-initialisation check\n");
123   check(arr, N_BYTES, "after initialisation");
124
125   /* Now do huge numbers of memory copies. */
126   printf("doing copies\n");
127   for (i = 0; i < N_EVENTS; i++) {
128      U4 ty, src, dst;
129      ty  = (randomU4() >> 13) % 5;
130     tryagain:
131      src = (randomU4() >>  1) % N_BYTES;
132      dst = (randomU4() >>  3) % N_BYTES;
133      switch (ty) {
134         case 0: { // U1
135            *(U1*)(arr+dst) = *(U1*)(arr+src);
136	    mv1++;
137            break;
138         }
139         case 1: { // U2
140            if (src+2 >= N_BYTES || dst+2 >= N_BYTES)
141               goto tryagain;
142            *(U2*)(arr+dst) = *(U2*)(arr+src);
143	    mv2++;
144            break;
145         }
146         case 2: { // U4
147            if (src+4 >= N_BYTES || dst+4 >= N_BYTES)
148               goto tryagain;
149            *(U4*)(arr+dst) = *(U4*)(arr+src);
150	    mv4++;
151            break;
152         }
153         case 3: { // U8
154            if (src+8 >= N_BYTES || dst+8 >= N_BYTES)
155               goto tryagain;
156            *(U8*)(arr+dst) = *(U8*)(arr+src);
157	    mv8++;
158            break;
159         }
160         /* Don't bother with 32-bit floats.  These cause
161            horrible complications, as discussed in sh-mem.c. */
162         /*
163         case 4: { // F4
164            if (src+4 >= N_BYTES || dst+4 >= N_BYTES)
165               goto tryagain;
166            *(F4*)(arr+dst) = *(F4*)(arr+src);
167	    mv4f++;
168            break;
169         }
170         */
171         case 4: { // F8
172            if (src+8 >= N_BYTES || dst+8 >= N_BYTES)
173               goto tryagain;
174#if defined(__i386__)
175	    /* Copying via an x87 register causes the test to fail,
176               because (I think) some obscure values that are FP
177               denormals get changed during the copy due to the FPU
178               normalising, or rounding, or whatever, them.  This
179               causes them to no longer bit-for-bit match the
180               accompanying metadata.  Yet we still need to do a
181               genuine 8-byte load/store to test the relevant memcheck
182               {LOADV8,STOREV8} routines.  Hence use the MMX registers
183               instead, as copying through them should be
184               straightforward.. */
185            __asm__ __volatile__(
186               "movq (%1), %%mm2\n\t"
187               "movq %%mm2, (%0)\n\t"
188               "emms"
189               : : "r"(arr+dst), "r"(arr+src) : "memory"
190            );
191#else
192            /* Straightforward.  On amd64, this gives a load/store of
193               the bottom half of an xmm register.  On ppc32/64 this
194               is a straighforward load/store of an FP register. */
195            *(F8*)(arr+dst) = *(F8*)(arr+src);
196#endif
197	    mv8f++;
198            break;
199         }
200         default:
201	   fprintf(stderr, "sh-mem-random: bad size\n");
202	   exit(0);
203      }
204   }
205
206   printf("final check\n");
207   check(arr, N_BYTES, "final check");
208
209   printf("counts 1/2/4/8/F4/F8: %d %d %d %d %d %d\n",
210          mv1, mv2, mv4, mv8, mv4f, mv8f);
211}
212
213
214
215int main(void)
216{
217   U1* arr;
218
219   if (0 == RUNNING_ON_VALGRIND) {
220      fprintf(stderr, "error: this program only works when run under Valgrind\n");
221      exit(1);
222   }
223
224   printf("-------- testing non-auxmap range --------\n");
225
226   arr = malloc(N_BYTES);
227   assert(arr);
228   do_test_at(arr);
229   free(arr);
230
231   if (sizeof(void*) == 8) {
232      // 64-bit platform.
233      int tries;
234      int nbytes_p;
235      // (U1*)(UWord)constULL funny casting to keep gcc quiet on
236      // 32-bit platforms
237      U1* huge_addr = (U1*)(UWord)0x6600000000ULL;  // 408GB
238      // Note, kernel 2.6.? on Athlon64 refuses fixed mmap requests
239      // at above 512GB.
240
241      printf("-------- testing auxmap range --------\n");
242
243      nbytes_p = (N_BYTES + PAGE_SIZE) & ~(PAGE_SIZE-1);
244
245      for (tries = 0; tries < 10; tries++) {
246         arr = mmap(huge_addr, nbytes_p, PROT_READ|PROT_WRITE,
247                    MAP_FIXED|MAP_PRIVATE|MAP_ANONYMOUS, -1,0);
248	 if (arr != MAP_FAILED)
249            break;
250	 // hmm. fudge the address and try again.
251         huge_addr += (randomU4() & ~(PAGE_SIZE-1));
252      }
253
254      if (tries >= 10) {
255	   fprintf(stderr, "sh-mem-random: can't mmap hi-mem\n");
256	   exit(0);
257      }
258      assert(arr != MAP_FAILED);
259
260      do_test_at(arr);
261   }
262
263   return 0;
264
265}
266