1// This program is a thorough test of the LOADVn/STOREVn shadow memory
2// operations.
3
4#include <assert.h>
5#include <stdlib.h>
6#include <stdio.h>
7#include <string.h>
8#include "memcheck/memcheck.h"
9
// NOTE: every size mentioned in this file is a count of *bytes*, never bits.

// Unsigned integer types, named after their width in bytes.
typedef unsigned char U1;
typedef unsigned short U2;
typedef unsigned int U4;
typedef unsigned long long U8;

// Floating-point types, named the same way.
typedef float F4;
typedef double F8;

// Size in bytes of a[] (and of b[]).
#define SZB_OF_a 64

// a[] is the array that the loads and stores under test go through.  Its
// element type is U8 so that the array is 8-aligned.
U8 a[SZB_OF_a / 8];

// b[] has the same size as a[]; values get copied into it and back again.
U8 b[SZB_OF_a / 8];
26
27// XXX: should check the error cases for SET/GET_VBITS also
28
29// For the byte 'x', build a value of 'size' bytes from that byte, eg:
30//   size 1 --> x
31//   size 2 --> xx
32//   size 4 --> xxxx
33//   size 8 --> xxxxxxxx
34// where the 0 bits are seen by Memcheck as defined, and the 1 bits are
35// seen as undefined (ie. the value of each bit matches its V bit, ie. the
36// resulting value is the same as its metavalue).
37//
38U8 build(int size, U1 byte)
39{
40   int i;
41   U8 mask = 0;
42   U8 shres;
43   U8 res = 0xffffffffffffffffULL, res2;
44   VALGRIND_MAKE_MEM_UNDEFINED(&res, 8);
45   assert(1 == size || 2 == size || 4 == size || 8 == size);
46
47   for (i = 0; i < size; i++) {
48      mask <<= 8;
49      mask |= (U8)byte;
50   }
51
52   res &= mask;
53
54   // res is now considered partially defined, but we know exactly what its
55   // value is (it happens to be the same as its metavalue).
56
57   (void)VALGRIND_GET_VBITS(&res, &shres, 8);
58   res2 = res;
59   (void)VALGRIND_MAKE_MEM_DEFINED(&res2, 8);  // avoid the 'undefined' warning
60   assert(res2 == shres);
61   return res;
62}
63
64// Check that all the bytes in a[x..y-1] have their V byte equal
65// to either 'expected_byte' or 'expected_byte_alt'.
66// 'str' and 'offset' are only used for printing an error message if
67// something goes wrong.
68void check_all(U4 x, U4 y, U1 expected_byte, U1 expected_byte_alt,
69                           char* str, int offset)
70{
71   U1 sh[SZB_OF_a];     // Used for getting a[]'s V bits
72   int i;
73
74   (void)VALGRIND_GET_VBITS(a, sh, sizeof(a));
75   for (i = x; i < y; i++) {
76      if ( expected_byte != sh[i] && expected_byte_alt != sh[i] ) {
77         fprintf(stderr, "\n\nFAILURE: %s, offset %d, byte %d -- "
78                         "is 0x%x, should be 0x%x or 0x%x\n\n",
79                         str, offset, i, sh[i], expected_byte,
80                         expected_byte_alt);
81         exit(1);
82      }
83   }
84}
85
86int main(void)
87{
88   int h, i, j;
89   U1 *undefA, expected_byte, expected_byte_alt;
90
91   if (0 == RUNNING_ON_VALGRIND) {
92      fprintf(stderr, "error: this program only works when run under Valgrind\n");
93      exit(1);
94   }
95
96   // Check a[] has the expected alignment, and that it's not too high in
97   // the address space (which would trigger the slow cases in
98   // LOADVn/STOREVn) on 64-bit platforms).
99   assert( 0 == (long)a % 8);
100   if (sizeof(void*) == 8) {
101      assert( ((U1*)(&a[0])) < ((U1*)(32ULL * 1024*1024*1024)/*32G*/) );
102   }
103
104   // Check basic types have the expected sizes.
105   assert(1 == sizeof(U1));
106   assert(2 == sizeof(U2));
107   assert(4 == sizeof(U4));
108   assert(8 == sizeof(U8));
109
110   // Create an array of values that has all the possible V bit metavalues.
111   // Because 0 represents a defined bit, and because undefA[] is initially
112   // zeroed, we have the nice property that:
113   //
114   //    i == undefA[i] == V_bits_of(undefA[i])
115   //
116   // which is useful for testing below.
117   undefA = calloc(1, 256);         // one for each possible undefinedness value
118   VALGRIND_MAKE_MEM_UNDEFINED(undefA, 256);
119   for (i = 0; i < 256; i++) {
120      undefA[i] &= i;
121   }
122
123   // This code does a whole lot of reads and writes of a particular size
124   // (NNN = 1, 2, 4 or 8), with varying alignments, of values with
125   // different not/partially/fully defined metavalues, and checks that the
126   // V bits are set in a[] as expected using GET_VBITS.
127   //
128   // 'Ty' is the type of the thing we are copying.  It can be an integer
129   // type or an FP type.  'ITy' is the same-sized integer type (and thus
130   // will be the same as 'Ty' if 'ITy' is an integer type).  'ITy' is used
131   // when doing shifting/masking and stuff like that.
132
133#define DO(NNN, Ty, ITy, isF4) \
134   fprintf(stderr, "-- NNN: %d %s %s ------------------------\n", NNN, #Ty, #ITy); \
135   /* For all of the alignments from (0..NNN-1), eg. if NNN==4, we do */ \
136   /* alignments of 0, 1, 2, 3. */ \
137   for (h = 0; h < NNN; h++) { \
138 \
139      size_t n  = sizeof(a); \
140      size_t nN = n / sizeof(Ty); \
141      Ty* aN    = (Ty*)a; \
142      Ty* bN    = (Ty*)b; \
143      Ty* aNb   = (Ty*)(((U1*)aN) + h); /* set offset from a[] */ \
144      Ty* bNb   = (Ty*)(((U1*)bN) + h); /* set offset from b[] */ \
145 \
146      fprintf(stderr, "h = %d (checking %d..%d)   ", h, h, (int)(n-NNN+h)); \
147 \
148      /* For each of the 256 possible V byte values... */ \
149      for (j = 0; j < 256; j++) { \
150         /* build the value for i (one of: i, ii, iiii, iiiiiiii) */ \
151         U8  tmp        = build(NNN, j); \
152         ITy undefN_ITy = (ITy)tmp; \
153         Ty* undefN_Ty; \
154         { /* This just checks that no overflow occurred when squeezing */ \
155           /* the output of build() into a variable of type 'Ty'. */ \
156            U8  tmpDef     = tmp; \
157            ITy undefN_ITyDef = undefN_ITy; \
158            VALGRIND_MAKE_MEM_DEFINED(&tmpDef,        8  ); \
159            VALGRIND_MAKE_MEM_DEFINED(&undefN_ITyDef, NNN); \
160            assert(tmpDef == (U8)undefN_ITyDef); \
161         } \
162 \
163         /* We have to use an array for undefN_Ty -- because if we try to
164          * convert an integer type from build into an FP type with a
165          * straight cast -- eg "float f = (float)i" -- the value gets
166          * converted.  With this pointer/array nonsense the exact bit
167          * pattern gets used as an FP value unchanged (that FP value is
168          * undoubtedly nonsense, but that's not a problem here). */ \
169         undefN_Ty = (Ty*)&undefN_ITy; \
170         if (0 == j % 32) fprintf(stderr, "%d...", j); /* progress meter */ \
171 \
172 \
173         /* A nasty exception: most machines so far (x86/PPC32/PPC64)
174          * don't have 32-bit floats.  So 32-bit floats get cast to 64-bit
175          * floats.  Memcheck does a PCast in this case, which means that if
176          * any V bits for the 32-bit float are undefined (ie. 0 != j), all
177          * the V bits in the 64-bit float are undefined.  So account for
178          * this when checking.  AMD64 typically does FP arithmetic on
179          * SSE, effectively giving it access to 32-bit FP registers.  So
180          * in short, for floats, we have to allow either 'j' or 0xFF
181          * as an acceptable result.  Sigh. */ \
182         if (isF4) { \
183            expected_byte = j; \
184            expected_byte_alt = 0 != j ? 0xFF : j; \
185         } else { \
186            expected_byte = j; \
187            expected_byte_alt = j; \
188         } \
189 \
190         /* STOREVn.  Note that we use the first element of the undefN_Ty
191          * array, as explained above. */ \
192         for (i = 0; i < nN-1; i++) { aNb[i] = undefN_Ty[0]; } \
193         check_all(h, n-NNN+h, expected_byte, expected_byte_alt, "STOREVn", h); \
194 \
195         /* LOADVn -- by copying the values to one place and then back,
196          * we ensure that LOADVn gets exercised. */ \
197         for (i = 0; i < nN-1; i++) { bNb[i] = aNb[i]; } \
198         for (i = 0; i < nN-1; i++) { aNb[i] = bNb[i]; } \
199         check_all(h, n-NNN+h, expected_byte, expected_byte_alt, "LOADVn", h); \
200      } \
201      fprintf(stderr, "\n"); \
202   }
203
204   // For sizes 4 and 8 we do both integer and floating-point types.  The
205   // reason being that on 32-bit machines just using integer types never
206   // exercises LOADV8/STOREV8 -- for integer types these loads/stores get
207   // broken into two 32-bit loads/stores.
208   DO(1, U1, U1, /*isF4*/0);
209   DO(2, U2, U2, /*isF4*/0);
210   DO(4, U4, U4, /*isF4*/0);
211   DO(4, F4, U4, /*isF4*/1);
212   DO(8, U8, U8, /*isF4*/0);
213   DO(8, F8, U8, /*isF4*/0);
214
215   return 0;
216}
217