1
2#include <stdio.h>
3#include <stdlib.h>
4#include <assert.h>
5
/* Diagnostic switch: when non-zero, send() prints each record together
   with the running CRC after that record has been folded in. */
#define VERBOSE 0
7
/* Short aliases for the basic integer types used by this test.
   NOTE(review): the code below relies on UInt/Int being 32 bits wide
   (CRC top byte taken with ">> 24") and ULong/Long being 64 bits wide
   (rol1 shifts by 63) -- true for the x86 target this file is written
   for, but not enforced here. */
typedef  unsigned int            UInt;
typedef  unsigned char           UChar;
typedef  unsigned long long int  ULong;
typedef  signed long long int    Long;
typedef  signed int              Int;
typedef  unsigned short          UShort;
typedef  unsigned long           UWord;
typedef  char                    HChar;
16
unsigned myrandom(void)
{
   /* Multiply-with-carry generator built from two independent streams.
      Both states are function-local statics with fixed seeds, so the
      sequence returned is the same on every run of the program. */
   static unsigned state_w = 11;
   static unsigned state_z = 13;

   const unsigned z_low   = state_z & 0xFFFF;
   const unsigned z_carry = state_z >> 16;
   const unsigned w_low   = state_w & 0xFFFF;
   const unsigned w_carry = state_w >> 16;

   state_z = 36969 * z_low + z_carry;
   state_w = 18000 * w_low + w_carry;

   /* Low half of the z stream supplies the upper bits of the result. */
   return (state_z << 16) + state_w;
}
28
29/////////////////////////////////////////////////////////////////
30// BEGIN crc32 stuff                                           //
31/////////////////////////////////////////////////////////////////
32
/* 256-entry lookup table used by UPDATE_CRC: it is indexed by the top
   byte of the running CRC XORed with the next input byte.  Entry 1 is
   0x04c11db7; presumably this is the usual byte-at-a-time table for
   that generator polynomial (not re-derived here). */
static const UInt crc32Table[256] = {

   /*-- Ugly, innit? --*/

   0x00000000L, 0x04c11db7L, 0x09823b6eL, 0x0d4326d9L,
   0x130476dcL, 0x17c56b6bL, 0x1a864db2L, 0x1e475005L,
   0x2608edb8L, 0x22c9f00fL, 0x2f8ad6d6L, 0x2b4bcb61L,
   0x350c9b64L, 0x31cd86d3L, 0x3c8ea00aL, 0x384fbdbdL,
   0x4c11db70L, 0x48d0c6c7L, 0x4593e01eL, 0x4152fda9L,
   0x5f15adacL, 0x5bd4b01bL, 0x569796c2L, 0x52568b75L,
   0x6a1936c8L, 0x6ed82b7fL, 0x639b0da6L, 0x675a1011L,
   0x791d4014L, 0x7ddc5da3L, 0x709f7b7aL, 0x745e66cdL,
   0x9823b6e0L, 0x9ce2ab57L, 0x91a18d8eL, 0x95609039L,
   0x8b27c03cL, 0x8fe6dd8bL, 0x82a5fb52L, 0x8664e6e5L,
   0xbe2b5b58L, 0xbaea46efL, 0xb7a96036L, 0xb3687d81L,
   0xad2f2d84L, 0xa9ee3033L, 0xa4ad16eaL, 0xa06c0b5dL,
   0xd4326d90L, 0xd0f37027L, 0xddb056feL, 0xd9714b49L,
   0xc7361b4cL, 0xc3f706fbL, 0xceb42022L, 0xca753d95L,
   0xf23a8028L, 0xf6fb9d9fL, 0xfbb8bb46L, 0xff79a6f1L,
   0xe13ef6f4L, 0xe5ffeb43L, 0xe8bccd9aL, 0xec7dd02dL,
   0x34867077L, 0x30476dc0L, 0x3d044b19L, 0x39c556aeL,
   0x278206abL, 0x23431b1cL, 0x2e003dc5L, 0x2ac12072L,
   0x128e9dcfL, 0x164f8078L, 0x1b0ca6a1L, 0x1fcdbb16L,
   0x018aeb13L, 0x054bf6a4L, 0x0808d07dL, 0x0cc9cdcaL,
   0x7897ab07L, 0x7c56b6b0L, 0x71159069L, 0x75d48ddeL,
   0x6b93dddbL, 0x6f52c06cL, 0x6211e6b5L, 0x66d0fb02L,
   0x5e9f46bfL, 0x5a5e5b08L, 0x571d7dd1L, 0x53dc6066L,
   0x4d9b3063L, 0x495a2dd4L, 0x44190b0dL, 0x40d816baL,
   0xaca5c697L, 0xa864db20L, 0xa527fdf9L, 0xa1e6e04eL,
   0xbfa1b04bL, 0xbb60adfcL, 0xb6238b25L, 0xb2e29692L,
   0x8aad2b2fL, 0x8e6c3698L, 0x832f1041L, 0x87ee0df6L,
   0x99a95df3L, 0x9d684044L, 0x902b669dL, 0x94ea7b2aL,
   0xe0b41de7L, 0xe4750050L, 0xe9362689L, 0xedf73b3eL,
   0xf3b06b3bL, 0xf771768cL, 0xfa325055L, 0xfef34de2L,
   0xc6bcf05fL, 0xc27dede8L, 0xcf3ecb31L, 0xcbffd686L,
   0xd5b88683L, 0xd1799b34L, 0xdc3abdedL, 0xd8fba05aL,
   0x690ce0eeL, 0x6dcdfd59L, 0x608edb80L, 0x644fc637L,
   0x7a089632L, 0x7ec98b85L, 0x738aad5cL, 0x774bb0ebL,
   0x4f040d56L, 0x4bc510e1L, 0x46863638L, 0x42472b8fL,
   0x5c007b8aL, 0x58c1663dL, 0x558240e4L, 0x51435d53L,
   0x251d3b9eL, 0x21dc2629L, 0x2c9f00f0L, 0x285e1d47L,
   0x36194d42L, 0x32d850f5L, 0x3f9b762cL, 0x3b5a6b9bL,
   0x0315d626L, 0x07d4cb91L, 0x0a97ed48L, 0x0e56f0ffL,
   0x1011a0faL, 0x14d0bd4dL, 0x19939b94L, 0x1d528623L,
   0xf12f560eL, 0xf5ee4bb9L, 0xf8ad6d60L, 0xfc6c70d7L,
   0xe22b20d2L, 0xe6ea3d65L, 0xeba91bbcL, 0xef68060bL,
   0xd727bbb6L, 0xd3e6a601L, 0xdea580d8L, 0xda649d6fL,
   0xc423cd6aL, 0xc0e2d0ddL, 0xcda1f604L, 0xc960ebb3L,
   0xbd3e8d7eL, 0xb9ff90c9L, 0xb4bcb610L, 0xb07daba7L,
   0xae3afba2L, 0xaafbe615L, 0xa7b8c0ccL, 0xa379dd7bL,
   0x9b3660c6L, 0x9ff77d71L, 0x92b45ba8L, 0x9675461fL,
   0x8832161aL, 0x8cf30badL, 0x81b02d74L, 0x857130c3L,
   0x5d8a9099L, 0x594b8d2eL, 0x5408abf7L, 0x50c9b640L,
   0x4e8ee645L, 0x4a4ffbf2L, 0x470cdd2bL, 0x43cdc09cL,
   0x7b827d21L, 0x7f436096L, 0x7200464fL, 0x76c15bf8L,
   0x68860bfdL, 0x6c47164aL, 0x61043093L, 0x65c52d24L,
   0x119b4be9L, 0x155a565eL, 0x18197087L, 0x1cd86d30L,
   0x029f3d35L, 0x065e2082L, 0x0b1d065bL, 0x0fdc1becL,
   0x3793a651L, 0x3352bbe6L, 0x3e119d3fL, 0x3ad08088L,
   0x2497d08dL, 0x2056cd3aL, 0x2d15ebe3L, 0x29d4f654L,
   0xc5a92679L, 0xc1683bceL, 0xcc2b1d17L, 0xc8ea00a0L,
   0xd6ad50a5L, 0xd26c4d12L, 0xdf2f6bcbL, 0xdbee767cL,
   0xe3a1cbc1L, 0xe760d676L, 0xea23f0afL, 0xeee2ed18L,
   0xf0a5bd1dL, 0xf464a0aaL, 0xf9278673L, 0xfde69bc4L,
   0x89b8fd09L, 0x8d79e0beL, 0x803ac667L, 0x84fbdbd0L,
   0x9abc8bd5L, 0x9e7d9662L, 0x933eb0bbL, 0x97ffad0cL,
   0xafb010b1L, 0xab710d06L, 0xa6322bdfL, 0xa2f33668L,
   0xbcb4666dL, 0xb8757bdaL, 0xb5365d03L, 0xb1f740b4L
};
102
/* Fold one input byte into a running CRC.
     crcVar : modifiable UInt lvalue holding the running CRC; it is
              evaluated more than once, so it must have no side effects.
     cha    : the next input value; only its low 8 bits are used.
   The expansion is wrapped in do { } while (0) so that it behaves as a
   single statement (safe in an unbraced if/else), and every argument
   use is parenthesised so that compound expressions expand correctly. */
#define UPDATE_CRC(crcVar,cha)                          \
do {                                                    \
   (crcVar) = ((crcVar) << 8) ^                         \
              crc32Table[((crcVar) >> 24) ^             \
                         ((UChar)(cha))];               \
} while (0)
109
110static UInt crcBytes ( UChar* bytes, UWord nBytes, UInt crcIn )
111{
112   UInt crc = crcIn;
113   while (nBytes >= 4) {
114      UPDATE_CRC(crc, bytes[0]);
115      UPDATE_CRC(crc, bytes[1]);
116      UPDATE_CRC(crc, bytes[2]);
117      UPDATE_CRC(crc, bytes[3]);
118      bytes += 4;
119      nBytes -= 4;
120   }
121   while (nBytes >= 1) {
122      UPDATE_CRC(crc, bytes[0]);
123      bytes += 1;
124      nBytes -= 1;
125   }
126   return crc;
127}
128
129static UInt crcFinalise ( UInt crc ) {
130   return ~crc;
131}
132
133////////
134
/* Running CRC over every record passed to send().  It starts at
   all-ones and is complemented by crcFinalise() in main() before being
   compared with the expected value. */
static UInt theCRC = 0xFFFFFFFF;

/* Staging buffer: each test formats one record into it with sprintf()
   and passes the resulting length to send(). */
static HChar outBuf[1024];
138// take output that's in outBuf, length as specified, and
139// update the running crc.
140static void send ( int nbytes )
141{
142   assert( ((unsigned int)nbytes) < sizeof(outBuf)-1);
143   assert(outBuf[nbytes] == 0);
144   theCRC = crcBytes( (UChar*)&outBuf[0], nbytes, theCRC );
145   if (VERBOSE) printf("SEND %08x %s", theCRC, outBuf);
146}
147
148
149/////////////////////////////////////////////////////////////////
150// END crc32 stuff                                             //
151/////////////////////////////////////////////////////////////////
152
/* Operand values fed to the locked-instruction tests.  Exactly one of
   the two tables below is compiled; NVALS is its element count.  The
   values sit at and around the 0x3F/0x40, 0x7F/0x80, 0xBF/0xC0 and
   0xFF boundaries of the low byte, with the upper bytes either zero or
   all-ones.  Changing the selected table changes the generated output
   and therefore the CRC checked in main(). */
#if 0

// full version
#define NVALS 57

static unsigned int val[NVALS]
    = { 0x00, 0x01, 0x02, 0x03,
        0x3F, 0x40, 0x41,
        0x7E, 0x7F, 0x80, 0x81, 0x82,
        0xBF, 0xC0, 0xC1,
        0xFC, 0xFD, 0xFE, 0xFF,

        0xFF00, 0xFF01, 0xFF02, 0xFF03,
        0xFF3F, 0xFF40, 0xFF41,
        0xFF7E, 0xFF7F, 0xFF80, 0xFF81, 0xFF82,
        0xFFBF, 0xFFC0, 0xFFC1,
        0xFFFC, 0xFFFD, 0xFFFE, 0xFFFF,

        0xFFFFFF00, 0xFFFFFF01, 0xFFFFFF02, 0xFFFFFF03,
        0xFFFFFF3F, 0xFFFFFF40, 0xFFFFFF41,
        0xFFFFFF7E, 0xFFFFFF7F, 0xFFFFFF80, 0xFFFFFF81, 0xFFFFFF82,
        0xFFFFFFBF, 0xFFFFFFC0, 0xFFFFFFC1,
        0xFFFFFFFC, 0xFFFFFFFD, 0xFFFFFFFE, 0xFFFFFFFF
      };

#else

// shortened version, for use as valgrind regtest
#define NVALS 27

static unsigned int val[NVALS]
    = { 0x00, 0x01,
        0x3F, 0x40,
        0x7F, 0x80,
        0xBF, 0xC0,
        0xFF,

        0xFF00, 0xFF01,
        0xFF3F, 0xFF40,
        0xFF7F, 0xFF80,
        0xFFBF, 0xFFC0,
        0xFFFF,

        0xFFFFFF00, 0xFFFFFF01,
        0xFFFFFF3F, 0xFFFFFF40,
        0xFFFFFF7F, 0xFFFFFF80,
        0xFFFFFFBF, 0xFFFFFFC0,
        0xFFFFFFFF
      };

#endif
204
205/////////////////////////////////////
206
/* Bit masks for the x86 EFLAGS condition-code bits.  The tests build
   their input flag word from these (loaded with popfl) and mask the
   flag word captured after each instruction (pushfl) with CC_MASK, so
   that only these six bits reach the recorded output. */
#define CC_C    0x0001   /* carry            */
#define CC_P    0x0004   /* parity           */
#define CC_A    0x0010   /* auxiliary carry  */
#define CC_Z    0x0040   /* zero             */
#define CC_S    0x0080   /* sign             */
#define CC_O    0x0800   /* overflow         */

/* All six condition-code bits together. */
#define CC_MASK (CC_C | CC_P | CC_A | CC_Z | CC_S | CC_O)
215
/* GEN_do_locked_G_E(_name,_eax) defines the function
   do_locked_G_E_<_name>(), which exercises the lock-prefixed
   instruction "<_name> %<_eax>,(%ebx)" (register source, memory
   destination).  _eax names the part of %eax used as the source
   operand (al, ax or eax).

   For every pair of values (G, E) taken from val[] and each of the 64
   combinations of the O/S/Z/A/C/P input flags, the generated function:
     - loads the input flags with pushl/popfl, the G value into %eax
       and the address of e_val into %ebx;
     - executes the locked instruction on e_val;
     - captures the resulting flags with pushfl/popl into block[3];
     - formats one record (G, E before, flags in, E after, flags out
       masked with CC_MASK) into outBuf and passes it to send().

   block[] layout: [0] flags in, [1] G value, [2] address of e_val,
   [3] flags out.  The address is stored in an int, so this assumes
   pointers fit in 32 bits. */
#define GEN_do_locked_G_E(_name,_eax)   \
  \
  __attribute__((noinline)) void do_locked_G_E_##_name ( void )  \
  {   \
    volatile int e_val, g_val, e_val_before;   \
    int o, s, z, a, c, p, v1, v2, flags_in;   \
    int block[4];   \
    \
    for (v1 = 0; v1 < NVALS; v1++) {   \
    for (v2 = 0; v2 < NVALS; v2++) {   \
    \
    for (o = 0; o < 2; o++) {   \
    for (s = 0; s < 2; s++) {   \
    for (z = 0; z < 2; z++) {   \
    for (a = 0; a < 2; a++) {   \
    for (c = 0; c < 2; c++) {   \
    for (p = 0; p < 2; p++) {   \
      \
      flags_in = (o ? CC_O : 0)   \
               | (s ? CC_S : 0)   \
               | (z ? CC_Z : 0)   \
               | (a ? CC_A : 0)   \
               | (c ? CC_C : 0)   \
               | (p ? CC_P : 0);   \
      \
      g_val = val[v1];   \
      e_val = val[v2];   \
      e_val_before = e_val;   \
      \
      block[0] = flags_in;   \
      block[1] = g_val;   \
      block[2] = (int)(long)&e_val;   \
      block[3] = 0;   \
      __asm__ __volatile__(   \
          "movl 0(%0), %%eax\n\t"   \
          "pushl %%eax\n\t"   \
          "popfl\n\t"   \
          "movl 4(%0), %%eax\n\t"   \
          "movl 8(%0), %%ebx\n\t"   \
          "lock; " #_name " %%" #_eax ",(%%ebx)\n\t"   \
          "pushfl\n\t"   \
          "popl %%eax\n\t"   \
          "movl %%eax, 12(%0)\n\t"   \
          : : "r"(&block[0]) : "eax","ebx","cc","memory"   \
      );   \
      \
      send( \
         sprintf(outBuf,                                        \
                 "%s G=%08x E=%08x CCIN=%08x -> E=%08x CCOUT=%08x\n", \
                 #_name, g_val, e_val_before, flags_in,   \
                 e_val, block[3] & CC_MASK) );            \
      \
    }}}}}}   \
    \
    }}   \
  }
272
/* Instantiate do_locked_G_E_<insn>() for the byte, word and long forms
   of add, or, adc, sbb, and, sub and xor: 21 functions, each containing
   one lock-prefixed instruction. */
GEN_do_locked_G_E(addb,al)
GEN_do_locked_G_E(addw,ax)
GEN_do_locked_G_E(addl,eax)

GEN_do_locked_G_E(orb, al)
GEN_do_locked_G_E(orw, ax)
GEN_do_locked_G_E(orl, eax)

GEN_do_locked_G_E(adcb,al)
GEN_do_locked_G_E(adcw,ax)
GEN_do_locked_G_E(adcl,eax)

GEN_do_locked_G_E(sbbb,al)
GEN_do_locked_G_E(sbbw,ax)
GEN_do_locked_G_E(sbbl,eax)

GEN_do_locked_G_E(andb,al)
GEN_do_locked_G_E(andw,ax)
GEN_do_locked_G_E(andl,eax)

GEN_do_locked_G_E(subb,al)
GEN_do_locked_G_E(subw,ax)
GEN_do_locked_G_E(subl,eax)

GEN_do_locked_G_E(xorb,al)
GEN_do_locked_G_E(xorw,ax)
GEN_do_locked_G_E(xorl,eax)
300
301
302
303
/* GEN_do_locked_imm_E(_name,_eax,_imm) defines the function
   do_locked_imm_E_<_name>_<_imm>(), which exercises the lock-prefixed
   instruction "<_name> $<_imm>,(%ebx)" (immediate source, memory
   destination).  The _eax parameter is accepted but not used anywhere
   in the expansion.

   For every value E taken from val[] and each of the 64 combinations
   of the O/S/Z/A/C/P input flags, the generated function:
     - loads the input flags with pushl/popfl and the address of e_val
       into %ebx;
     - executes the locked instruction on e_val;
     - captures the resulting flags with pushfl/popl into block[2];
     - formats one record (immediate, E before, flags in, E after,
       flags out masked with CC_MASK) into outBuf and passes it to
       send().

   block[] layout: [0] flags in, [1] address of e_val, [2] flags out.
   The address is stored in an int, so this assumes pointers fit in
   32 bits. */
#define GEN_do_locked_imm_E(_name,_eax,_imm)        \
  \
  __attribute__((noinline)) void do_locked_imm_E_##_name##_##_imm ( void )  \
  {   \
    volatile int e_val, e_val_before;   \
    int o, s, z, a, c, p, v2, flags_in;   \
    int block[3];   \
    \
    for (v2 = 0; v2 < NVALS; v2++) {   \
    \
    for (o = 0; o < 2; o++) {   \
    for (s = 0; s < 2; s++) {   \
    for (z = 0; z < 2; z++) {   \
    for (a = 0; a < 2; a++) {   \
    for (c = 0; c < 2; c++) {   \
    for (p = 0; p < 2; p++) {   \
      \
      flags_in = (o ? CC_O : 0)   \
               | (s ? CC_S : 0)   \
               | (z ? CC_Z : 0)   \
               | (a ? CC_A : 0)   \
               | (c ? CC_C : 0)   \
               | (p ? CC_P : 0);   \
      \
      e_val = val[v2];   \
      e_val_before = e_val;   \
      \
      block[0] = flags_in;   \
      block[1] = (int)(long)&e_val;   \
      block[2] = 0;   \
      __asm__ __volatile__(   \
          "movl 0(%0), %%eax\n\t"   \
          "pushl %%eax\n\t"   \
          "popfl\n\t"   \
          "movl 4(%0), %%ebx\n\t"   \
          "lock; " #_name " $" #_imm ",(%%ebx)\n\t"   \
          "pushfl\n\t"   \
          "popl %%eax\n\t"   \
          "movl %%eax, 8(%0)\n\t"   \
          : : "r"(&block[0]) : "eax","ebx","cc","memory"   \
      );   \
      \
      send( \
        sprintf(outBuf, \
             "%s I=%s E=%08x CCIN=%08x -> E=%08x CCOUT=%08x\n",       \
             #_name, #_imm, e_val_before, flags_in,         \
                e_val, block[2] & CC_MASK) );               \
      \
    }}}}}}   \
    \
    }   \
  }
356
/* Instantiate do_locked_imm_E_<insn>_<imm>() for add, or, adc, sbb,
   and, sub and xor.  Each operation gets two immediates per operand
   size (byte, word, long): 42 functions, each containing one
   lock-prefixed instruction.  The second macro argument is ignored by
   GEN_do_locked_imm_E. */
GEN_do_locked_imm_E(addb,al,0x7F)
GEN_do_locked_imm_E(addb,al,0xF1)
GEN_do_locked_imm_E(addw,ax,0x7E)
GEN_do_locked_imm_E(addw,ax,0x9325)
GEN_do_locked_imm_E(addl,eax,0x7D)
GEN_do_locked_imm_E(addl,eax,0x31415927)

GEN_do_locked_imm_E(orb,al,0x7F)
GEN_do_locked_imm_E(orb,al,0xF1)
GEN_do_locked_imm_E(orw,ax,0x7E)
GEN_do_locked_imm_E(orw,ax,0x9325)
GEN_do_locked_imm_E(orl,eax,0x7D)
GEN_do_locked_imm_E(orl,eax,0x31415927)

GEN_do_locked_imm_E(adcb,al,0x7F)
GEN_do_locked_imm_E(adcb,al,0xF1)
GEN_do_locked_imm_E(adcw,ax,0x7E)
GEN_do_locked_imm_E(adcw,ax,0x9325)
GEN_do_locked_imm_E(adcl,eax,0x7D)
GEN_do_locked_imm_E(adcl,eax,0x31415927)

GEN_do_locked_imm_E(sbbb,al,0x7F)
GEN_do_locked_imm_E(sbbb,al,0xF1)
GEN_do_locked_imm_E(sbbw,ax,0x7E)
GEN_do_locked_imm_E(sbbw,ax,0x9325)
GEN_do_locked_imm_E(sbbl,eax,0x7D)
GEN_do_locked_imm_E(sbbl,eax,0x31415927)

GEN_do_locked_imm_E(andb,al,0x7F)
GEN_do_locked_imm_E(andb,al,0xF1)
GEN_do_locked_imm_E(andw,ax,0x7E)
GEN_do_locked_imm_E(andw,ax,0x9325)
GEN_do_locked_imm_E(andl,eax,0x7D)
GEN_do_locked_imm_E(andl,eax,0x31415927)

GEN_do_locked_imm_E(subb,al,0x7F)
GEN_do_locked_imm_E(subb,al,0xF1)
GEN_do_locked_imm_E(subw,ax,0x7E)
GEN_do_locked_imm_E(subw,ax,0x9325)
GEN_do_locked_imm_E(subl,eax,0x7D)
GEN_do_locked_imm_E(subl,eax,0x31415927)

GEN_do_locked_imm_E(xorb,al,0x7F)
GEN_do_locked_imm_E(xorb,al,0xF1)
GEN_do_locked_imm_E(xorw,ax,0x7E)
GEN_do_locked_imm_E(xorw,ax,0x9325)
GEN_do_locked_imm_E(xorl,eax,0x7D)
GEN_do_locked_imm_E(xorl,eax,0x31415927)
405
/* GEN_do_locked_unary_E(_name,_eax) defines the function
   do_locked_unary_E_<_name>(), which exercises the lock-prefixed
   single-operand instruction "<_name> (%ebx)" on a memory operand.
   The _eax parameter is accepted but not used anywhere in the
   expansion.

   For every value E taken from val[] and each of the 64 combinations
   of the O/S/Z/A/C/P input flags, the generated function:
     - loads the input flags with pushl/popfl and the address of e_val
       into %ebx;
     - executes the locked instruction on e_val;
     - captures the resulting flags with pushfl/popl into block[2];
     - formats one record (E before, flags in, E after, flags out
       masked with CC_MASK) into outBuf and passes it to send().

   block[] layout: [0] flags in, [1] address of e_val, [2] flags out.
   The address is stored in an int, so this assumes pointers fit in
   32 bits. */
#define GEN_do_locked_unary_E(_name,_eax)        \
  \
  __attribute__((noinline)) void do_locked_unary_E_##_name ( void )  \
  {   \
    volatile int e_val, e_val_before;   \
    int o, s, z, a, c, p, v2, flags_in;   \
    int block[3];   \
    \
    for (v2 = 0; v2 < NVALS; v2++) {   \
    \
    for (o = 0; o < 2; o++) {   \
    for (s = 0; s < 2; s++) {   \
    for (z = 0; z < 2; z++) {   \
    for (a = 0; a < 2; a++) {   \
    for (c = 0; c < 2; c++) {   \
    for (p = 0; p < 2; p++) {   \
      \
      flags_in = (o ? CC_O : 0)   \
               | (s ? CC_S : 0)   \
               | (z ? CC_Z : 0)   \
               | (a ? CC_A : 0)   \
               | (c ? CC_C : 0)   \
               | (p ? CC_P : 0);   \
      \
      e_val = val[v2];   \
      e_val_before = e_val;   \
      \
      block[0] = flags_in;   \
      block[1] = (int)(long)&e_val;   \
      block[2] = 0;   \
      __asm__ __volatile__(   \
          "movl 0(%0), %%eax\n\t"   \
          "pushl %%eax\n\t"   \
          "popfl\n\t"   \
          "movl 4(%0), %%ebx\n\t"   \
          "lock; " #_name " (%%ebx)\n\t"   \
          "pushfl\n\t"   \
          "popl %%eax\n\t"   \
          "movl %%eax, 8(%0)\n\t"   \
          : : "r"(&block[0]) : "eax","ebx","cc","memory"   \
      );   \
      \
      send( \
         sprintf(outBuf, \
                "%s E=%08x CCIN=%08x -> E=%08x CCOUT=%08x\n",   \
             #_name, e_val_before, flags_in,         \
                e_val, block[2] & CC_MASK));         \
      \
    }}}}}}   \
    \
    }   \
  }
458
/* Instantiate do_locked_unary_E_<insn>() for the byte, word and long
   forms of dec, inc, neg and not: 12 functions, each containing one
   lock-prefixed instruction.  The second macro argument is ignored by
   GEN_do_locked_unary_E. */
GEN_do_locked_unary_E(decb,al)
GEN_do_locked_unary_E(decw,ax)
GEN_do_locked_unary_E(decl,eax)

GEN_do_locked_unary_E(incb,al)
GEN_do_locked_unary_E(incw,ax)
GEN_do_locked_unary_E(incl,eax)

GEN_do_locked_unary_E(negb,al)
GEN_do_locked_unary_E(negw,ax)
GEN_do_locked_unary_E(negl,eax)

GEN_do_locked_unary_E(notb,al)
GEN_do_locked_unary_E(notw,ax)
GEN_do_locked_unary_E(notl,eax)
474
475
476/////////////////////////////////////////////////////////////////
477
478unsigned int btsl_mem ( UChar* base, int bitno )
479{
480   unsigned char res;
481   __asm__
482   __volatile__("lock; btsl\t%2, %0\n\t"
483                "setc\t%1"
484                : "=m" (*base), "=q" (res)
485                : "r" (bitno));
486   /* Pretty meaningless to dereference base here, but that's what you
487      have to do to get a btsl insn which refers to memory starting at
488      base. */
489   return res;
490}
491unsigned int btsw_mem ( UChar* base, int bitno )
492{
493   unsigned char res;
494   __asm__
495   __volatile__("lock; btsw\t%w2, %0\n\t"
496                "setc\t%1"
497                : "=m" (*base), "=q" (res)
498                : "r" (bitno));
499   return res;
500}
501
502unsigned int btrl_mem ( UChar* base, int bitno )
503{
504   unsigned char res;
505   __asm__
506   __volatile__("lock; btrl\t%2, %0\n\t"
507                "setc\t%1"
508                : "=m" (*base), "=q" (res)
509                : "r" (bitno));
510   return res;
511}
512unsigned int btrw_mem ( UChar* base, int bitno )
513{
514   unsigned char res;
515   __asm__
516   __volatile__("lock; btrw\t%w2, %0\n\t"
517                "setc\t%1"
518                : "=m" (*base), "=q" (res)
519                : "r" (bitno));
520   return res;
521}
522
523unsigned int btcl_mem ( UChar* base, int bitno )
524{
525   unsigned char res;
526   __asm__
527   __volatile__("lock; btcl\t%2, %0\n\t"
528                "setc\t%1"
529                : "=m" (*base), "=q" (res)
530                : "r" (bitno));
531   return res;
532}
533unsigned int btcw_mem ( UChar* base, int bitno )
534{
535   unsigned char res;
536   __asm__
537   __volatile__("lock; btcw\t%w2, %0\n\t"
538                "setc\t%1"
539                : "=m" (*base), "=q" (res)
540                : "r" (bitno));
541   return res;
542}
543
/* Execute "btl" (bit test, 32-bit form, no lock prefix) on the bit
   string that starts at 'base', using bit offset 'bitno' held in a
   register, and return the carry flag captured by setc (0 or 1).
   The asm declares "cc" and "memory" clobbers, so the compiler assumes
   the flags and arbitrary memory may be touched. */
unsigned int btl_mem ( UChar* base, int bitno )
{
   unsigned char res;
   __asm__
   __volatile__("btl\t%2, %0\n\t"
                "setc\t%1"
                : "=m" (*base), "=q" (res)
                : "r" (bitno)
                : "cc", "memory");
   return res;
}
555unsigned int btw_mem ( UChar* base, int bitno )
556{
557   unsigned char res;
558   __asm__
559   __volatile__("btw\t%w2, %0\n\t"
560                "setc\t%1"
561                : "=m" (*base), "=q" (res)
562                : "r" (bitno));
563   return res;
564}
565
566ULong rol1 ( ULong x )
567{
568  return (x << 1) | (x >> 63);
569}
570
571void do_bt_G_E_tests ( void )
572{
573   UInt   n, bitoff, op;
574   UInt   c;
575   UChar* block;
576   ULong  carrydep, res;;
577
578   /*------------------------ MEM-L -----------------------*/
579
580   carrydep = 0;
581   block = calloc(200,1);
582   block += 100;
583   /* Valid bit offsets are -800 .. 799 inclusive. */
584
585   for (n = 0; n < 10000; n++) {
586      bitoff = (myrandom() % 1600) - 800;
587      op = myrandom() % 4;
588      c = 2;
589      switch (op) {
590         case 0: c = btsl_mem(block, bitoff); break;
591         case 1: c = btrl_mem(block, bitoff); break;
592         case 2: c = btcl_mem(block, bitoff); break;
593         case 3: c = btl_mem(block, bitoff); break;
594      }
595      c &= 255;
596      assert(c == 0 || c == 1);
597      carrydep = c ? (rol1(carrydep) ^ (Long)(Int)bitoff) : carrydep;
598   }
599
600   /* Compute final result */
601   block -= 100;
602   res = 0;
603   for (n = 0; n < 200; n++) {
604      UChar ch = block[n];
605      /* printf("%d ", (int)block[n]); */
606      res = rol1(res) ^ (ULong)ch;
607   }
608
609   send( sprintf(outBuf,
610                 "bt{s,r,c}l: final res 0x%llx, carrydep 0x%llx\n",
611                 res, carrydep ));
612   free(block);
613
614   /*------------------------ MEM-W -----------------------*/
615
616   carrydep = 0;
617   block = calloc(200,1);
618   block += 100;
619   /* Valid bit offsets are -800 .. 799 inclusive. */
620
621   for (n = 0; n < 10000; n++) {
622      bitoff = (myrandom() % 1600) - 800;
623      op = myrandom() % 4;
624      c = 2;
625      switch (op) {
626         case 0: c = btsw_mem(block, bitoff); break;
627         case 1: c = btrw_mem(block, bitoff); break;
628         case 2: c = btcw_mem(block, bitoff); break;
629         case 3: c = btw_mem(block, bitoff); break;
630      }
631      c &= 255;
632      assert(c == 0 || c == 1);
633      carrydep = c ? (rol1(carrydep) ^ (Long)(Int)bitoff) : carrydep;
634   }
635
636   /* Compute final result */
637   block -= 100;
638   res = 0;
639   for (n = 0; n < 200; n++) {
640      UChar ch = block[n];
641      /* printf("%d ", (int)block[n]); */
642      res = rol1(res) ^ (ULong)ch;
643   }
644
645   send( sprintf(outBuf,
646                 "bt{s,r,c}w: final res 0x%llx, carrydep 0x%llx\n",
647                 res, carrydep ));
648   free(block);
649}
650
651
652/////////////////////////////////////////////////////////////////
653
654/* Given a word, do bt/bts/btr/btc on bits 0, 1, 2 and 3 of it, and
655   also reconstruct the original bits 0, 1, 2, 3 by looking at the
656   carry flag.  Returned result has mashed bits 0-3 at the bottom and
657   the reconstructed original bits 0-3 as 4-7. */
658
659UInt mash_mem_L ( UInt* origp )
660{
661  UInt reconstructed, mashed;
662  __asm__ __volatile__ (
663     "movl %2, %%edx\n\t"
664     ""
665     "movl $0, %%eax\n\t"
666     "\n\t"
667     "btl  $0, (%%edx)\n\t"
668     "setb %%cl\n\t"
669     "movzbl %%cl, %%ecx\n\t"
670     "orl %%ecx, %%eax\n\t"
671     "\n\t"
672     "lock; btsl $1, (%%edx)\n\t"
673     "setb %%cl\n\t"
674     "movzbl %%cl, %%ecx\n\t"
675     "shll $1, %%ecx\n\t"
676     "orl %%ecx, %%eax\n\t"
677     "\n\t"
678     "lock; btrl $2, (%%edx)\n\t"
679     "setb %%cl\n\t"
680     "movzbl %%cl, %%ecx\n\t"
681     "shll $2, %%ecx\n\t"
682     "orl %%ecx, %%eax\n\t"
683     "\n\t"
684     "lock; btcl $3, (%%edx)\n\t"
685     "setb %%cl\n\t"
686     "movzbl %%cl, %%ecx\n\t"
687     "shll $3, %%ecx\n\t"
688     "orl %%ecx, %%eax\n\t"
689     "\n\t"
690     "movl %%eax, %0\n\t"
691     "movl (%%edx), %1"
692
693     : "=r" (reconstructed), "=r" (mashed)
694     : "r" (origp)
695     : "eax", "ecx", "edx", "cc");
696  return (mashed & 0xF) | ((reconstructed & 0xF) << 4);
697}
698
699UInt mash_mem_W ( UShort* origp )
700{
701  UInt reconstructed, mashed;
702  __asm__ __volatile__ (
703     "movl %2, %%edx\n\t"
704     ""
705     "movl $0, %%eax\n\t"
706     "\n\t"
707     "btw  $0, (%%edx)\n\t"
708     "setb %%cl\n\t"
709     "movzbl %%cl, %%ecx\n\t"
710     "orl %%ecx, %%eax\n\t"
711     "\n\t"
712     "lock; btsw $1, (%%edx)\n\t"
713     "setb %%cl\n\t"
714     "movzbl %%cl, %%ecx\n\t"
715     "shll $1, %%ecx\n\t"
716     "orl %%ecx, %%eax\n\t"
717     "\n\t"
718     "lock; btrw $2, (%%edx)\n\t"
719     "setb %%cl\n\t"
720     "movzbl %%cl, %%ecx\n\t"
721     "shll $2, %%ecx\n\t"
722     "orl %%ecx, %%eax\n\t"
723     "\n\t"
724     "lock; btcw $3, (%%edx)\n\t"
725     "setb %%cl\n\t"
726     "movzbl %%cl, %%ecx\n\t"
727     "shll $3, %%ecx\n\t"
728     "orl %%ecx, %%eax\n\t"
729     "\n\t"
730     "movl %%eax, %0\n\t"
731     "movzwl (%%edx), %1"
732
733     : "=r" (reconstructed), "=r" (mashed)
734     : "r" (origp)
735     : "eax", "ecx", "edx", "cc");
736  return (mashed & 0xF) | ((reconstructed & 0xF) << 4);
737}
738
739
740void do_bt_imm_E_tests( void )
741{
742  int i;
743  UInt*   iil = malloc(sizeof(UInt));
744  UShort* iiw = malloc(sizeof(UShort));
745  for (i = 0; i < 0x10; i++) {
746    *iil = i;
747    *iiw = i;
748    send( sprintf(outBuf, "0x%x -> 0x%02x 0x%02x\n", i,
749                  mash_mem_L(iil), mash_mem_W(iiw)));
750  }
751  free(iil);
752  free(iiw);
753}
754
755
756
757/////////////////////////////////////////////////////////////////
758
759int main ( void )
760{
761  do_locked_G_E_addb();
762  do_locked_G_E_addw();
763  do_locked_G_E_addl();
764
765  do_locked_G_E_orb();
766  do_locked_G_E_orw();
767  do_locked_G_E_orl();
768
769  do_locked_G_E_adcb();
770  do_locked_G_E_adcw();
771  do_locked_G_E_adcl();
772
773  do_locked_G_E_sbbb();
774  do_locked_G_E_sbbw();
775  do_locked_G_E_sbbl();
776
777  do_locked_G_E_andb();
778  do_locked_G_E_andw();
779  do_locked_G_E_andl();
780
781  do_locked_G_E_subb();
782  do_locked_G_E_subw();
783  do_locked_G_E_subl();
784
785  do_locked_G_E_xorb();
786  do_locked_G_E_xorw();
787  do_locked_G_E_xorl();
788  //21
789  do_locked_imm_E_addb_0x7F();
790  do_locked_imm_E_addb_0xF1();
791  do_locked_imm_E_addw_0x7E();
792  do_locked_imm_E_addw_0x9325();
793  do_locked_imm_E_addl_0x7D();
794  do_locked_imm_E_addl_0x31415927();
795
796  do_locked_imm_E_orb_0x7F();
797  do_locked_imm_E_orb_0xF1();
798  do_locked_imm_E_orw_0x7E();
799  do_locked_imm_E_orw_0x9325();
800  do_locked_imm_E_orl_0x7D();
801  do_locked_imm_E_orl_0x31415927();
802
803  do_locked_imm_E_adcb_0x7F();
804  do_locked_imm_E_adcb_0xF1();
805  do_locked_imm_E_adcw_0x7E();
806  do_locked_imm_E_adcw_0x9325();
807  do_locked_imm_E_adcl_0x7D();
808  do_locked_imm_E_adcl_0x31415927();
809
810  do_locked_imm_E_sbbb_0x7F();
811  do_locked_imm_E_sbbb_0xF1();
812  do_locked_imm_E_sbbw_0x7E();
813  do_locked_imm_E_sbbw_0x9325();
814  do_locked_imm_E_sbbl_0x7D();
815  do_locked_imm_E_sbbl_0x31415927();
816
817  do_locked_imm_E_andb_0x7F();
818  do_locked_imm_E_andb_0xF1();
819  do_locked_imm_E_andw_0x7E();
820  do_locked_imm_E_andw_0x9325();
821  do_locked_imm_E_andl_0x7D();
822  do_locked_imm_E_andl_0x31415927();
823
824  do_locked_imm_E_subb_0x7F();
825  do_locked_imm_E_subb_0xF1();
826  do_locked_imm_E_subw_0x7E();
827  do_locked_imm_E_subw_0x9325();
828  do_locked_imm_E_subl_0x7D();
829  do_locked_imm_E_subl_0x31415927();
830
831  do_locked_imm_E_xorb_0x7F();
832  do_locked_imm_E_xorb_0xF1();
833  do_locked_imm_E_xorw_0x7E();
834  do_locked_imm_E_xorw_0x9325();
835  do_locked_imm_E_xorl_0x7D();
836  do_locked_imm_E_xorl_0x31415927();
837  // 63
838  do_locked_unary_E_decb();
839  do_locked_unary_E_decw();
840  do_locked_unary_E_decl();
841
842  do_locked_unary_E_incb();
843  do_locked_unary_E_incw();
844  do_locked_unary_E_incl();
845
846  do_locked_unary_E_negb();
847  do_locked_unary_E_negw();
848  do_locked_unary_E_negl();
849
850  do_locked_unary_E_notb();
851  do_locked_unary_E_notw();
852  do_locked_unary_E_notl();
853  // 75
854  do_bt_G_E_tests();
855  // 81
856  do_bt_imm_E_tests();
857  // 87
858  // So there should be 87 lock-prefixed instructions in the
859  // disassembly of this compilation unit.
860  // confirm with
861  // objdump -d ./x86locked | grep lock | grep -v do_lock | grep -v elf32 | wc
862
863  { UInt crcExpd = 0xB2D75045;
864    theCRC = crcFinalise( theCRC );
865    if (theCRC == crcExpd) {
866       printf("x86locked: PASS: CRCs actual 0x%08X expected 0x%08X\n",
867              theCRC, crcExpd);
868    } else {
869       printf("x86locked: FAIL: CRCs actual 0x%08X expected 0x%08X\n",
870              theCRC, crcExpd);
871       printf("x86locked: set #define VERBOSE 1 to diagnose\n");
872    }
873  }
874
875  return 0;
876}
877