1
2#include <stdio.h>
3#include <stdlib.h>
4#include <assert.h>
5
/* Debug switch: when non-zero, send() prints every record it checksums
   together with the running CRC value. */
#define VERBOSE 0

/* Short aliases for the basic integer types used throughout this file. */
typedef  unsigned int            UInt;
typedef  unsigned char           UChar;
typedef  unsigned long long int  ULong;
typedef  signed long long int    Long;
typedef  signed int              Int;
typedef  unsigned short          UShort;
typedef  unsigned long           UWord;
typedef  char                    HChar;
16
17/////////////////////////////////////////////////////////////////
18// BEGIN crc32 stuff                                           //
19/////////////////////////////////////////////////////////////////
20
/* 256-entry lookup table used by UPDATE_CRC to fold one input byte at a
   time into a 32-bit CRC.  The table is indexed by the top byte of the
   running CRC XORed with the incoming byte.
   NOTE(review): entry 1 is 0x04c11db7, the usual CRC-32 generator
   polynomial, so this looks like the standard MSB-first CRC-32 table;
   the remaining entries were not re-derived here. */
static const UInt crc32Table[256] = {

   /*-- Ugly, innit? --*/

   0x00000000L, 0x04c11db7L, 0x09823b6eL, 0x0d4326d9L,
   0x130476dcL, 0x17c56b6bL, 0x1a864db2L, 0x1e475005L,
   0x2608edb8L, 0x22c9f00fL, 0x2f8ad6d6L, 0x2b4bcb61L,
   0x350c9b64L, 0x31cd86d3L, 0x3c8ea00aL, 0x384fbdbdL,
   0x4c11db70L, 0x48d0c6c7L, 0x4593e01eL, 0x4152fda9L,
   0x5f15adacL, 0x5bd4b01bL, 0x569796c2L, 0x52568b75L,
   0x6a1936c8L, 0x6ed82b7fL, 0x639b0da6L, 0x675a1011L,
   0x791d4014L, 0x7ddc5da3L, 0x709f7b7aL, 0x745e66cdL,
   0x9823b6e0L, 0x9ce2ab57L, 0x91a18d8eL, 0x95609039L,
   0x8b27c03cL, 0x8fe6dd8bL, 0x82a5fb52L, 0x8664e6e5L,
   0xbe2b5b58L, 0xbaea46efL, 0xb7a96036L, 0xb3687d81L,
   0xad2f2d84L, 0xa9ee3033L, 0xa4ad16eaL, 0xa06c0b5dL,
   0xd4326d90L, 0xd0f37027L, 0xddb056feL, 0xd9714b49L,
   0xc7361b4cL, 0xc3f706fbL, 0xceb42022L, 0xca753d95L,
   0xf23a8028L, 0xf6fb9d9fL, 0xfbb8bb46L, 0xff79a6f1L,
   0xe13ef6f4L, 0xe5ffeb43L, 0xe8bccd9aL, 0xec7dd02dL,
   0x34867077L, 0x30476dc0L, 0x3d044b19L, 0x39c556aeL,
   0x278206abL, 0x23431b1cL, 0x2e003dc5L, 0x2ac12072L,
   0x128e9dcfL, 0x164f8078L, 0x1b0ca6a1L, 0x1fcdbb16L,
   0x018aeb13L, 0x054bf6a4L, 0x0808d07dL, 0x0cc9cdcaL,
   0x7897ab07L, 0x7c56b6b0L, 0x71159069L, 0x75d48ddeL,
   0x6b93dddbL, 0x6f52c06cL, 0x6211e6b5L, 0x66d0fb02L,
   0x5e9f46bfL, 0x5a5e5b08L, 0x571d7dd1L, 0x53dc6066L,
   0x4d9b3063L, 0x495a2dd4L, 0x44190b0dL, 0x40d816baL,
   0xaca5c697L, 0xa864db20L, 0xa527fdf9L, 0xa1e6e04eL,
   0xbfa1b04bL, 0xbb60adfcL, 0xb6238b25L, 0xb2e29692L,
   0x8aad2b2fL, 0x8e6c3698L, 0x832f1041L, 0x87ee0df6L,
   0x99a95df3L, 0x9d684044L, 0x902b669dL, 0x94ea7b2aL,
   0xe0b41de7L, 0xe4750050L, 0xe9362689L, 0xedf73b3eL,
   0xf3b06b3bL, 0xf771768cL, 0xfa325055L, 0xfef34de2L,
   0xc6bcf05fL, 0xc27dede8L, 0xcf3ecb31L, 0xcbffd686L,
   0xd5b88683L, 0xd1799b34L, 0xdc3abdedL, 0xd8fba05aL,
   0x690ce0eeL, 0x6dcdfd59L, 0x608edb80L, 0x644fc637L,
   0x7a089632L, 0x7ec98b85L, 0x738aad5cL, 0x774bb0ebL,
   0x4f040d56L, 0x4bc510e1L, 0x46863638L, 0x42472b8fL,
   0x5c007b8aL, 0x58c1663dL, 0x558240e4L, 0x51435d53L,
   0x251d3b9eL, 0x21dc2629L, 0x2c9f00f0L, 0x285e1d47L,
   0x36194d42L, 0x32d850f5L, 0x3f9b762cL, 0x3b5a6b9bL,
   0x0315d626L, 0x07d4cb91L, 0x0a97ed48L, 0x0e56f0ffL,
   0x1011a0faL, 0x14d0bd4dL, 0x19939b94L, 0x1d528623L,
   0xf12f560eL, 0xf5ee4bb9L, 0xf8ad6d60L, 0xfc6c70d7L,
   0xe22b20d2L, 0xe6ea3d65L, 0xeba91bbcL, 0xef68060bL,
   0xd727bbb6L, 0xd3e6a601L, 0xdea580d8L, 0xda649d6fL,
   0xc423cd6aL, 0xc0e2d0ddL, 0xcda1f604L, 0xc960ebb3L,
   0xbd3e8d7eL, 0xb9ff90c9L, 0xb4bcb610L, 0xb07daba7L,
   0xae3afba2L, 0xaafbe615L, 0xa7b8c0ccL, 0xa379dd7bL,
   0x9b3660c6L, 0x9ff77d71L, 0x92b45ba8L, 0x9675461fL,
   0x8832161aL, 0x8cf30badL, 0x81b02d74L, 0x857130c3L,
   0x5d8a9099L, 0x594b8d2eL, 0x5408abf7L, 0x50c9b640L,
   0x4e8ee645L, 0x4a4ffbf2L, 0x470cdd2bL, 0x43cdc09cL,
   0x7b827d21L, 0x7f436096L, 0x7200464fL, 0x76c15bf8L,
   0x68860bfdL, 0x6c47164aL, 0x61043093L, 0x65c52d24L,
   0x119b4be9L, 0x155a565eL, 0x18197087L, 0x1cd86d30L,
   0x029f3d35L, 0x065e2082L, 0x0b1d065bL, 0x0fdc1becL,
   0x3793a651L, 0x3352bbe6L, 0x3e119d3fL, 0x3ad08088L,
   0x2497d08dL, 0x2056cd3aL, 0x2d15ebe3L, 0x29d4f654L,
   0xc5a92679L, 0xc1683bceL, 0xcc2b1d17L, 0xc8ea00a0L,
   0xd6ad50a5L, 0xd26c4d12L, 0xdf2f6bcbL, 0xdbee767cL,
   0xe3a1cbc1L, 0xe760d676L, 0xea23f0afL, 0xeee2ed18L,
   0xf0a5bd1dL, 0xf464a0aaL, 0xf9278673L, 0xfde69bc4L,
   0x89b8fd09L, 0x8d79e0beL, 0x803ac667L, 0x84fbdbd0L,
   0x9abc8bd5L, 0x9e7d9662L, 0x933eb0bbL, 0x97ffad0cL,
   0xafb010b1L, 0xab710d06L, 0xa6322bdfL, 0xa2f33668L,
   0xbcb4666dL, 0xb8757bdaL, 0xb5365d03L, 0xb1f740b4L
};
90
/* Fold one input byte into a running 32-bit CRC.

   crcVar  an lvalue holding the running CRC; it is updated in place.
           It is evaluated more than once, so it must not have side
           effects.
   cha     the next input value; only its low 8 bits (after conversion
           to UChar) are used.

   The body is wrapped in do { } while (0) so that an invocation
   followed by ';' is exactly one statement (safe in an unbraced
   if/else), and both arguments are parenthesised so that expression
   arguments such as `a + b` are cast as a whole. */
#define UPDATE_CRC(crcVar,cha)                         \
do {                                                   \
   (crcVar) = ((crcVar) << 8) ^                        \
              crc32Table[((crcVar) >> 24) ^            \
                         ((UChar)(cha))];              \
} while (0)
97
98static UInt crcBytes ( UChar* bytes, UWord nBytes, UInt crcIn )
99{
100   UInt crc = crcIn;
101   while (nBytes >= 4) {
102      UPDATE_CRC(crc, bytes[0]);
103      UPDATE_CRC(crc, bytes[1]);
104      UPDATE_CRC(crc, bytes[2]);
105      UPDATE_CRC(crc, bytes[3]);
106      bytes += 4;
107      nBytes -= 4;
108   }
109   while (nBytes >= 1) {
110      UPDATE_CRC(crc, bytes[0]);
111      bytes += 1;
112      nBytes -= 1;
113   }
114   return crc;
115}
116
117static UInt crcFinalise ( UInt crc ) {
118   return ~crc;
119}
120
121////////
122
123static UInt theCRC = 0xFFFFFFFF;
124
125static HChar outBuf[1024];
126// take output that's in outBuf, length as specified, and
127// update the running crc.
128static void send ( int nbytes )
129{
130   assert( ((unsigned int)nbytes) < sizeof(outBuf)-1);
131   assert(outBuf[nbytes] == 0);
132   theCRC = crcBytes( (UChar*)&outBuf[0], nbytes, theCRC );
133   if (VERBOSE) printf("SEND %08x %s", theCRC, outBuf);
134}
135
136
137/////////////////////////////////////////////////////////////////
138// END crc32 stuff                                             //
139/////////////////////////////////////////////////////////////////
140
/* Operand values fed to the instructions under test.  Both variants
   repeat one pattern of low-byte values three times: as 0x000000xx,
   as 0x0000FFxx and as 0xFFFFFFxx.  The first variant is compiled out
   by the #if 0; the shorter one after #else is the one in use. */
#if 0

// full version
#define NVALS 57

static unsigned int val[NVALS]
    = { 0x00, 0x01, 0x02, 0x03,
        0x3F, 0x40, 0x41,
        0x7E, 0x7F, 0x80, 0x81, 0x82,
        0xBF, 0xC0, 0xC1,
        0xFC, 0xFD, 0xFE, 0xFF,

        0xFF00, 0xFF01, 0xFF02, 0xFF03,
        0xFF3F, 0xFF40, 0xFF41,
        0xFF7E, 0xFF7F, 0xFF80, 0xFF81, 0xFF82,
        0xFFBF, 0xFFC0, 0xFFC1,
        0xFFFC, 0xFFFD, 0xFFFE, 0xFFFF,

        0xFFFFFF00, 0xFFFFFF01, 0xFFFFFF02, 0xFFFFFF03,
        0xFFFFFF3F, 0xFFFFFF40, 0xFFFFFF41,
        0xFFFFFF7E, 0xFFFFFF7F, 0xFFFFFF80, 0xFFFFFF81, 0xFFFFFF82,
        0xFFFFFFBF, 0xFFFFFFC0, 0xFFFFFFC1,
        0xFFFFFFFC, 0xFFFFFFFD, 0xFFFFFFFE, 0xFFFFFFFF
      };

#else

// shortened version, for use as valgrind regtest
// (9 low-byte values in each of the three groups, 27 in total)
#define NVALS 27

static unsigned int val[NVALS]
    = { 0x00, 0x01,
        0x3F, 0x40,
        0x7F, 0x80,
        0xBF, 0xC0,
        0xFF,

        0xFF00, 0xFF01,
        0xFF3F, 0xFF40,
        0xFF7F, 0xFF80,
        0xFFBF, 0xFFC0,
        0xFFFF,

        0xFFFFFF00, 0xFFFFFF01,
        0xFFFFFF3F, 0xFFFFFF40,
        0xFFFFFF7F, 0xFFFFFF80,
        0xFFFFFFBF, 0xFFFFFFC0,
        0xFFFFFFFF
      };

#endif
192
193/////////////////////////////////////
194
/* Bit masks for the condition-code flags as they appear in the value
   loaded by popfl / stored by pushfl in the test macros below:
   carry, parity, auxiliary carry, zero, sign and overflow. */
#define CC_C    0x0001
#define CC_P    0x0004
#define CC_A    0x0010
#define CC_Z    0x0040
#define CC_S    0x0080
#define CC_O    0x0800

/* All six flags together; used to mask the flags word read back after
   each instruction so only these bits reach the output. */
#define CC_MASK (CC_C | CC_P | CC_A | CC_Z | CC_S | CC_O)
203
/* GEN_do_locked_G_E(_name,_eax) defines do_locked_G_E_<_name>(), a test
   of the register-to-memory form "lock; <_name> %<_eax>,(%ebx)".

   _name  instruction mnemonic including its size suffix (e.g. addb).
   _eax   name of the source register matching that size (al/ax/eax).

   For every ordered pair (val[v1], val[v2]) and every one of the 64
   combinations of the O,S,Z,A,C,P input flags, the generated function:
     - stores the input flags, the register operand G and the address
       of the memory operand E in block[0..2];
     - in one asm statement loads the flags with pushl/popfl, loads G
       into %eax and &E into %ebx, runs the locked instruction, and
       saves the resulting flags (pushfl/popl) in block[3];
     - formats the inputs, the new E and the masked output flags into
       outBuf and passes the record to send().
   The address is stored in an int, so this only works where pointers
   fit in 32 bits. */
#define GEN_do_locked_G_E(_name,_eax)   \
  \
  __attribute__((noinline)) void do_locked_G_E_##_name ( void )  \
  {   \
    volatile int e_val, g_val, e_val_before;   \
    int o, s, z, a, c, p, v1, v2, flags_in;   \
    int block[4];   \
    \
    for (v1 = 0; v1 < NVALS; v1++) {   \
    for (v2 = 0; v2 < NVALS; v2++) {   \
    \
    for (o = 0; o < 2; o++) {   \
    for (s = 0; s < 2; s++) {   \
    for (z = 0; z < 2; z++) {   \
    for (a = 0; a < 2; a++) {   \
    for (c = 0; c < 2; c++) {   \
    for (p = 0; p < 2; p++) {   \
      \
      flags_in = (o ? CC_O : 0)   \
               | (s ? CC_S : 0)   \
               | (z ? CC_Z : 0)   \
               | (a ? CC_A : 0)   \
               | (c ? CC_C : 0)   \
               | (p ? CC_P : 0);   \
      \
      g_val = val[v1];   \
      e_val = val[v2];   \
      e_val_before = e_val;   \
      \
      /* block: [0] flags in, [1] G, [2] address of E, [3] flags out */ \
      block[0] = flags_in;   \
      block[1] = g_val;   \
      block[2] = (int)(long)&e_val;   \
      block[3] = 0;   \
      __asm__ __volatile__(   \
          "movl 0(%0), %%eax\n\t"   \
          "pushl %%eax\n\t"   \
          "popfl\n\t"   \
          "movl 4(%0), %%eax\n\t"   \
          "movl 8(%0), %%ebx\n\t"   \
          "lock; " #_name " %%" #_eax ",(%%ebx)\n\t"   \
          "pushfl\n\t"   \
          "popl %%eax\n\t"   \
          "movl %%eax, 12(%0)\n\t"   \
          : : "r"(&block[0]) : "eax","ebx","cc","memory"   \
      );   \
      \
      send( \
         sprintf(outBuf,                                        \
                 "%s G=%08x E=%08x CCIN=%08x -> E=%08x CCOUT=%08x\n", \
                 #_name, g_val, e_val_before, flags_in,   \
                 e_val, block[3] & CC_MASK) );            \
      \
    }}}}}}   \
    \
    }}   \
  }

/* One generated test per operation and operand size (byte, word, long). */
GEN_do_locked_G_E(addb,al)
GEN_do_locked_G_E(addw,ax)
GEN_do_locked_G_E(addl,eax)

GEN_do_locked_G_E(orb, al)
GEN_do_locked_G_E(orw, ax)
GEN_do_locked_G_E(orl, eax)

GEN_do_locked_G_E(adcb,al)
GEN_do_locked_G_E(adcw,ax)
GEN_do_locked_G_E(adcl,eax)

GEN_do_locked_G_E(sbbb,al)
GEN_do_locked_G_E(sbbw,ax)
GEN_do_locked_G_E(sbbl,eax)

GEN_do_locked_G_E(andb,al)
GEN_do_locked_G_E(andw,ax)
GEN_do_locked_G_E(andl,eax)

GEN_do_locked_G_E(subb,al)
GEN_do_locked_G_E(subw,ax)
GEN_do_locked_G_E(subl,eax)

GEN_do_locked_G_E(xorb,al)
GEN_do_locked_G_E(xorw,ax)
GEN_do_locked_G_E(xorl,eax)
288
289
290
291
/* GEN_do_locked_imm_E(_name,_eax,_imm) defines
   do_locked_imm_E_<_name>_<_imm>(), a test of the immediate-to-memory
   form "lock; <_name> $<_imm>,(%ebx)".

   _name  instruction mnemonic including its size suffix.
   _eax   accepted for symmetry with GEN_do_locked_G_E; it is not used
          anywhere in the expansion.
   _imm   the immediate operand; it is pasted into the function name
          and stringified into both the asm and the output record.

   For every val[v2] and every one of the 64 combinations of the
   O,S,Z,A,C,P input flags, the generated function loads the flags with
   pushl/popfl, loads &E into %ebx, runs the locked instruction, saves
   the resulting flags in block[2], then formats one record into outBuf
   and passes it to send().  The address is stored in an int, so this
   only works where pointers fit in 32 bits. */
#define GEN_do_locked_imm_E(_name,_eax,_imm)        \
  \
  __attribute__((noinline)) void do_locked_imm_E_##_name##_##_imm ( void )  \
  {   \
    volatile int e_val, e_val_before;   \
    int o, s, z, a, c, p, v2, flags_in;   \
    int block[3];   \
    \
    for (v2 = 0; v2 < NVALS; v2++) {   \
    \
    for (o = 0; o < 2; o++) {   \
    for (s = 0; s < 2; s++) {   \
    for (z = 0; z < 2; z++) {   \
    for (a = 0; a < 2; a++) {   \
    for (c = 0; c < 2; c++) {   \
    for (p = 0; p < 2; p++) {   \
      \
      flags_in = (o ? CC_O : 0)   \
               | (s ? CC_S : 0)   \
               | (z ? CC_Z : 0)   \
               | (a ? CC_A : 0)   \
               | (c ? CC_C : 0)   \
               | (p ? CC_P : 0);   \
      \
      e_val = val[v2];   \
      e_val_before = e_val;   \
      \
      /* block: [0] flags in, [1] address of E, [2] flags out */ \
      block[0] = flags_in;   \
      block[1] = (int)(long)&e_val;   \
      block[2] = 0;   \
      __asm__ __volatile__(   \
          "movl 0(%0), %%eax\n\t"   \
          "pushl %%eax\n\t"   \
          "popfl\n\t"   \
          "movl 4(%0), %%ebx\n\t"   \
          "lock; " #_name " $" #_imm ",(%%ebx)\n\t"   \
          "pushfl\n\t"   \
          "popl %%eax\n\t"   \
          "movl %%eax, 8(%0)\n\t"   \
          : : "r"(&block[0]) : "eax","ebx","cc","memory"   \
      );   \
      \
      send( \
        sprintf(outBuf, \
             "%s I=%s E=%08x CCIN=%08x -> E=%08x CCOUT=%08x\n",       \
             #_name, #_imm, e_val_before, flags_in,         \
                e_val, block[2] & CC_MASK) );               \
      \
    }}}}}}   \
    \
    }   \
  }

/* Six generated tests per operation: two immediates for each operand
   size (byte, word, long). */
GEN_do_locked_imm_E(addb,al,0x7F)
GEN_do_locked_imm_E(addb,al,0xF1)
GEN_do_locked_imm_E(addw,ax,0x7E)
GEN_do_locked_imm_E(addw,ax,0x9325)
GEN_do_locked_imm_E(addl,eax,0x7D)
GEN_do_locked_imm_E(addl,eax,0x31415927)

GEN_do_locked_imm_E(orb,al,0x7F)
GEN_do_locked_imm_E(orb,al,0xF1)
GEN_do_locked_imm_E(orw,ax,0x7E)
GEN_do_locked_imm_E(orw,ax,0x9325)
GEN_do_locked_imm_E(orl,eax,0x7D)
GEN_do_locked_imm_E(orl,eax,0x31415927)

GEN_do_locked_imm_E(adcb,al,0x7F)
GEN_do_locked_imm_E(adcb,al,0xF1)
GEN_do_locked_imm_E(adcw,ax,0x7E)
GEN_do_locked_imm_E(adcw,ax,0x9325)
GEN_do_locked_imm_E(adcl,eax,0x7D)
GEN_do_locked_imm_E(adcl,eax,0x31415927)

GEN_do_locked_imm_E(sbbb,al,0x7F)
GEN_do_locked_imm_E(sbbb,al,0xF1)
GEN_do_locked_imm_E(sbbw,ax,0x7E)
GEN_do_locked_imm_E(sbbw,ax,0x9325)
GEN_do_locked_imm_E(sbbl,eax,0x7D)
GEN_do_locked_imm_E(sbbl,eax,0x31415927)

GEN_do_locked_imm_E(andb,al,0x7F)
GEN_do_locked_imm_E(andb,al,0xF1)
GEN_do_locked_imm_E(andw,ax,0x7E)
GEN_do_locked_imm_E(andw,ax,0x9325)
GEN_do_locked_imm_E(andl,eax,0x7D)
GEN_do_locked_imm_E(andl,eax,0x31415927)

GEN_do_locked_imm_E(subb,al,0x7F)
GEN_do_locked_imm_E(subb,al,0xF1)
GEN_do_locked_imm_E(subw,ax,0x7E)
GEN_do_locked_imm_E(subw,ax,0x9325)
GEN_do_locked_imm_E(subl,eax,0x7D)
GEN_do_locked_imm_E(subl,eax,0x31415927)

GEN_do_locked_imm_E(xorb,al,0x7F)
GEN_do_locked_imm_E(xorb,al,0xF1)
GEN_do_locked_imm_E(xorw,ax,0x7E)
GEN_do_locked_imm_E(xorw,ax,0x9325)
GEN_do_locked_imm_E(xorl,eax,0x7D)
GEN_do_locked_imm_E(xorl,eax,0x31415927)
393
/* GEN_do_locked_unary_E(_name,_eax) defines do_locked_unary_E_<_name>(),
   a test of the single-operand memory form "lock; <_name> (%ebx)".

   _name  instruction mnemonic including its size suffix (e.g. decb).
   _eax   accepted for symmetry with GEN_do_locked_G_E; it is not used
          anywhere in the expansion.

   For every val[v2] and every one of the 64 combinations of the
   O,S,Z,A,C,P input flags, the generated function loads the flags with
   pushl/popfl, loads &E into %ebx, runs the locked instruction, saves
   the resulting flags in block[2], then formats one record into outBuf
   and passes it to send().  The address is stored in an int, so this
   only works where pointers fit in 32 bits. */
#define GEN_do_locked_unary_E(_name,_eax)        \
  \
  __attribute__((noinline)) void do_locked_unary_E_##_name ( void )  \
  {   \
    volatile int e_val, e_val_before;   \
    int o, s, z, a, c, p, v2, flags_in;   \
    int block[3];   \
    \
    for (v2 = 0; v2 < NVALS; v2++) {   \
    \
    for (o = 0; o < 2; o++) {   \
    for (s = 0; s < 2; s++) {   \
    for (z = 0; z < 2; z++) {   \
    for (a = 0; a < 2; a++) {   \
    for (c = 0; c < 2; c++) {   \
    for (p = 0; p < 2; p++) {   \
      \
      flags_in = (o ? CC_O : 0)   \
               | (s ? CC_S : 0)   \
               | (z ? CC_Z : 0)   \
               | (a ? CC_A : 0)   \
               | (c ? CC_C : 0)   \
               | (p ? CC_P : 0);   \
      \
      e_val = val[v2];   \
      e_val_before = e_val;   \
      \
      /* block: [0] flags in, [1] address of E, [2] flags out */ \
      block[0] = flags_in;   \
      block[1] = (int)(long)&e_val;   \
      block[2] = 0;   \
      __asm__ __volatile__(   \
          "movl 0(%0), %%eax\n\t"   \
          "pushl %%eax\n\t"   \
          "popfl\n\t"   \
          "movl 4(%0), %%ebx\n\t"   \
          "lock; " #_name " (%%ebx)\n\t"   \
          "pushfl\n\t"   \
          "popl %%eax\n\t"   \
          "movl %%eax, 8(%0)\n\t"   \
          : : "r"(&block[0]) : "eax","ebx","cc","memory"   \
      );   \
      \
      send( \
         sprintf(outBuf, \
                "%s E=%08x CCIN=%08x -> E=%08x CCOUT=%08x\n",   \
             #_name, e_val_before, flags_in,         \
                e_val, block[2] & CC_MASK));         \
      \
    }}}}}}   \
    \
    }   \
  }

/* One generated test per operation and operand size (byte, word, long). */
GEN_do_locked_unary_E(decb,al)
GEN_do_locked_unary_E(decw,ax)
GEN_do_locked_unary_E(decl,eax)

GEN_do_locked_unary_E(incb,al)
GEN_do_locked_unary_E(incw,ax)
GEN_do_locked_unary_E(incl,eax)

GEN_do_locked_unary_E(negb,al)
GEN_do_locked_unary_E(negw,ax)
GEN_do_locked_unary_E(negl,eax)

GEN_do_locked_unary_E(notb,al)
GEN_do_locked_unary_E(notw,ax)
GEN_do_locked_unary_E(notl,eax)
462
463
464/////////////////////////////////////////////////////////////////
465
466unsigned int btsl_mem ( UChar* base, int bitno )
467{
468   unsigned char res;
469   __asm__
470   __volatile__("lock; btsl\t%2, %0\n\t"
471                "setc\t%1"
472                : "=m" (*base), "=q" (res)
473                : "r" (bitno));
474   /* Pretty meaningless to dereference base here, but that's what you
475      have to do to get a btsl insn which refers to memory starting at
476      base. */
477   return res;
478}
479unsigned int btsw_mem ( UChar* base, int bitno )
480{
481   unsigned char res;
482   __asm__
483   __volatile__("lock; btsw\t%w2, %0\n\t"
484                "setc\t%1"
485                : "=m" (*base), "=q" (res)
486                : "r" (bitno));
487   return res;
488}
489
490unsigned int btrl_mem ( UChar* base, int bitno )
491{
492   unsigned char res;
493   __asm__
494   __volatile__("lock; btrl\t%2, %0\n\t"
495                "setc\t%1"
496                : "=m" (*base), "=q" (res)
497                : "r" (bitno));
498   return res;
499}
500unsigned int btrw_mem ( UChar* base, int bitno )
501{
502   unsigned char res;
503   __asm__
504   __volatile__("lock; btrw\t%w2, %0\n\t"
505                "setc\t%1"
506                : "=m" (*base), "=q" (res)
507                : "r" (bitno));
508   return res;
509}
510
511unsigned int btcl_mem ( UChar* base, int bitno )
512{
513   unsigned char res;
514   __asm__
515   __volatile__("lock; btcl\t%2, %0\n\t"
516                "setc\t%1"
517                : "=m" (*base), "=q" (res)
518                : "r" (bitno));
519   return res;
520}
521unsigned int btcw_mem ( UChar* base, int bitno )
522{
523   unsigned char res;
524   __asm__
525   __volatile__("lock; btcw\t%w2, %0\n\t"
526                "setc\t%1"
527                : "=m" (*base), "=q" (res)
528                : "r" (bitno));
529   return res;
530}
531
532unsigned int btl_mem ( UChar* base, int bitno )
533{
534   unsigned char res;
535   __asm__
536   __volatile__("btl\t%2, %0\n\t"
537                "setc\t%1"
538                : "=m" (*base), "=q" (res)
539                : "r" (bitno)
540                : "cc", "memory");
541   return res;
542}
543unsigned int btw_mem ( UChar* base, int bitno )
544{
545   unsigned char res;
546   __asm__
547   __volatile__("btw\t%w2, %0\n\t"
548                "setc\t%1"
549                : "=m" (*base), "=q" (res)
550                : "r" (bitno));
551   return res;
552}
553
554ULong rol1 ( ULong x )
555{
556  return (x << 1) | (x >> 63);
557}
558
559void do_bt_G_E_tests ( void )
560{
561   UInt   n, bitoff, op;
562   UInt   c;
563   UChar* block;
564   ULong  carrydep, res;;
565
566   /*------------------------ MEM-L -----------------------*/
567
568   carrydep = 0;
569   block = calloc(200,1);
570   block += 100;
571   /* Valid bit offsets are -800 .. 799 inclusive. */
572
573   for (n = 0; n < 10000; n++) {
574      bitoff = (random() % 1600) - 800;
575      op = random() % 4;
576      c = 2;
577      switch (op) {
578         case 0: c = btsl_mem(block, bitoff); break;
579         case 1: c = btrl_mem(block, bitoff); break;
580         case 2: c = btcl_mem(block, bitoff); break;
581         case 3: c = btl_mem(block, bitoff); break;
582      }
583      c &= 255;
584      assert(c == 0 || c == 1);
585      carrydep = c ? (rol1(carrydep) ^ (Long)(Int)bitoff) : carrydep;
586   }
587
588   /* Compute final result */
589   block -= 100;
590   res = 0;
591   for (n = 0; n < 200; n++) {
592      UChar ch = block[n];
593      /* printf("%d ", (int)block[n]); */
594      res = rol1(res) ^ (ULong)ch;
595   }
596
597   send( sprintf(outBuf,
598                 "bt{s,r,c}l: final res 0x%llx, carrydep 0x%llx\n",
599                 res, carrydep ));
600   free(block);
601
602   /*------------------------ MEM-W -----------------------*/
603
604   carrydep = 0;
605   block = calloc(200,1);
606   block += 100;
607   /* Valid bit offsets are -800 .. 799 inclusive. */
608
609   for (n = 0; n < 10000; n++) {
610      bitoff = (random() % 1600) - 800;
611      op = random() % 4;
612      c = 2;
613      switch (op) {
614         case 0: c = btsw_mem(block, bitoff); break;
615         case 1: c = btrw_mem(block, bitoff); break;
616         case 2: c = btcw_mem(block, bitoff); break;
617         case 3: c = btw_mem(block, bitoff); break;
618      }
619      c &= 255;
620      assert(c == 0 || c == 1);
621      carrydep = c ? (rol1(carrydep) ^ (Long)(Int)bitoff) : carrydep;
622   }
623
624   /* Compute final result */
625   block -= 100;
626   res = 0;
627   for (n = 0; n < 200; n++) {
628      UChar ch = block[n];
629      /* printf("%d ", (int)block[n]); */
630      res = rol1(res) ^ (ULong)ch;
631   }
632
633   send( sprintf(outBuf,
634                 "bt{s,r,c}w: final res 0x%llx, carrydep 0x%llx\n",
635                 res, carrydep ));
636   free(block);
637}
638
639
640/////////////////////////////////////////////////////////////////
641
642/* Given a word, do bt/bts/btr/btc on bits 0, 1, 2 and 3 of it, and
643   also reconstruct the original bits 0, 1, 2, 3 by looking at the
644   carry flag.  Returned result has mashed bits 0-3 at the bottom and
645   the reconstructed original bits 0-3 as 4-7. */
646
647UInt mash_mem_L ( UInt* origp )
648{
649  UInt reconstructed, mashed;
650  __asm__ __volatile__ (
651     "movl %2, %%edx\n\t"
652     ""
653     "movl $0, %%eax\n\t"
654     "\n\t"
655     "btl  $0, (%%edx)\n\t"
656     "setb %%cl\n\t"
657     "movzbl %%cl, %%ecx\n\t"
658     "orl %%ecx, %%eax\n\t"
659     "\n\t"
660     "lock; btsl $1, (%%edx)\n\t"
661     "setb %%cl\n\t"
662     "movzbl %%cl, %%ecx\n\t"
663     "shll $1, %%ecx\n\t"
664     "orl %%ecx, %%eax\n\t"
665     "\n\t"
666     "lock; btrl $2, (%%edx)\n\t"
667     "setb %%cl\n\t"
668     "movzbl %%cl, %%ecx\n\t"
669     "shll $2, %%ecx\n\t"
670     "orl %%ecx, %%eax\n\t"
671     "\n\t"
672     "lock; btcl $3, (%%edx)\n\t"
673     "setb %%cl\n\t"
674     "movzbl %%cl, %%ecx\n\t"
675     "shll $3, %%ecx\n\t"
676     "orl %%ecx, %%eax\n\t"
677     "\n\t"
678     "movl %%eax, %0\n\t"
679     "movl (%%edx), %1"
680
681     : "=r" (reconstructed), "=r" (mashed)
682     : "r" (origp)
683     : "eax", "ecx", "edx", "cc");
684  return (mashed & 0xF) | ((reconstructed & 0xF) << 4);
685}
686
687UInt mash_mem_W ( UShort* origp )
688{
689  UInt reconstructed, mashed;
690  __asm__ __volatile__ (
691     "movl %2, %%edx\n\t"
692     ""
693     "movl $0, %%eax\n\t"
694     "\n\t"
695     "btw  $0, (%%edx)\n\t"
696     "setb %%cl\n\t"
697     "movzbl %%cl, %%ecx\n\t"
698     "orl %%ecx, %%eax\n\t"
699     "\n\t"
700     "lock; btsw $1, (%%edx)\n\t"
701     "setb %%cl\n\t"
702     "movzbl %%cl, %%ecx\n\t"
703     "shll $1, %%ecx\n\t"
704     "orl %%ecx, %%eax\n\t"
705     "\n\t"
706     "lock; btrw $2, (%%edx)\n\t"
707     "setb %%cl\n\t"
708     "movzbl %%cl, %%ecx\n\t"
709     "shll $2, %%ecx\n\t"
710     "orl %%ecx, %%eax\n\t"
711     "\n\t"
712     "lock; btcw $3, (%%edx)\n\t"
713     "setb %%cl\n\t"
714     "movzbl %%cl, %%ecx\n\t"
715     "shll $3, %%ecx\n\t"
716     "orl %%ecx, %%eax\n\t"
717     "\n\t"
718     "movl %%eax, %0\n\t"
719     "movzwl (%%edx), %1"
720
721     : "=r" (reconstructed), "=r" (mashed)
722     : "r" (origp)
723     : "eax", "ecx", "edx", "cc");
724  return (mashed & 0xF) | ((reconstructed & 0xF) << 4);
725}
726
727
728void do_bt_imm_E_tests( void )
729{
730  int i;
731  UInt*   iil = malloc(sizeof(UInt));
732  UShort* iiw = malloc(sizeof(UShort));
733  for (i = 0; i < 0x10; i++) {
734    *iil = i;
735    *iiw = i;
736    send( sprintf(outBuf, "0x%x -> 0x%02x 0x%02x\n", i,
737                  mash_mem_L(iil), mash_mem_W(iiw)));
738  }
739  free(iil);
740  free(iiw);
741}
742
743
744
745/////////////////////////////////////////////////////////////////
746
747int main ( void )
748{
749  do_locked_G_E_addb();
750  do_locked_G_E_addw();
751  do_locked_G_E_addl();
752
753  do_locked_G_E_orb();
754  do_locked_G_E_orw();
755  do_locked_G_E_orl();
756
757  do_locked_G_E_adcb();
758  do_locked_G_E_adcw();
759  do_locked_G_E_adcl();
760
761  do_locked_G_E_sbbb();
762  do_locked_G_E_sbbw();
763  do_locked_G_E_sbbl();
764
765  do_locked_G_E_andb();
766  do_locked_G_E_andw();
767  do_locked_G_E_andl();
768
769  do_locked_G_E_subb();
770  do_locked_G_E_subw();
771  do_locked_G_E_subl();
772
773  do_locked_G_E_xorb();
774  do_locked_G_E_xorw();
775  do_locked_G_E_xorl();
776  //21
777  do_locked_imm_E_addb_0x7F();
778  do_locked_imm_E_addb_0xF1();
779  do_locked_imm_E_addw_0x7E();
780  do_locked_imm_E_addw_0x9325();
781  do_locked_imm_E_addl_0x7D();
782  do_locked_imm_E_addl_0x31415927();
783
784  do_locked_imm_E_orb_0x7F();
785  do_locked_imm_E_orb_0xF1();
786  do_locked_imm_E_orw_0x7E();
787  do_locked_imm_E_orw_0x9325();
788  do_locked_imm_E_orl_0x7D();
789  do_locked_imm_E_orl_0x31415927();
790
791  do_locked_imm_E_adcb_0x7F();
792  do_locked_imm_E_adcb_0xF1();
793  do_locked_imm_E_adcw_0x7E();
794  do_locked_imm_E_adcw_0x9325();
795  do_locked_imm_E_adcl_0x7D();
796  do_locked_imm_E_adcl_0x31415927();
797
798  do_locked_imm_E_sbbb_0x7F();
799  do_locked_imm_E_sbbb_0xF1();
800  do_locked_imm_E_sbbw_0x7E();
801  do_locked_imm_E_sbbw_0x9325();
802  do_locked_imm_E_sbbl_0x7D();
803  do_locked_imm_E_sbbl_0x31415927();
804
805  do_locked_imm_E_andb_0x7F();
806  do_locked_imm_E_andb_0xF1();
807  do_locked_imm_E_andw_0x7E();
808  do_locked_imm_E_andw_0x9325();
809  do_locked_imm_E_andl_0x7D();
810  do_locked_imm_E_andl_0x31415927();
811
812  do_locked_imm_E_subb_0x7F();
813  do_locked_imm_E_subb_0xF1();
814  do_locked_imm_E_subw_0x7E();
815  do_locked_imm_E_subw_0x9325();
816  do_locked_imm_E_subl_0x7D();
817  do_locked_imm_E_subl_0x31415927();
818
819  do_locked_imm_E_xorb_0x7F();
820  do_locked_imm_E_xorb_0xF1();
821  do_locked_imm_E_xorw_0x7E();
822  do_locked_imm_E_xorw_0x9325();
823  do_locked_imm_E_xorl_0x7D();
824  do_locked_imm_E_xorl_0x31415927();
825  // 63
826  do_locked_unary_E_decb();
827  do_locked_unary_E_decw();
828  do_locked_unary_E_decl();
829
830  do_locked_unary_E_incb();
831  do_locked_unary_E_incw();
832  do_locked_unary_E_incl();
833
834  do_locked_unary_E_negb();
835  do_locked_unary_E_negw();
836  do_locked_unary_E_negl();
837
838  do_locked_unary_E_notb();
839  do_locked_unary_E_notw();
840  do_locked_unary_E_notl();
841  // 75
842  do_bt_G_E_tests();
843  // 81
844  do_bt_imm_E_tests();
845  // 87
846  // So there should be 87 lock-prefixed instructions in the
847  // disassembly of this compilation unit.
848  // confirm with
849  // objdump -d ./x86locked | grep lock | grep -v do_lock | grep -v elf32 | wc
850
851  { UInt crcExpd = 0x8235DC9C;
852    theCRC = crcFinalise( theCRC );
853    if (theCRC == crcExpd) {
854       printf("x86locked: PASS: CRCs actual 0x%08X expected 0x%08X\n",
855              theCRC, crcExpd);
856    } else {
857       printf("x86locked: FAIL: CRCs actual 0x%08X expected 0x%08X\n",
858              theCRC, crcExpd);
859       printf("x86locked: set #define VERBOSE 1 to diagnose\n");
860    }
861  }
862
863  return 0;
864}
865