#include <stdio.h>
#include <stdlib.h>
#include <assert.h>

/* When non-zero, send() prints every record together with the running
   CRC, which is how a CRC mismatch is diagnosed. */
#define VERBOSE 0

typedef  unsigned int            UInt;
typedef  unsigned char           UChar;
typedef  unsigned long long int  ULong;
typedef  signed long long int    Long;
typedef  signed int              Int;
typedef  unsigned short          UShort;
typedef  unsigned long           UWord;
typedef  char                    HChar;

/* Deterministic pseudo-random source: the state is seeded with fixed
   constants, so every run produces the same sequence of values. */
unsigned myrandom(void)
{
   /* Simple multiply-with-carry random generator. */
   static unsigned m_w = 11;
   static unsigned m_z = 13;

   m_z = 36969 * (m_z & 65535) + (m_z >> 16);
   m_w = 18000 * (m_w & 65535) + (m_w >> 16);

   return (m_z << 16) + m_w;
}

/////////////////////////////////////////////////////////////////
// BEGIN crc32 stuff                                           //
/////////////////////////////////////////////////////////////////

/* Byte-indexed lookup table used by UPDATE_CRC: the CRC is shifted left
   one byte at a time and the table is indexed by the outgoing top byte
   XORed with the incoming data byte. */
static const UInt crc32Table[256] = {

   /*-- Ugly, innit? --*/

   0x00000000L, 0x04c11db7L, 0x09823b6eL, 0x0d4326d9L,
   0x130476dcL, 0x17c56b6bL, 0x1a864db2L, 0x1e475005L,
   0x2608edb8L, 0x22c9f00fL, 0x2f8ad6d6L, 0x2b4bcb61L,
   0x350c9b64L, 0x31cd86d3L, 0x3c8ea00aL, 0x384fbdbdL,
   0x4c11db70L, 0x48d0c6c7L, 0x4593e01eL, 0x4152fda9L,
   0x5f15adacL, 0x5bd4b01bL, 0x569796c2L, 0x52568b75L,
   0x6a1936c8L, 0x6ed82b7fL, 0x639b0da6L, 0x675a1011L,
   0x791d4014L, 0x7ddc5da3L, 0x709f7b7aL, 0x745e66cdL,
   0x9823b6e0L, 0x9ce2ab57L, 0x91a18d8eL, 0x95609039L,
   0x8b27c03cL, 0x8fe6dd8bL, 0x82a5fb52L, 0x8664e6e5L,
   0xbe2b5b58L, 0xbaea46efL, 0xb7a96036L, 0xb3687d81L,
   0xad2f2d84L, 0xa9ee3033L, 0xa4ad16eaL, 0xa06c0b5dL,
   0xd4326d90L, 0xd0f37027L, 0xddb056feL, 0xd9714b49L,
   0xc7361b4cL, 0xc3f706fbL, 0xceb42022L, 0xca753d95L,
   0xf23a8028L, 0xf6fb9d9fL, 0xfbb8bb46L, 0xff79a6f1L,
   0xe13ef6f4L, 0xe5ffeb43L, 0xe8bccd9aL, 0xec7dd02dL,
   0x34867077L, 0x30476dc0L, 0x3d044b19L, 0x39c556aeL,
   0x278206abL, 0x23431b1cL, 0x2e003dc5L, 0x2ac12072L,
   0x128e9dcfL, 0x164f8078L, 0x1b0ca6a1L, 0x1fcdbb16L,
   0x018aeb13L, 0x054bf6a4L, 0x0808d07dL, 0x0cc9cdcaL,
   0x7897ab07L, 0x7c56b6b0L, 0x71159069L, 0x75d48ddeL,
   0x6b93dddbL, 0x6f52c06cL, 0x6211e6b5L, 0x66d0fb02L,
   0x5e9f46bfL, 0x5a5e5b08L, 0x571d7dd1L, 0x53dc6066L,
   0x4d9b3063L, 0x495a2dd4L, 0x44190b0dL, 0x40d816baL,
   0xaca5c697L, 0xa864db20L, 0xa527fdf9L, 0xa1e6e04eL,
   0xbfa1b04bL, 0xbb60adfcL, 0xb6238b25L, 0xb2e29692L,
   0x8aad2b2fL, 0x8e6c3698L, 0x832f1041L, 0x87ee0df6L,
   0x99a95df3L, 0x9d684044L, 0x902b669dL, 0x94ea7b2aL,
   0xe0b41de7L, 0xe4750050L, 0xe9362689L, 0xedf73b3eL,
   0xf3b06b3bL, 0xf771768cL, 0xfa325055L, 0xfef34de2L,
   0xc6bcf05fL, 0xc27dede8L, 0xcf3ecb31L, 0xcbffd686L,
   0xd5b88683L, 0xd1799b34L, 0xdc3abdedL, 0xd8fba05aL,
   0x690ce0eeL, 0x6dcdfd59L, 0x608edb80L, 0x644fc637L,
   0x7a089632L, 0x7ec98b85L, 0x738aad5cL, 0x774bb0ebL,
   0x4f040d56L, 0x4bc510e1L, 0x46863638L, 0x42472b8fL,
   0x5c007b8aL, 0x58c1663dL, 0x558240e4L, 0x51435d53L,
   0x251d3b9eL, 0x21dc2629L, 0x2c9f00f0L, 0x285e1d47L,
   0x36194d42L, 0x32d850f5L, 0x3f9b762cL, 0x3b5a6b9bL,
   0x0315d626L, 0x07d4cb91L, 0x0a97ed48L, 0x0e56f0ffL,
   0x1011a0faL, 0x14d0bd4dL, 0x19939b94L, 0x1d528623L,
   0xf12f560eL, 0xf5ee4bb9L, 0xf8ad6d60L, 0xfc6c70d7L,
   0xe22b20d2L, 0xe6ea3d65L, 0xeba91bbcL, 0xef68060bL,
   0xd727bbb6L, 0xd3e6a601L, 0xdea580d8L, 0xda649d6fL,
   0xc423cd6aL, 0xc0e2d0ddL, 0xcda1f604L, 0xc960ebb3L,
   0xbd3e8d7eL, 0xb9ff90c9L, 0xb4bcb610L, 0xb07daba7L,
   0xae3afba2L, 0xaafbe615L, 0xa7b8c0ccL, 0xa379dd7bL,
   0x9b3660c6L, 0x9ff77d71L, 0x92b45ba8L, 0x9675461fL,
   0x8832161aL, 0x8cf30badL, 0x81b02d74L, 0x857130c3L,
   0x5d8a9099L, 0x594b8d2eL, 0x5408abf7L, 0x50c9b640L,
   0x4e8ee645L, 0x4a4ffbf2L, 0x470cdd2bL, 0x43cdc09cL,
   0x7b827d21L, 0x7f436096L, 0x7200464fL, 0x76c15bf8L,
   0x68860bfdL, 0x6c47164aL, 0x61043093L, 0x65c52d24L,
   0x119b4be9L, 0x155a565eL, 0x18197087L, 0x1cd86d30L,
   0x029f3d35L, 0x065e2082L, 0x0b1d065bL, 0x0fdc1becL,
   0x3793a651L, 0x3352bbe6L, 0x3e119d3fL, 0x3ad08088L,
   0x2497d08dL, 0x2056cd3aL, 0x2d15ebe3L, 0x29d4f654L,
   0xc5a92679L, 0xc1683bceL, 0xcc2b1d17L, 0xc8ea00a0L,
   0xd6ad50a5L, 0xd26c4d12L, 0xdf2f6bcbL, 0xdbee767cL,
   0xe3a1cbc1L, 0xe760d676L, 0xea23f0afL, 0xeee2ed18L,
   0xf0a5bd1dL, 0xf464a0aaL, 0xf9278673L, 0xfde69bc4L,
   0x89b8fd09L, 0x8d79e0beL, 0x803ac667L, 0x84fbdbd0L,
   0x9abc8bd5L, 0x9e7d9662L, 0x933eb0bbL, 0x97ffad0cL,
   0xafb010b1L, 0xab710d06L, 0xa6322bdfL, 0xa2f33668L,
   0xbcb4666dL, 0xb8757bdaL, 0xb5365d03L, 0xb1f740b4L
};

/* Fold one byte `cha` into the 32-bit CRC held in the lvalue `crcVar`.
   `crcVar` is expanded more than once, so it must be a side-effect-free
   lvalue; `cha` is expanded exactly once.  Wrapped in do/while(0) so
   that the macro behaves as a single statement. */
#define UPDATE_CRC(crcVar,cha)                        \
   do {                                               \
      (crcVar) = ((crcVar) << 8) ^                    \
                 crc32Table[((crcVar) >> 24) ^        \
                            ((UChar)(cha))];          \
   } while (0)

/* Continue a CRC computation over `nBytes` bytes starting at `bytes`.
   `crcIn` is the CRC accumulated so far; the updated value is returned.
   With nBytes == 0 the input CRC is returned unchanged. */
static UInt crcBytes ( const UChar* bytes, UWord nBytes, UInt crcIn )
{
   UInt  crc = crcIn;
   UWord i;
   for (i = 0; i < nBytes; i++) {
      UPDATE_CRC(crc, bytes[i]);
   }
   return crc;
}

/* Final step of the CRC: the accumulated value is bitwise inverted. */
static UInt crcFinalise ( UInt crc ) {
   return ~crc;
}

////////

/* Running CRC over everything passed to send(); starts at all-ones. */
static UInt theCRC = 0xFFFFFFFF;

/* Scratch buffer that each test formats one record into. */
static HChar outBuf[1024];
// take output that's in outBuf, length as specified, and
// update the running crc.
140static void send ( int nbytes ) 141{ 142 assert( ((unsigned int)nbytes) < sizeof(outBuf)-1); 143 assert(outBuf[nbytes] == 0); 144 theCRC = crcBytes( (UChar*)&outBuf[0], nbytes, theCRC ); 145 if (VERBOSE) printf("SEND %08x %s", theCRC, outBuf); 146} 147 148 149///////////////////////////////////////////////////////////////// 150// END crc32 stuff // 151///////////////////////////////////////////////////////////////// 152 153#if 0 154 155// full version 156#define NVALS 57 157 158static unsigned int val[NVALS] 159 = { 0x00, 0x01, 0x02, 0x03, 160 0x3F, 0x40, 0x41, 161 0x7E, 0x7F, 0x80, 0x81, 0x82, 162 0xBF, 0xC0, 0xC1, 163 0xFC, 0xFD, 0xFE, 0xFF, 164 165 0xFF00, 0xFF01, 0xFF02, 0xFF03, 166 0xFF3F, 0xFF40, 0xFF41, 167 0xFF7E, 0xFF7F, 0xFF80, 0xFF81, 0xFF82, 168 0xFFBF, 0xFFC0, 0xFFC1, 169 0xFFFC, 0xFFFD, 0xFFFE, 0xFFFF, 170 171 0xFFFFFF00, 0xFFFFFF01, 0xFFFFFF02, 0xFFFFFF03, 172 0xFFFFFF3F, 0xFFFFFF40, 0xFFFFFF41, 173 0xFFFFFF7E, 0xFFFFFF7F, 0xFFFFFF80, 0xFFFFFF81, 0xFFFFFF82, 174 0xFFFFFFBF, 0xFFFFFFC0, 0xFFFFFFC1, 175 0xFFFFFFFC, 0xFFFFFFFD, 0xFFFFFFFE, 0xFFFFFFFF 176 }; 177 178#else 179 180// shortened version, for use as valgrind regtest 181#define NVALS 27 182 183static unsigned int val[NVALS] 184 = { 0x00, 0x01, 185 0x3F, 0x40, 186 0x7F, 0x80, 187 0xBF, 0xC0, 188 0xFF, 189 190 0xFF00, 0xFF01, 191 0xFF3F, 0xFF40, 192 0xFF7F, 0xFF80, 193 0xFFBF, 0xFFC0, 194 0xFFFF, 195 196 0xFFFFFF00, 0xFFFFFF01, 197 0xFFFFFF3F, 0xFFFFFF40, 198 0xFFFFFF7F, 0xFFFFFF80, 199 0xFFFFFFBF, 0xFFFFFFC0, 200 0xFFFFFFFF 201 }; 202 203#endif 204 205///////////////////////////////////// 206 207#define CC_C 0x0001 208#define CC_P 0x0004 209#define CC_A 0x0010 210#define CC_Z 0x0040 211#define CC_S 0x0080 212#define CC_O 0x0800 213 214#define CC_MASK (CC_C | CC_P | CC_A | CC_Z | CC_S | CC_O) 215 216#define GEN_do_locked_G_E(_name,_eax) \ 217 \ 218 __attribute__((noinline)) void do_locked_G_E_##_name ( void ) \ 219 { \ 220 volatile int e_val, g_val, e_val_before; \ 221 int o, s, z, a, c, p, 
v1, v2, flags_in; \ 222 int block[4]; \ 223 \ 224 for (v1 = 0; v1 < NVALS; v1++) { \ 225 for (v2 = 0; v2 < NVALS; v2++) { \ 226 \ 227 for (o = 0; o < 2; o++) { \ 228 for (s = 0; s < 2; s++) { \ 229 for (z = 0; z < 2; z++) { \ 230 for (a = 0; a < 2; a++) { \ 231 for (c = 0; c < 2; c++) { \ 232 for (p = 0; p < 2; p++) { \ 233 \ 234 flags_in = (o ? CC_O : 0) \ 235 | (s ? CC_S : 0) \ 236 | (z ? CC_Z : 0) \ 237 | (a ? CC_A : 0) \ 238 | (c ? CC_C : 0) \ 239 | (p ? CC_P : 0); \ 240 \ 241 g_val = val[v1]; \ 242 e_val = val[v2]; \ 243 e_val_before = e_val; \ 244 \ 245 block[0] = flags_in; \ 246 block[1] = g_val; \ 247 block[2] = (int)(long)&e_val; \ 248 block[3] = 0; \ 249 __asm__ __volatile__( \ 250 "movl 0(%0), %%eax\n\t" \ 251 "pushl %%eax\n\t" \ 252 "popfl\n\t" \ 253 "movl 4(%0), %%eax\n\t" \ 254 "movl 8(%0), %%ebx\n\t" \ 255 "lock; " #_name " %%" #_eax ",(%%ebx)\n\t" \ 256 "pushfl\n\t" \ 257 "popl %%eax\n\t" \ 258 "movl %%eax, 12(%0)\n\t" \ 259 : : "r"(&block[0]) : "eax","ebx","cc","memory" \ 260 ); \ 261 \ 262 send( \ 263 sprintf(outBuf, \ 264 "%s G=%08x E=%08x CCIN=%08x -> E=%08x CCOUT=%08x\n", \ 265 #_name, g_val, e_val_before, flags_in, \ 266 e_val, block[3] & CC_MASK) ); \ 267 \ 268 }}}}}} \ 269 \ 270 }} \ 271 } 272 273GEN_do_locked_G_E(addb,al) 274GEN_do_locked_G_E(addw,ax) 275GEN_do_locked_G_E(addl,eax) 276 277GEN_do_locked_G_E(orb, al) 278GEN_do_locked_G_E(orw, ax) 279GEN_do_locked_G_E(orl, eax) 280 281GEN_do_locked_G_E(adcb,al) 282GEN_do_locked_G_E(adcw,ax) 283GEN_do_locked_G_E(adcl,eax) 284 285GEN_do_locked_G_E(sbbb,al) 286GEN_do_locked_G_E(sbbw,ax) 287GEN_do_locked_G_E(sbbl,eax) 288 289GEN_do_locked_G_E(andb,al) 290GEN_do_locked_G_E(andw,ax) 291GEN_do_locked_G_E(andl,eax) 292 293GEN_do_locked_G_E(subb,al) 294GEN_do_locked_G_E(subw,ax) 295GEN_do_locked_G_E(subl,eax) 296 297GEN_do_locked_G_E(xorb,al) 298GEN_do_locked_G_E(xorw,ax) 299GEN_do_locked_G_E(xorl,eax) 300 301 302 303 304#define GEN_do_locked_imm_E(_name,_eax,_imm) \ 305 \ 306 __attribute__((noinline)) 
void do_locked_imm_E_##_name##_##_imm ( void ) \ 307 { \ 308 volatile int e_val, e_val_before; \ 309 int o, s, z, a, c, p, v2, flags_in; \ 310 int block[3]; \ 311 \ 312 for (v2 = 0; v2 < NVALS; v2++) { \ 313 \ 314 for (o = 0; o < 2; o++) { \ 315 for (s = 0; s < 2; s++) { \ 316 for (z = 0; z < 2; z++) { \ 317 for (a = 0; a < 2; a++) { \ 318 for (c = 0; c < 2; c++) { \ 319 for (p = 0; p < 2; p++) { \ 320 \ 321 flags_in = (o ? CC_O : 0) \ 322 | (s ? CC_S : 0) \ 323 | (z ? CC_Z : 0) \ 324 | (a ? CC_A : 0) \ 325 | (c ? CC_C : 0) \ 326 | (p ? CC_P : 0); \ 327 \ 328 e_val = val[v2]; \ 329 e_val_before = e_val; \ 330 \ 331 block[0] = flags_in; \ 332 block[1] = (int)(long)&e_val; \ 333 block[2] = 0; \ 334 __asm__ __volatile__( \ 335 "movl 0(%0), %%eax\n\t" \ 336 "pushl %%eax\n\t" \ 337 "popfl\n\t" \ 338 "movl 4(%0), %%ebx\n\t" \ 339 "lock; " #_name " $" #_imm ",(%%ebx)\n\t" \ 340 "pushfl\n\t" \ 341 "popl %%eax\n\t" \ 342 "movl %%eax, 8(%0)\n\t" \ 343 : : "r"(&block[0]) : "eax","ebx","cc","memory" \ 344 ); \ 345 \ 346 send( \ 347 sprintf(outBuf, \ 348 "%s I=%s E=%08x CCIN=%08x -> E=%08x CCOUT=%08x\n", \ 349 #_name, #_imm, e_val_before, flags_in, \ 350 e_val, block[2] & CC_MASK) ); \ 351 \ 352 }}}}}} \ 353 \ 354 } \ 355 } 356 357GEN_do_locked_imm_E(addb,al,0x7F) 358GEN_do_locked_imm_E(addb,al,0xF1) 359GEN_do_locked_imm_E(addw,ax,0x7E) 360GEN_do_locked_imm_E(addw,ax,0x9325) 361GEN_do_locked_imm_E(addl,eax,0x7D) 362GEN_do_locked_imm_E(addl,eax,0x31415927) 363 364GEN_do_locked_imm_E(orb,al,0x7F) 365GEN_do_locked_imm_E(orb,al,0xF1) 366GEN_do_locked_imm_E(orw,ax,0x7E) 367GEN_do_locked_imm_E(orw,ax,0x9325) 368GEN_do_locked_imm_E(orl,eax,0x7D) 369GEN_do_locked_imm_E(orl,eax,0x31415927) 370 371GEN_do_locked_imm_E(adcb,al,0x7F) 372GEN_do_locked_imm_E(adcb,al,0xF1) 373GEN_do_locked_imm_E(adcw,ax,0x7E) 374GEN_do_locked_imm_E(adcw,ax,0x9325) 375GEN_do_locked_imm_E(adcl,eax,0x7D) 376GEN_do_locked_imm_E(adcl,eax,0x31415927) 377 378GEN_do_locked_imm_E(sbbb,al,0x7F) 
379GEN_do_locked_imm_E(sbbb,al,0xF1) 380GEN_do_locked_imm_E(sbbw,ax,0x7E) 381GEN_do_locked_imm_E(sbbw,ax,0x9325) 382GEN_do_locked_imm_E(sbbl,eax,0x7D) 383GEN_do_locked_imm_E(sbbl,eax,0x31415927) 384 385GEN_do_locked_imm_E(andb,al,0x7F) 386GEN_do_locked_imm_E(andb,al,0xF1) 387GEN_do_locked_imm_E(andw,ax,0x7E) 388GEN_do_locked_imm_E(andw,ax,0x9325) 389GEN_do_locked_imm_E(andl,eax,0x7D) 390GEN_do_locked_imm_E(andl,eax,0x31415927) 391 392GEN_do_locked_imm_E(subb,al,0x7F) 393GEN_do_locked_imm_E(subb,al,0xF1) 394GEN_do_locked_imm_E(subw,ax,0x7E) 395GEN_do_locked_imm_E(subw,ax,0x9325) 396GEN_do_locked_imm_E(subl,eax,0x7D) 397GEN_do_locked_imm_E(subl,eax,0x31415927) 398 399GEN_do_locked_imm_E(xorb,al,0x7F) 400GEN_do_locked_imm_E(xorb,al,0xF1) 401GEN_do_locked_imm_E(xorw,ax,0x7E) 402GEN_do_locked_imm_E(xorw,ax,0x9325) 403GEN_do_locked_imm_E(xorl,eax,0x7D) 404GEN_do_locked_imm_E(xorl,eax,0x31415927) 405 406#define GEN_do_locked_unary_E(_name,_eax) \ 407 \ 408 __attribute__((noinline)) void do_locked_unary_E_##_name ( void ) \ 409 { \ 410 volatile int e_val, e_val_before; \ 411 int o, s, z, a, c, p, v2, flags_in; \ 412 int block[3]; \ 413 \ 414 for (v2 = 0; v2 < NVALS; v2++) { \ 415 \ 416 for (o = 0; o < 2; o++) { \ 417 for (s = 0; s < 2; s++) { \ 418 for (z = 0; z < 2; z++) { \ 419 for (a = 0; a < 2; a++) { \ 420 for (c = 0; c < 2; c++) { \ 421 for (p = 0; p < 2; p++) { \ 422 \ 423 flags_in = (o ? CC_O : 0) \ 424 | (s ? CC_S : 0) \ 425 | (z ? CC_Z : 0) \ 426 | (a ? CC_A : 0) \ 427 | (c ? CC_C : 0) \ 428 | (p ? 
CC_P : 0); \ 429 \ 430 e_val = val[v2]; \ 431 e_val_before = e_val; \ 432 \ 433 block[0] = flags_in; \ 434 block[1] = (int)(long)&e_val; \ 435 block[2] = 0; \ 436 __asm__ __volatile__( \ 437 "movl 0(%0), %%eax\n\t" \ 438 "pushl %%eax\n\t" \ 439 "popfl\n\t" \ 440 "movl 4(%0), %%ebx\n\t" \ 441 "lock; " #_name " (%%ebx)\n\t" \ 442 "pushfl\n\t" \ 443 "popl %%eax\n\t" \ 444 "movl %%eax, 8(%0)\n\t" \ 445 : : "r"(&block[0]) : "eax","ebx","cc","memory" \ 446 ); \ 447 \ 448 send( \ 449 sprintf(outBuf, \ 450 "%s E=%08x CCIN=%08x -> E=%08x CCOUT=%08x\n", \ 451 #_name, e_val_before, flags_in, \ 452 e_val, block[2] & CC_MASK)); \ 453 \ 454 }}}}}} \ 455 \ 456 } \ 457 } 458 459GEN_do_locked_unary_E(decb,al) 460GEN_do_locked_unary_E(decw,ax) 461GEN_do_locked_unary_E(decl,eax) 462 463GEN_do_locked_unary_E(incb,al) 464GEN_do_locked_unary_E(incw,ax) 465GEN_do_locked_unary_E(incl,eax) 466 467GEN_do_locked_unary_E(negb,al) 468GEN_do_locked_unary_E(negw,ax) 469GEN_do_locked_unary_E(negl,eax) 470 471GEN_do_locked_unary_E(notb,al) 472GEN_do_locked_unary_E(notw,ax) 473GEN_do_locked_unary_E(notl,eax) 474 475 476///////////////////////////////////////////////////////////////// 477 478unsigned int btsl_mem ( UChar* base, int bitno ) 479{ 480 unsigned char res; 481 __asm__ 482 __volatile__("lock; btsl\t%2, %0\n\t" 483 "setc\t%1" 484 : "=m" (*base), "=q" (res) 485 : "r" (bitno)); 486 /* Pretty meaningless to dereference base here, but that's what you 487 have to do to get a btsl insn which refers to memory starting at 488 base. 
*/ 489 return res; 490} 491unsigned int btsw_mem ( UChar* base, int bitno ) 492{ 493 unsigned char res; 494 __asm__ 495 __volatile__("lock; btsw\t%w2, %0\n\t" 496 "setc\t%1" 497 : "=m" (*base), "=q" (res) 498 : "r" (bitno)); 499 return res; 500} 501 502unsigned int btrl_mem ( UChar* base, int bitno ) 503{ 504 unsigned char res; 505 __asm__ 506 __volatile__("lock; btrl\t%2, %0\n\t" 507 "setc\t%1" 508 : "=m" (*base), "=q" (res) 509 : "r" (bitno)); 510 return res; 511} 512unsigned int btrw_mem ( UChar* base, int bitno ) 513{ 514 unsigned char res; 515 __asm__ 516 __volatile__("lock; btrw\t%w2, %0\n\t" 517 "setc\t%1" 518 : "=m" (*base), "=q" (res) 519 : "r" (bitno)); 520 return res; 521} 522 523unsigned int btcl_mem ( UChar* base, int bitno ) 524{ 525 unsigned char res; 526 __asm__ 527 __volatile__("lock; btcl\t%2, %0\n\t" 528 "setc\t%1" 529 : "=m" (*base), "=q" (res) 530 : "r" (bitno)); 531 return res; 532} 533unsigned int btcw_mem ( UChar* base, int bitno ) 534{ 535 unsigned char res; 536 __asm__ 537 __volatile__("lock; btcw\t%w2, %0\n\t" 538 "setc\t%1" 539 : "=m" (*base), "=q" (res) 540 : "r" (bitno)); 541 return res; 542} 543 544unsigned int btl_mem ( UChar* base, int bitno ) 545{ 546 unsigned char res; 547 __asm__ 548 __volatile__("btl\t%2, %0\n\t" 549 "setc\t%1" 550 : "=m" (*base), "=q" (res) 551 : "r" (bitno) 552 : "cc", "memory"); 553 return res; 554} 555unsigned int btw_mem ( UChar* base, int bitno ) 556{ 557 unsigned char res; 558 __asm__ 559 __volatile__("btw\t%w2, %0\n\t" 560 "setc\t%1" 561 : "=m" (*base), "=q" (res) 562 : "r" (bitno)); 563 return res; 564} 565 566ULong rol1 ( ULong x ) 567{ 568 return (x << 1) | (x >> 63); 569} 570 571void do_bt_G_E_tests ( void ) 572{ 573 UInt n, bitoff, op; 574 UInt c; 575 UChar* block; 576 ULong carrydep, res;; 577 578 /*------------------------ MEM-L -----------------------*/ 579 580 carrydep = 0; 581 block = calloc(200,1); 582 block += 100; 583 /* Valid bit offsets are -800 .. 799 inclusive. 
*/ 584 585 for (n = 0; n < 10000; n++) { 586 bitoff = (myrandom() % 1600) - 800; 587 op = myrandom() % 4; 588 c = 2; 589 switch (op) { 590 case 0: c = btsl_mem(block, bitoff); break; 591 case 1: c = btrl_mem(block, bitoff); break; 592 case 2: c = btcl_mem(block, bitoff); break; 593 case 3: c = btl_mem(block, bitoff); break; 594 } 595 c &= 255; 596 assert(c == 0 || c == 1); 597 carrydep = c ? (rol1(carrydep) ^ (Long)(Int)bitoff) : carrydep; 598 } 599 600 /* Compute final result */ 601 block -= 100; 602 res = 0; 603 for (n = 0; n < 200; n++) { 604 UChar ch = block[n]; 605 /* printf("%d ", (int)block[n]); */ 606 res = rol1(res) ^ (ULong)ch; 607 } 608 609 send( sprintf(outBuf, 610 "bt{s,r,c}l: final res 0x%llx, carrydep 0x%llx\n", 611 res, carrydep )); 612 free(block); 613 614 /*------------------------ MEM-W -----------------------*/ 615 616 carrydep = 0; 617 block = calloc(200,1); 618 block += 100; 619 /* Valid bit offsets are -800 .. 799 inclusive. */ 620 621 for (n = 0; n < 10000; n++) { 622 bitoff = (myrandom() % 1600) - 800; 623 op = myrandom() % 4; 624 c = 2; 625 switch (op) { 626 case 0: c = btsw_mem(block, bitoff); break; 627 case 1: c = btrw_mem(block, bitoff); break; 628 case 2: c = btcw_mem(block, bitoff); break; 629 case 3: c = btw_mem(block, bitoff); break; 630 } 631 c &= 255; 632 assert(c == 0 || c == 1); 633 carrydep = c ? (rol1(carrydep) ^ (Long)(Int)bitoff) : carrydep; 634 } 635 636 /* Compute final result */ 637 block -= 100; 638 res = 0; 639 for (n = 0; n < 200; n++) { 640 UChar ch = block[n]; 641 /* printf("%d ", (int)block[n]); */ 642 res = rol1(res) ^ (ULong)ch; 643 } 644 645 send( sprintf(outBuf, 646 "bt{s,r,c}w: final res 0x%llx, carrydep 0x%llx\n", 647 res, carrydep )); 648 free(block); 649} 650 651 652///////////////////////////////////////////////////////////////// 653 654/* Given a word, do bt/bts/btr/btc on bits 0, 1, 2 and 3 of it, and 655 also reconstruct the original bits 0, 1, 2, 3 by looking at the 656 carry flag. 
Returned result has mashed bits 0-3 at the bottom and 657 the reconstructed original bits 0-3 as 4-7. */ 658 659UInt mash_mem_L ( UInt* origp ) 660{ 661 UInt reconstructed, mashed; 662 __asm__ __volatile__ ( 663 "movl %2, %%edx\n\t" 664 "" 665 "movl $0, %%eax\n\t" 666 "\n\t" 667 "btl $0, (%%edx)\n\t" 668 "setb %%cl\n\t" 669 "movzbl %%cl, %%ecx\n\t" 670 "orl %%ecx, %%eax\n\t" 671 "\n\t" 672 "lock; btsl $1, (%%edx)\n\t" 673 "setb %%cl\n\t" 674 "movzbl %%cl, %%ecx\n\t" 675 "shll $1, %%ecx\n\t" 676 "orl %%ecx, %%eax\n\t" 677 "\n\t" 678 "lock; btrl $2, (%%edx)\n\t" 679 "setb %%cl\n\t" 680 "movzbl %%cl, %%ecx\n\t" 681 "shll $2, %%ecx\n\t" 682 "orl %%ecx, %%eax\n\t" 683 "\n\t" 684 "lock; btcl $3, (%%edx)\n\t" 685 "setb %%cl\n\t" 686 "movzbl %%cl, %%ecx\n\t" 687 "shll $3, %%ecx\n\t" 688 "orl %%ecx, %%eax\n\t" 689 "\n\t" 690 "movl %%eax, %0\n\t" 691 "movl (%%edx), %1" 692 693 : "=r" (reconstructed), "=r" (mashed) 694 : "r" (origp) 695 : "eax", "ecx", "edx", "cc"); 696 return (mashed & 0xF) | ((reconstructed & 0xF) << 4); 697} 698 699UInt mash_mem_W ( UShort* origp ) 700{ 701 UInt reconstructed, mashed; 702 __asm__ __volatile__ ( 703 "movl %2, %%edx\n\t" 704 "" 705 "movl $0, %%eax\n\t" 706 "\n\t" 707 "btw $0, (%%edx)\n\t" 708 "setb %%cl\n\t" 709 "movzbl %%cl, %%ecx\n\t" 710 "orl %%ecx, %%eax\n\t" 711 "\n\t" 712 "lock; btsw $1, (%%edx)\n\t" 713 "setb %%cl\n\t" 714 "movzbl %%cl, %%ecx\n\t" 715 "shll $1, %%ecx\n\t" 716 "orl %%ecx, %%eax\n\t" 717 "\n\t" 718 "lock; btrw $2, (%%edx)\n\t" 719 "setb %%cl\n\t" 720 "movzbl %%cl, %%ecx\n\t" 721 "shll $2, %%ecx\n\t" 722 "orl %%ecx, %%eax\n\t" 723 "\n\t" 724 "lock; btcw $3, (%%edx)\n\t" 725 "setb %%cl\n\t" 726 "movzbl %%cl, %%ecx\n\t" 727 "shll $3, %%ecx\n\t" 728 "orl %%ecx, %%eax\n\t" 729 "\n\t" 730 "movl %%eax, %0\n\t" 731 "movzwl (%%edx), %1" 732 733 : "=r" (reconstructed), "=r" (mashed) 734 : "r" (origp) 735 : "eax", "ecx", "edx", "cc"); 736 return (mashed & 0xF) | ((reconstructed & 0xF) << 4); 737} 738 739 740void do_bt_imm_E_tests( 
void ) 741{ 742 int i; 743 UInt* iil = malloc(sizeof(UInt)); 744 UShort* iiw = malloc(sizeof(UShort)); 745 for (i = 0; i < 0x10; i++) { 746 *iil = i; 747 *iiw = i; 748 send( sprintf(outBuf, "0x%x -> 0x%02x 0x%02x\n", i, 749 mash_mem_L(iil), mash_mem_W(iiw))); 750 } 751 free(iil); 752 free(iiw); 753} 754 755 756 757///////////////////////////////////////////////////////////////// 758 759int main ( void ) 760{ 761 do_locked_G_E_addb(); 762 do_locked_G_E_addw(); 763 do_locked_G_E_addl(); 764 765 do_locked_G_E_orb(); 766 do_locked_G_E_orw(); 767 do_locked_G_E_orl(); 768 769 do_locked_G_E_adcb(); 770 do_locked_G_E_adcw(); 771 do_locked_G_E_adcl(); 772 773 do_locked_G_E_sbbb(); 774 do_locked_G_E_sbbw(); 775 do_locked_G_E_sbbl(); 776 777 do_locked_G_E_andb(); 778 do_locked_G_E_andw(); 779 do_locked_G_E_andl(); 780 781 do_locked_G_E_subb(); 782 do_locked_G_E_subw(); 783 do_locked_G_E_subl(); 784 785 do_locked_G_E_xorb(); 786 do_locked_G_E_xorw(); 787 do_locked_G_E_xorl(); 788 //21 789 do_locked_imm_E_addb_0x7F(); 790 do_locked_imm_E_addb_0xF1(); 791 do_locked_imm_E_addw_0x7E(); 792 do_locked_imm_E_addw_0x9325(); 793 do_locked_imm_E_addl_0x7D(); 794 do_locked_imm_E_addl_0x31415927(); 795 796 do_locked_imm_E_orb_0x7F(); 797 do_locked_imm_E_orb_0xF1(); 798 do_locked_imm_E_orw_0x7E(); 799 do_locked_imm_E_orw_0x9325(); 800 do_locked_imm_E_orl_0x7D(); 801 do_locked_imm_E_orl_0x31415927(); 802 803 do_locked_imm_E_adcb_0x7F(); 804 do_locked_imm_E_adcb_0xF1(); 805 do_locked_imm_E_adcw_0x7E(); 806 do_locked_imm_E_adcw_0x9325(); 807 do_locked_imm_E_adcl_0x7D(); 808 do_locked_imm_E_adcl_0x31415927(); 809 810 do_locked_imm_E_sbbb_0x7F(); 811 do_locked_imm_E_sbbb_0xF1(); 812 do_locked_imm_E_sbbw_0x7E(); 813 do_locked_imm_E_sbbw_0x9325(); 814 do_locked_imm_E_sbbl_0x7D(); 815 do_locked_imm_E_sbbl_0x31415927(); 816 817 do_locked_imm_E_andb_0x7F(); 818 do_locked_imm_E_andb_0xF1(); 819 do_locked_imm_E_andw_0x7E(); 820 do_locked_imm_E_andw_0x9325(); 821 do_locked_imm_E_andl_0x7D(); 822 
do_locked_imm_E_andl_0x31415927(); 823 824 do_locked_imm_E_subb_0x7F(); 825 do_locked_imm_E_subb_0xF1(); 826 do_locked_imm_E_subw_0x7E(); 827 do_locked_imm_E_subw_0x9325(); 828 do_locked_imm_E_subl_0x7D(); 829 do_locked_imm_E_subl_0x31415927(); 830 831 do_locked_imm_E_xorb_0x7F(); 832 do_locked_imm_E_xorb_0xF1(); 833 do_locked_imm_E_xorw_0x7E(); 834 do_locked_imm_E_xorw_0x9325(); 835 do_locked_imm_E_xorl_0x7D(); 836 do_locked_imm_E_xorl_0x31415927(); 837 // 63 838 do_locked_unary_E_decb(); 839 do_locked_unary_E_decw(); 840 do_locked_unary_E_decl(); 841 842 do_locked_unary_E_incb(); 843 do_locked_unary_E_incw(); 844 do_locked_unary_E_incl(); 845 846 do_locked_unary_E_negb(); 847 do_locked_unary_E_negw(); 848 do_locked_unary_E_negl(); 849 850 do_locked_unary_E_notb(); 851 do_locked_unary_E_notw(); 852 do_locked_unary_E_notl(); 853 // 75 854 do_bt_G_E_tests(); 855 // 81 856 do_bt_imm_E_tests(); 857 // 87 858 // So there should be 87 lock-prefixed instructions in the 859 // disassembly of this compilation unit. 860 // confirm with 861 // objdump -d ./x86locked | grep lock | grep -v do_lock | grep -v elf32 | wc 862 863 { UInt crcExpd = 0xB2D75045; 864 theCRC = crcFinalise( theCRC ); 865 if (theCRC == crcExpd) { 866 printf("x86locked: PASS: CRCs actual 0x%08X expected 0x%08X\n", 867 theCRC, crcExpd); 868 } else { 869 printf("x86locked: FAIL: CRCs actual 0x%08X expected 0x%08X\n", 870 theCRC, crcExpd); 871 printf("x86locked: set #define VERBOSE 1 to diagnose\n"); 872 } 873 } 874 875 return 0; 876} 877