/* amd64locked.c revision e739ac0589b4fb43561f801c4faba8c1b89f8680 */
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>

/* random() is POSIX rather than ISO C, so <stdlib.h> hides it in strict
   ISO modes (e.g. -std=c11).  Repeat the POSIX prototype so the call
   below never becomes an implicit declaration. */
extern long random ( void );

#define VERBOSE 0

typedef  unsigned int            UInt;
typedef  unsigned char           UChar;
typedef  unsigned long long int  ULong;
typedef  signed long long int    Long;
typedef  signed int              Int;
typedef  unsigned short          UShort;
typedef  unsigned long           UWord;
typedef  char                    HChar;

/* Overview: every test below runs x86-64 instructions (most of them
   lock-prefixed) through inline asm, formats the observed result as a
   line of text in outBuf, and folds that text into the running CRC
   'theCRC' via send().  The text itself is only printed when VERBOSE
   is non-zero. */

/////////////////////////////////////////////////////////////////
// BEGIN crc32 stuff                                           //
/////////////////////////////////////////////////////////////////

static const UInt crc32Table[256] = {

   /*-- Ugly, innit? --*/

   0x00000000L, 0x04c11db7L, 0x09823b6eL, 0x0d4326d9L,
   0x130476dcL, 0x17c56b6bL, 0x1a864db2L, 0x1e475005L,
   0x2608edb8L, 0x22c9f00fL, 0x2f8ad6d6L, 0x2b4bcb61L,
   0x350c9b64L, 0x31cd86d3L, 0x3c8ea00aL, 0x384fbdbdL,
   0x4c11db70L, 0x48d0c6c7L, 0x4593e01eL, 0x4152fda9L,
   0x5f15adacL, 0x5bd4b01bL, 0x569796c2L, 0x52568b75L,
   0x6a1936c8L, 0x6ed82b7fL, 0x639b0da6L, 0x675a1011L,
   0x791d4014L, 0x7ddc5da3L, 0x709f7b7aL, 0x745e66cdL,
   0x9823b6e0L, 0x9ce2ab57L, 0x91a18d8eL, 0x95609039L,
   0x8b27c03cL, 0x8fe6dd8bL, 0x82a5fb52L, 0x8664e6e5L,
   0xbe2b5b58L, 0xbaea46efL, 0xb7a96036L, 0xb3687d81L,
   0xad2f2d84L, 0xa9ee3033L, 0xa4ad16eaL, 0xa06c0b5dL,
   0xd4326d90L, 0xd0f37027L, 0xddb056feL, 0xd9714b49L,
   0xc7361b4cL, 0xc3f706fbL, 0xceb42022L, 0xca753d95L,
   0xf23a8028L, 0xf6fb9d9fL, 0xfbb8bb46L, 0xff79a6f1L,
   0xe13ef6f4L, 0xe5ffeb43L, 0xe8bccd9aL, 0xec7dd02dL,
   0x34867077L, 0x30476dc0L, 0x3d044b19L, 0x39c556aeL,
   0x278206abL, 0x23431b1cL, 0x2e003dc5L, 0x2ac12072L,
   0x128e9dcfL, 0x164f8078L, 0x1b0ca6a1L, 0x1fcdbb16L,
   0x018aeb13L, 0x054bf6a4L, 0x0808d07dL, 0x0cc9cdcaL,
   0x7897ab07L, 0x7c56b6b0L, 0x71159069L, 0x75d48ddeL,
   0x6b93dddbL, 0x6f52c06cL, 0x6211e6b5L, 0x66d0fb02L,
   0x5e9f46bfL, 0x5a5e5b08L, 0x571d7dd1L, 0x53dc6066L,
   0x4d9b3063L, 0x495a2dd4L, 0x44190b0dL, 0x40d816baL,
   0xaca5c697L, 0xa864db20L, 0xa527fdf9L, 0xa1e6e04eL,
   0xbfa1b04bL, 0xbb60adfcL, 0xb6238b25L, 0xb2e29692L,
   0x8aad2b2fL, 0x8e6c3698L, 0x832f1041L, 0x87ee0df6L,
   0x99a95df3L, 0x9d684044L, 0x902b669dL, 0x94ea7b2aL,
   0xe0b41de7L, 0xe4750050L, 0xe9362689L, 0xedf73b3eL,
   0xf3b06b3bL, 0xf771768cL, 0xfa325055L, 0xfef34de2L,
   0xc6bcf05fL, 0xc27dede8L, 0xcf3ecb31L, 0xcbffd686L,
   0xd5b88683L, 0xd1799b34L, 0xdc3abdedL, 0xd8fba05aL,
   0x690ce0eeL, 0x6dcdfd59L, 0x608edb80L, 0x644fc637L,
   0x7a089632L, 0x7ec98b85L, 0x738aad5cL, 0x774bb0ebL,
   0x4f040d56L, 0x4bc510e1L, 0x46863638L, 0x42472b8fL,
   0x5c007b8aL, 0x58c1663dL, 0x558240e4L, 0x51435d53L,
   0x251d3b9eL, 0x21dc2629L, 0x2c9f00f0L, 0x285e1d47L,
   0x36194d42L, 0x32d850f5L, 0x3f9b762cL, 0x3b5a6b9bL,
   0x0315d626L, 0x07d4cb91L, 0x0a97ed48L, 0x0e56f0ffL,
   0x1011a0faL, 0x14d0bd4dL, 0x19939b94L, 0x1d528623L,
   0xf12f560eL, 0xf5ee4bb9L, 0xf8ad6d60L, 0xfc6c70d7L,
   0xe22b20d2L, 0xe6ea3d65L, 0xeba91bbcL, 0xef68060bL,
   0xd727bbb6L, 0xd3e6a601L, 0xdea580d8L, 0xda649d6fL,
   0xc423cd6aL, 0xc0e2d0ddL, 0xcda1f604L, 0xc960ebb3L,
   0xbd3e8d7eL, 0xb9ff90c9L, 0xb4bcb610L, 0xb07daba7L,
   0xae3afba2L, 0xaafbe615L, 0xa7b8c0ccL, 0xa379dd7bL,
   0x9b3660c6L, 0x9ff77d71L, 0x92b45ba8L, 0x9675461fL,
   0x8832161aL, 0x8cf30badL, 0x81b02d74L, 0x857130c3L,
   0x5d8a9099L, 0x594b8d2eL, 0x5408abf7L, 0x50c9b640L,
   0x4e8ee645L, 0x4a4ffbf2L, 0x470cdd2bL, 0x43cdc09cL,
   0x7b827d21L, 0x7f436096L, 0x7200464fL, 0x76c15bf8L,
   0x68860bfdL, 0x6c47164aL, 0x61043093L, 0x65c52d24L,
   0x119b4be9L, 0x155a565eL, 0x18197087L, 0x1cd86d30L,
   0x029f3d35L, 0x065e2082L, 0x0b1d065bL, 0x0fdc1becL,
   0x3793a651L, 0x3352bbe6L, 0x3e119d3fL, 0x3ad08088L,
   0x2497d08dL, 0x2056cd3aL, 0x2d15ebe3L, 0x29d4f654L,
   0xc5a92679L, 0xc1683bceL, 0xcc2b1d17L, 0xc8ea00a0L,
   0xd6ad50a5L, 0xd26c4d12L, 0xdf2f6bcbL, 0xdbee767cL,
   0xe3a1cbc1L, 0xe760d676L, 0xea23f0afL, 0xeee2ed18L,
   0xf0a5bd1dL, 0xf464a0aaL, 0xf9278673L, 0xfde69bc4L,
   0x89b8fd09L, 0x8d79e0beL, 0x803ac667L, 0x84fbdbd0L,
   0x9abc8bd5L, 0x9e7d9662L, 0x933eb0bbL, 0x97ffad0cL,
   0xafb010b1L, 0xab710d06L, 0xa6322bdfL, 0xa2f33668L,
   0xbcb4666dL, 0xb8757bdaL, 0xb5365d03L, 0xb1f740b4L
};

/* Fold one byte 'cha' into the 32-bit CRC held in the lvalue 'crcVar'.
   Wrapped in do/while(0) and fully parenthesised so it behaves like a
   single statement whatever expressions are passed in. */
#define UPDATE_CRC(crcVar,cha)                          \
   do {                                                 \
      (crcVar) = ((crcVar) << 8) ^                      \
                 crc32Table[((crcVar) >> 24) ^          \
                            ((UChar)(cha))];            \
   } while (0)

/* Fold 'nBytes' bytes starting at 'bytes' into the CRC 'crcIn' and
   return the updated value.  With nBytes == 0 the input CRC is returned
   unchanged. */
static UInt crcBytes ( const UChar* bytes, UWord nBytes, UInt crcIn )
{
   UInt  crc = crcIn;
   UWord i;
   for (i = 0; i < nBytes; i++) {
      UPDATE_CRC(crc, bytes[i]);
   }
   return crc;
}

/* Final step of the CRC: bitwise complement of the running value. */
static UInt crcFinalise ( UInt crc ) {
   return ~crc;
}

////////

/* Running CRC over everything passed to send(). */
static UInt theCRC = 0xFFFFFFFF;

static HChar outBuf[1024];

/* Take the output that's in outBuf, length as specified, and update the
   running crc.  'nbytes' is normally the return value of the sprintf
   that filled outBuf; the asserts reject a negative or oversized length
   and check that the text is NUL-terminated exactly at that length. */
static void send ( int nbytes )
{
   assert( ((unsigned int)nbytes) < sizeof(outBuf)-1);
   assert(outBuf[nbytes] == 0);
   theCRC = crcBytes( (const UChar*)&outBuf[0], nbytes, theCRC );
   if (VERBOSE) printf("SEND %08x %s", theCRC, outBuf);
}


/////////////////////////////////////////////////////////////////
// END crc32 stuff                                             //
/////////////////////////////////////////////////////////////////

/* Operand values fed to the locked-instruction tests.  The set in use
   determines the text produced and therefore the final CRC. */
#if 0

// full version
#define NVALS 76

static const ULong val[NVALS]
    = { 0x00ULL, 0x01ULL, 0x02ULL, 0x03ULL,
        0x3FULL, 0x40ULL, 0x41ULL,
        0x7EULL, 0x7FULL, 0x80ULL, 0x81ULL, 0x82ULL,
        0xBFULL, 0xC0ULL, 0xC1ULL,
        0xFCULL, 0xFDULL, 0xFEULL, 0xFFULL,

        0xFF00ULL, 0xFF01ULL, 0xFF02ULL, 0xFF03ULL,
        0xFF3FULL, 0xFF40ULL, 0xFF41ULL,
        0xFF7EULL, 0xFF7FULL, 0xFF80ULL, 0xFF81ULL, 0xFF82ULL,
        0xFFBFULL, 0xFFC0ULL, 0xFFC1ULL,
        0xFFFCULL, 0xFFFDULL, 0xFFFEULL, 0xFFFFULL,

        0xFFFFFF00ULL, 0xFFFFFF01ULL, 0xFFFFFF02ULL, 0xFFFFFF03ULL,
        0xFFFFFF3FULL, 0xFFFFFF40ULL, 0xFFFFFF41ULL,
        0xFFFFFF7EULL, 0xFFFFFF7FULL, 0xFFFFFF80ULL, 0xFFFFFF81ULL, 0xFFFFFF82ULL,
        0xFFFFFFBFULL, 0xFFFFFFC0ULL, 0xFFFFFFC1ULL,
        0xFFFFFFFCULL, 0xFFFFFFFDULL, 0xFFFFFFFEULL, 0xFFFFFFFFULL,

        0xFFFFFFFFFFFFFF00ULL, 0xFFFFFFFFFFFFFF01ULL, 0xFFFFFFFFFFFFFF02ULL,
                               0xFFFFFFFFFFFFFF03ULL,
        0xFFFFFFFFFFFFFF3FULL, 0xFFFFFFFFFFFFFF40ULL, 0xFFFFFFFFFFFFFF41ULL,
        0xFFFFFFFFFFFFFF7EULL, 0xFFFFFFFFFFFFFF7FULL, 0xFFFFFFFFFFFFFF80ULL,
                               0xFFFFFFFFFFFFFF81ULL, 0xFFFFFFFFFFFFFF82ULL,
        0xFFFFFFFFFFFFFFBFULL, 0xFFFFFFFFFFFFFFC0ULL, 0xFFFFFFFFFFFFFFC1ULL,
        0xFFFFFFFFFFFFFFFCULL, 0xFFFFFFFFFFFFFFFDULL, 0xFFFFFFFFFFFFFFFEULL,
                               0xFFFFFFFFFFFFFFFFULL
      };

#else

// shortened version, for use as valgrind regtest
#define NVALS 36

static const ULong val[NVALS]
    = { 0x00ULL, 0x01ULL,
        0x3FULL, 0x40ULL,
        0x7FULL, 0x80ULL,
        0xBFULL, 0xC0ULL,
        0xFFULL,

        0xFF00ULL, 0xFF01ULL,
        0xFF3FULL, 0xFF40ULL,
        0xFF7FULL, 0xFF80ULL,
        0xFFBFULL, 0xFFC0ULL,
        0xFFFFULL,

        0xFFFFFF00ULL, 0xFFFFFF01ULL,
        0xFFFFFF3FULL, 0xFFFFFF40ULL,
        0xFFFFFF7EULL, 0xFFFFFF7FULL,
        0xFFFFFFBFULL, 0xFFFFFFC0ULL,
        0xFFFFFFFFULL,

        0xFFFFFFFFFFFFFF00ULL, 0xFFFFFFFFFFFFFF01ULL,
        0xFFFFFFFFFFFFFF3FULL, 0xFFFFFFFFFFFFFF40ULL,
        0xFFFFFFFFFFFFFF7FULL, 0xFFFFFFFFFFFFFF80ULL,
        0xFFFFFFFFFFFFFFBFULL, 0xFFFFFFFFFFFFFFC0ULL,
        0xFFFFFFFFFFFFFFFFULL
      };

#endif

/////////////////////////////////////

/* x86 RFLAGS condition-code bits: carry, parity, auxiliary carry, zero,
   sign and overflow. */
#define CC_C    0x0001
#define CC_P    0x0004
#define CC_A    0x0010
#define CC_Z    0x0040
#define CC_S    0x0080
#define CC_O    0x0800

#define CC_MASK (CC_C | CC_P | CC_A | CC_Z | CC_S | CC_O)

/* Number of distinct on/off combinations of the six CC_* flags. */
#define N_FLAG_COMBOS 64

/* Map a combination index 0 .. N_FLAG_COMBOS-1 to a flags word.  Bit 5 of
   the index selects O, then S, Z, A, C, and bit 0 selects P, so counting
   the index upwards visits the combinations in the same order as six
   nested 0..1 loops over o, s, z, a, c, p with p innermost.  That order
   is part of the test output and hence of the expected CRC. */
static Long flags_for_combo ( Long combo )
{
   return ((combo & 0x20) ? CC_O : 0)
        | ((combo & 0x10) ? CC_S : 0)
        | ((combo & 0x08) ? CC_Z : 0)
        | ((combo & 0x04) ? CC_A : 0)
        | ((combo & 0x02) ? CC_C : 0)
        | ((combo & 0x01) ? CC_P : 0);
}

/* Generate do_locked_G_E_<_name>(): for every pair of values from val[]
   and every flag combination, load the flags, execute
      lock; <_name> %<_eax>, (mem)
   with the register holding val[v1] and the memory word holding val[v2],
   then report the operands, the input flags, the resulting memory word
   and the resulting CC_MASK flags through send().

   block[] layout handed to the asm: [0] flags in, [1] register operand,
   [2] address of the memory operand, [3] flags out. */
#define GEN_do_locked_G_E(_name,_eax)                                   \
                                                                        \
  __attribute__((noinline)) void do_locked_G_E_##_name ( void )         \
  {                                                                     \
    volatile Long e_val, g_val, e_val_before;                           \
    Long v1, v2, combo, flags_in;                                       \
    Long block[4];                                                      \
                                                                        \
    for (v1 = 0; v1 < NVALS; v1++) {                                    \
    for (v2 = 0; v2 < NVALS; v2++) {                                    \
    for (combo = 0; combo < N_FLAG_COMBOS; combo++) {                   \
                                                                        \
      flags_in     = flags_for_combo(combo);                            \
      g_val        = val[v1];                                           \
      e_val        = val[v2];                                           \
      e_val_before = e_val;                                             \
                                                                        \
      block[0] = flags_in;                                              \
      block[1] = g_val;                                                 \
      block[2] = (long)&e_val;                                          \
      block[3] = 0;                                                     \
      __asm__ __volatile__(                                             \
          "movq 0(%0), %%rax\n\t"                                       \
          "pushq %%rax\n\t"                                             \
          "popfq\n\t"                                                   \
          "movq 8(%0), %%rax\n\t"                                       \
          "movq 16(%0), %%rbx\n\t"                                      \
          "lock; " #_name " %%" #_eax ",(%%rbx)\n\t"                    \
          "pushfq\n\t"                                                  \
          "popq %%rax\n\t"                                              \
          "movq %%rax, 24(%0)\n\t"                                      \
          : : "r"(&block[0]) : "rax","rbx","cc","memory"                \
      );                                                                \
                                                                        \
      send(                                                             \
         sprintf(outBuf,                                                \
            "%s G=%016llx E=%016llx CCIN=%08llx -> E=%016llx CCOUT=%08llx\n", \
            #_name, (ULong)g_val, (ULong)e_val_before,                  \
            (ULong)flags_in,                                            \
            (ULong)e_val, (ULong)(block[3] & CC_MASK)));                \
                                                                        \
    }}}                                                                 \
  }

GEN_do_locked_G_E(addb,al)
GEN_do_locked_G_E(addw,ax)
GEN_do_locked_G_E(addl,eax)
GEN_do_locked_G_E(addq,rax)

GEN_do_locked_G_E(orb, al)
GEN_do_locked_G_E(orw, ax)
GEN_do_locked_G_E(orl, eax)
GEN_do_locked_G_E(orq, rax)

GEN_do_locked_G_E(adcb,al)
GEN_do_locked_G_E(adcw,ax)
GEN_do_locked_G_E(adcl,eax)
GEN_do_locked_G_E(adcq,rax)

GEN_do_locked_G_E(sbbb,al)
GEN_do_locked_G_E(sbbw,ax)
GEN_do_locked_G_E(sbbl,eax)
GEN_do_locked_G_E(sbbq,rax)

GEN_do_locked_G_E(andb,al)
GEN_do_locked_G_E(andw,ax)
GEN_do_locked_G_E(andl,eax)
GEN_do_locked_G_E(andq,rax)

GEN_do_locked_G_E(subb,al)
GEN_do_locked_G_E(subw,ax)
GEN_do_locked_G_E(subl,eax)
GEN_do_locked_G_E(subq,rax)

GEN_do_locked_G_E(xorb,al)
GEN_do_locked_G_E(xorw,ax)
GEN_do_locked_G_E(xorl,eax)
GEN_do_locked_G_E(xorq,rax)


/* Generate do_locked_imm_E_<_name>_<_imm>(): as GEN_do_locked_G_E, but the
   source operand is the immediate _imm, so only the memory value and the
   flags vary.  The _eax argument is accepted for symmetry with
   GEN_do_locked_G_E and is not used.

   block[] layout: [0] flags in, [1] address of the memory operand,
   [2] flags out. */
#define GEN_do_locked_imm_E(_name,_eax,_imm)                            \
                                                                        \
  __attribute__((noinline)) void do_locked_imm_E_##_name##_##_imm ( void )  \
  {                                                                     \
    volatile Long e_val, e_val_before;                                  \
    Long v2, combo, flags_in;                                           \
    Long block[3];                                                      \
                                                                        \
    for (v2 = 0; v2 < NVALS; v2++) {                                    \
    for (combo = 0; combo < N_FLAG_COMBOS; combo++) {                   \
                                                                        \
      flags_in     = flags_for_combo(combo);                            \
      e_val        = val[v2];                                           \
      e_val_before = e_val;                                             \
                                                                        \
      block[0] = flags_in;                                              \
      block[1] = (long)&e_val;                                          \
      block[2] = 0;                                                     \
      __asm__ __volatile__(                                             \
          "movq 0(%0), %%rax\n\t"                                       \
          "pushq %%rax\n\t"                                             \
          "popfq\n\t"                                                   \
          "movq 8(%0), %%rbx\n\t"                                       \
          "lock; " #_name " $" #_imm ",(%%rbx)\n\t"                     \
          "pushfq\n\t"                                                  \
          "popq %%rax\n\t"                                              \
          "movq %%rax, 16(%0)\n\t"                                      \
          : : "r"(&block[0]) : "rax","rbx","cc","memory"                \
      );                                                                \
                                                                        \
      send(                                                             \
         sprintf(outBuf,                                                \
            "%s I=%s E=%016llx CCIN=%08llx -> E=%016llx CCOUT=%08llx\n", \
            #_name, #_imm, (ULong)e_val_before, (ULong)flags_in,        \
            (ULong)e_val, (ULong)(block[2] & CC_MASK)));                \
                                                                        \
    }}                                                                  \
  }

GEN_do_locked_imm_E(addb,al,0x7F)
GEN_do_locked_imm_E(addb,al,0xF1)
GEN_do_locked_imm_E(addw,ax,0x7E)
GEN_do_locked_imm_E(addw,ax,0x9325)
GEN_do_locked_imm_E(addl,eax,0x7D)
GEN_do_locked_imm_E(addl,eax,0x31415927)
GEN_do_locked_imm_E(addq,rax,0x7D)
GEN_do_locked_imm_E(addq,rax,0x31415927)

GEN_do_locked_imm_E(orb,al,0x7F)
GEN_do_locked_imm_E(orb,al,0xF1)
GEN_do_locked_imm_E(orw,ax,0x7E)
GEN_do_locked_imm_E(orw,ax,0x9325)
GEN_do_locked_imm_E(orl,eax,0x7D)
GEN_do_locked_imm_E(orl,eax,0x31415927)
GEN_do_locked_imm_E(orq,rax,0x7D)
GEN_do_locked_imm_E(orq,rax,0x31415927)

GEN_do_locked_imm_E(adcb,al,0x7F)
GEN_do_locked_imm_E(adcb,al,0xF1)
GEN_do_locked_imm_E(adcw,ax,0x7E)
GEN_do_locked_imm_E(adcw,ax,0x9325)
GEN_do_locked_imm_E(adcl,eax,0x7D)
GEN_do_locked_imm_E(adcl,eax,0x31415927)
GEN_do_locked_imm_E(adcq,rax,0x7D)
GEN_do_locked_imm_E(adcq,rax,0x31415927)

GEN_do_locked_imm_E(sbbb,al,0x7F)
GEN_do_locked_imm_E(sbbb,al,0xF1)
GEN_do_locked_imm_E(sbbw,ax,0x7E)
GEN_do_locked_imm_E(sbbw,ax,0x9325)
GEN_do_locked_imm_E(sbbl,eax,0x7D)
GEN_do_locked_imm_E(sbbl,eax,0x31415927)
GEN_do_locked_imm_E(sbbq,rax,0x7D)
GEN_do_locked_imm_E(sbbq,rax,0x31415927)

GEN_do_locked_imm_E(andb,al,0x7F)
GEN_do_locked_imm_E(andb,al,0xF1)
GEN_do_locked_imm_E(andw,ax,0x7E)
GEN_do_locked_imm_E(andw,ax,0x9325)
GEN_do_locked_imm_E(andl,eax,0x7D)
GEN_do_locked_imm_E(andl,eax,0x31415927)
GEN_do_locked_imm_E(andq,rax,0x7D)
GEN_do_locked_imm_E(andq,rax,0x31415927)

GEN_do_locked_imm_E(subb,al,0x7F)
GEN_do_locked_imm_E(subb,al,0xF1)
GEN_do_locked_imm_E(subw,ax,0x7E)
GEN_do_locked_imm_E(subw,ax,0x9325)
GEN_do_locked_imm_E(subl,eax,0x7D)
GEN_do_locked_imm_E(subl,eax,0x31415927)
GEN_do_locked_imm_E(subq,rax,0x7D)
GEN_do_locked_imm_E(subq,rax,0x31415927)

GEN_do_locked_imm_E(xorb,al,0x7F)
GEN_do_locked_imm_E(xorb,al,0xF1)
GEN_do_locked_imm_E(xorw,ax,0x7E)
GEN_do_locked_imm_E(xorw,ax,0x9325)
GEN_do_locked_imm_E(xorl,eax,0x7D)
GEN_do_locked_imm_E(xorl,eax,0x31415927)
GEN_do_locked_imm_E(xorq,rax,0x7D)
GEN_do_locked_imm_E(xorq,rax,0x31415927)

/* Generate do_locked_unary_E_<_name>(): as GEN_do_locked_imm_E, but for a
   one-operand instruction applied directly to the memory word.  The _eax
   argument is not used.

   block[] layout: [0] flags in, [1] address of the memory operand,
   [2] flags out. */
#define GEN_do_locked_unary_E(_name,_eax)                               \
                                                                        \
  __attribute__((noinline)) void do_locked_unary_E_##_name ( void )     \
  {                                                                     \
    volatile Long e_val, e_val_before;                                  \
    Long v2, combo, flags_in;                                           \
    Long block[3];                                                      \
                                                                        \
    for (v2 = 0; v2 < NVALS; v2++) {                                    \
    for (combo = 0; combo < N_FLAG_COMBOS; combo++) {                   \
                                                                        \
      flags_in     = flags_for_combo(combo);                            \
      e_val        = val[v2];                                           \
      e_val_before = e_val;                                             \
                                                                        \
      block[0] = flags_in;                                              \
      block[1] = (long)&e_val;                                          \
      block[2] = 0;                                                     \
      __asm__ __volatile__(                                             \
          "movq 0(%0), %%rax\n\t"                                       \
          "pushq %%rax\n\t"                                             \
          "popfq\n\t"                                                   \
          "movq 8(%0), %%rbx\n\t"                                       \
          "lock; " #_name " (%%rbx)\n\t"                                \
          "pushfq\n\t"                                                  \
          "popq %%rax\n\t"                                              \
          "movq %%rax, 16(%0)\n\t"                                      \
          : : "r"(&block[0]) : "rax","rbx","cc","memory"                \
      );                                                                \
                                                                        \
      send(                                                             \
         sprintf(outBuf,                                                \
            "%s E=%016llx CCIN=%08llx -> E=%016llx CCOUT=%08llx\n",     \
            #_name, (ULong)e_val_before, (ULong)flags_in,               \
            (ULong)e_val, (ULong)(block[2] & CC_MASK)));                \
                                                                        \
    }}                                                                  \
  }

GEN_do_locked_unary_E(decb,al)
GEN_do_locked_unary_E(decw,ax)
GEN_do_locked_unary_E(decl,eax)
GEN_do_locked_unary_E(decq,rax)

GEN_do_locked_unary_E(incb,al)
GEN_do_locked_unary_E(incw,ax)
GEN_do_locked_unary_E(incl,eax)
GEN_do_locked_unary_E(incq,rax)

GEN_do_locked_unary_E(negb,al)
GEN_do_locked_unary_E(negw,ax)
GEN_do_locked_unary_E(negl,eax)
GEN_do_locked_unary_E(negq,rax)

GEN_do_locked_unary_E(notb,al)
GEN_do_locked_unary_E(notw,ax)
GEN_do_locked_unary_E(notl,eax)
GEN_do_locked_unary_E(notq,rax)


/////////////////////////////////////////////////////////////////

/* Generate   ULong <_fname> ( UChar* base, int bitno )
   which applies the bit instruction _insn to the bit 'bitno' (may be
   negative) relative to 'base' and returns the carry flag it produced
   (0 or 1).  _opnd is the asm operand template for the bit-offset
   register ("%2", or "%w2" to select its 16-bit name) and _bitty is the
   C type the offset is converted to before being placed in a register.

   Pretty meaningless to dereference base in the operand list, but that's
   what you have to do to get a bt* insn which refers to memory starting
   at base.

   All twelve variants declare the same side effects:
     - "+m": the addressed memory is read as well as written;
     - "rdx": setc/movzbq go through %dl;
     - "cc": the instruction sets the carry flag;
     - "memory": the bit that is touched may lie in bytes other than
       *base, so the compiler must not cache any memory across the asm.
   noinline keeps exactly one copy of each instruction in the object
   code. */
#define GEN_bt_mem(_fname,_insn,_opnd,_bitty)                           \
                                                                        \
   __attribute__((noinline)) ULong _fname ( UChar* base, int bitno )    \
   {                                                                    \
      ULong res;                                                        \
      __asm__                                                           \
      __volatile__(_insn "\t" _opnd ", %0\n\t"                          \
                   "setc %%dl\n\t"                                      \
                   "movzbq %%dl,%1\n"                                   \
                   : "+m" (*base), "=r" (res)                           \
                   : "r" ((_bitty)bitno)                                \
                   : "rdx","cc","memory" );                             \
      return res;                                                       \
   }

GEN_bt_mem(btsq_mem, "lock; btsq", "%2",  ULong)
GEN_bt_mem(btsl_mem, "lock; btsl", "%2",  UInt)
GEN_bt_mem(btsw_mem, "lock; btsw", "%w2", ULong)

GEN_bt_mem(btrq_mem, "lock; btrq", "%2",  ULong)
GEN_bt_mem(btrl_mem, "lock; btrl", "%2",  UInt)
GEN_bt_mem(btrw_mem, "lock; btrw", "%w2", ULong)

GEN_bt_mem(btcq_mem, "lock; btcq", "%2",  ULong)
GEN_bt_mem(btcl_mem, "lock; btcl", "%2",  UInt)
GEN_bt_mem(btcw_mem, "lock; btcw", "%w2", ULong)

GEN_bt_mem(btq_mem,  "btq",        "%2",  ULong)
GEN_bt_mem(btl_mem,  "btl",        "%2",  UInt)
GEN_bt_mem(btw_mem,  "btw",        "%w2", ULong)

/* Rotate a 64-bit value left by one bit. */
ULong rol1 ( ULong x )
{
  return (x << 1) | (x >> 63);
}

/* Signature shared by all the bt*_mem helpers. */
typedef ULong (*BtMemFn) ( UChar* base, int bitno );

/* Size of the bit field exercised by do_bt_G_E_tests, in bytes. */
#define BT_BLOCK_BYTES 200

/* Spare bytes allocated on each side of the bit field.  The Intel manual
   says a bit instruction with a register offset may access a whole
   operand-sized unit around the addressed bit; for the 64-bit forms that
   unit can start 4 bytes before, or end 4 bytes after, the 200-byte
   field.  The slack keeps such accesses inside the allocation.  It is
   never modified and never included in the reported result. */
#define BT_SLACK_BYTES 8

/* Run 10000 randomly chosen bts/btr/btc/bt operations of one operand
   width on a zeroed 200-byte field and send a summary line.  'widthCh'
   is the width letter printed in that line ('q', 'l' or 'w'); the four
   function pointers are the helpers for that width, in the order
   set, reset, complement, test. */
static void run_bt_G_E_width ( HChar widthCh,
                               BtMemFn bts, BtMemFn btr,
                               BtMemFn btc, BtMemFn bt )
{
   const BtMemFn ops[4] = { bts, btr, btc, bt };
   ULong  n, c, res;
   ULong  carrydep = 0;
   Long   bitoff;
   UChar* raw;
   UChar* field;
   UChar* mid;

   raw = calloc(BT_BLOCK_BYTES + 2 * BT_SLACK_BYTES, 1);
   assert(raw != NULL);
   field = raw + BT_SLACK_BYTES;
   mid   = field + BT_BLOCK_BYTES / 2;
   /* Valid bit offsets are -800 .. 799 inclusive. */

   for (n = 0; n < 10000; n++) {
      /* Two random() calls per iteration, offset first and operation
         second: the sequence consumed determines the output. */
      bitoff = (random() % 1600) - 800;
      c = ops[random() % 4](mid, (int)bitoff);
      c &= 255;
      assert(c == 0 || c == 1);
      if (c)
         carrydep = rol1(carrydep) ^ (ULong)bitoff;
   }

   /* Compute final result */
   res = 0;
   for (n = 0; n < BT_BLOCK_BYTES; n++) {
      res = rol1(res) ^ (ULong)field[n];
   }

   send( sprintf(outBuf,
                 "bt{s,r,c}%c: final res 0x%llx, carrydep 0x%llx\n",
                 widthCh, res, carrydep));
   free(raw);
}

/* Exercise the register-offset bit instructions on memory, once for each
   of the 64-, 32- and 16-bit operand widths, in that order. */
void do_bt_G_E_tests ( void )
{
   /*------------------------ MEM-Q -----------------------*/
   run_bt_G_E_width('q', btsq_mem, btrq_mem, btcq_mem, btq_mem);

   /*------------------------ MEM-L -----------------------*/
   run_bt_G_E_width('l', btsl_mem, btrl_mem, btcl_mem, btl_mem);

   /*------------------------ MEM-W -----------------------*/
   run_bt_G_E_width('w', btsw_mem, btrw_mem, btcw_mem, btw_mem);
}


/////////////////////////////////////////////////////////////////

/* Given a word, do bt/bts/btr/btc on bits 0, 1, 2 and 3 of it, and
   also reconstruct the original bits 0, 1, 2, 3 by looking at the
   carry flag.  Returned result has mashed bits 0-3 at the bottom and
   the reconstructed original bits 0-3 as 4-7.

   GEN_mash_mem generates   ULong <_fname> ( <_ety>* origp )   for one
   operand width: _sfx is the instruction suffix ("q", "l" or "w"),
   _load the instruction used to read the word back at the end, and
   _mty the C type of the variable receiving it.

   The asm modifies *origp through a pointer held in a register, so it
   carries a "memory" clobber: without it the compiler would be free to
   move or drop the caller's store to *origp.  noinline keeps exactly one
   copy of each locked instruction in the object code. */
#define GEN_mash_mem(_fname,_ety,_sfx,_load,_mty)                       \
                                                                        \
   __attribute__((noinline)) ULong _fname ( _ety* origp )               \
   {                                                                    \
      ULong reconstructed;                                              \
      _mty  mashed;                                                     \
      __asm__ __volatile__ (                                            \
         "movq %2, %%rdx\n\t"                                           \
         "movq $0, %%rax\n\t"                                           \
         "\n\t"                                                         \
         "bt" _sfx " $0, (%%rdx)\n\t"                                   \
         "setb %%cl\n\t"                                                \
         "movzbq %%cl, %%rcx\n\t"                                       \
         "orq %%rcx, %%rax\n\t"                                         \
         "\n\t"                                                         \
         "lock; bts" _sfx " $1, (%%rdx)\n\t"                            \
         "setb %%cl\n\t"                                                \
         "movzbq %%cl, %%rcx\n\t"                                       \
         "shlq $1, %%rcx\n\t"                                           \
         "orq %%rcx, %%rax\n\t"                                         \
         "\n\t"                                                         \
         "lock; btr" _sfx " $2, (%%rdx)\n\t"                            \
         "setb %%cl\n\t"                                                \
         "movzbq %%cl, %%rcx\n\t"                                       \
         "shlq $2, %%rcx\n\t"                                           \
         "orq %%rcx, %%rax\n\t"                                         \
         "\n\t"                                                         \
         "lock; btc" _sfx " $3, (%%rdx)\n\t"                            \
         "setb %%cl\n\t"                                                \
         "movzbq %%cl, %%rcx\n\t"                                       \
         "shlq $3, %%rcx\n\t"                                           \
         "orq %%rcx, %%rax\n\t"                                         \
         "\n\t"                                                         \
         "movq %%rax, %0\n\t"                                           \
         _load " (%%rdx), %1"                                           \
         : "=r" (reconstructed), "=r" (mashed)                          \
         : "r" (origp)                                                  \
         : "rax", "rcx", "rdx", "cc", "memory");                        \
      return (mashed & 0xF) | ((reconstructed & 0xF) << 4);             \
   }

GEN_mash_mem(mash_mem_Q, ULong,  "q", "movq",   ULong)
GEN_mash_mem(mash_mem_L, UInt,   "l", "movl",   UInt)
GEN_mash_mem(mash_mem_W, UShort, "w", "movzwq", ULong)


/* Run the three mash_mem_* helpers on every 4-bit value 0 .. 15, held in
   heap-allocated words of the matching width, and send one line per
   value. */
void do_bt_imm_E_tests( void )
{
   ULong   i;
   ULong*  iiq = malloc(sizeof *iiq);
   UInt*   iil = malloc(sizeof *iil);
   UShort* iiw = malloc(sizeof *iiw);
   assert(iiq != NULL && iil != NULL && iiw != NULL);
   for (i = 0; i < 0x10; i++) {
      *iiq = i;
      *iil = (UInt)i;
      *iiw = (UShort)i;
      send(sprintf(outBuf,"0x%llx -> 0x%02llx 0x%02llx 0x%02llx\n", i,
                   mash_mem_Q(iiq), mash_mem_L(iil), mash_mem_W(iiw)));
   }
   free(iiq);
   free(iil);
   free(iiw);
}


913///////////////////////////////////////////////////////////////// 914 915int main ( void ) 916{ 917 do_locked_G_E_addb(); 918 do_locked_G_E_addw(); 919 do_locked_G_E_addl(); 920 do_locked_G_E_addq(); 921 922 do_locked_G_E_orb(); 923 do_locked_G_E_orw(); 924 do_locked_G_E_orl(); 925 do_locked_G_E_orq(); 926 927 do_locked_G_E_adcb(); 928 do_locked_G_E_adcw(); 929 do_locked_G_E_adcl(); 930 do_locked_G_E_adcq(); 931 932 do_locked_G_E_sbbb(); 933 do_locked_G_E_sbbw(); 934 do_locked_G_E_sbbl(); 935 do_locked_G_E_sbbq(); 936 937 do_locked_G_E_andb(); 938 do_locked_G_E_andw(); 939 do_locked_G_E_andl(); 940 do_locked_G_E_andq(); 941 942 do_locked_G_E_subb(); 943 do_locked_G_E_subw(); 944 do_locked_G_E_subl(); 945 do_locked_G_E_subq(); 946 947 do_locked_G_E_xorb(); 948 do_locked_G_E_xorw(); 949 do_locked_G_E_xorl(); 950 do_locked_G_E_xorq(); 951 // 4 * 7 952 953 do_locked_imm_E_addb_0x7F(); 954 do_locked_imm_E_addb_0xF1(); 955 do_locked_imm_E_addw_0x7E(); 956 do_locked_imm_E_addw_0x9325(); 957 do_locked_imm_E_addl_0x7D(); 958 do_locked_imm_E_addl_0x31415927(); 959 do_locked_imm_E_addq_0x7D(); 960 do_locked_imm_E_addq_0x31415927(); 961 962 do_locked_imm_E_orb_0x7F(); 963 do_locked_imm_E_orb_0xF1(); 964 do_locked_imm_E_orw_0x7E(); 965 do_locked_imm_E_orw_0x9325(); 966 do_locked_imm_E_orl_0x7D(); 967 do_locked_imm_E_orl_0x31415927(); 968 do_locked_imm_E_orq_0x7D(); 969 do_locked_imm_E_orq_0x31415927(); 970 971 do_locked_imm_E_adcb_0x7F(); 972 do_locked_imm_E_adcb_0xF1(); 973 do_locked_imm_E_adcw_0x7E(); 974 do_locked_imm_E_adcw_0x9325(); 975 do_locked_imm_E_adcl_0x7D(); 976 do_locked_imm_E_adcl_0x31415927(); 977 do_locked_imm_E_adcq_0x7D(); 978 do_locked_imm_E_adcq_0x31415927(); 979 980 do_locked_imm_E_sbbb_0x7F(); 981 do_locked_imm_E_sbbb_0xF1(); 982 do_locked_imm_E_sbbw_0x7E(); 983 do_locked_imm_E_sbbw_0x9325(); 984 do_locked_imm_E_sbbl_0x7D(); 985 do_locked_imm_E_sbbl_0x31415927(); 986 do_locked_imm_E_sbbq_0x7D(); 987 do_locked_imm_E_sbbq_0x31415927(); 988 989 
do_locked_imm_E_andb_0x7F(); 990 do_locked_imm_E_andb_0xF1(); 991 do_locked_imm_E_andw_0x7E(); 992 do_locked_imm_E_andw_0x9325(); 993 do_locked_imm_E_andl_0x7D(); 994 do_locked_imm_E_andl_0x31415927(); 995 do_locked_imm_E_andq_0x7D(); 996 do_locked_imm_E_andq_0x31415927(); 997 998 do_locked_imm_E_subb_0x7F(); 999 do_locked_imm_E_subb_0xF1(); 1000 do_locked_imm_E_subw_0x7E(); 1001 do_locked_imm_E_subw_0x9325(); 1002 do_locked_imm_E_subl_0x7D(); 1003 do_locked_imm_E_subl_0x31415927(); 1004 do_locked_imm_E_subq_0x7D(); 1005 do_locked_imm_E_subq_0x31415927(); 1006 1007 do_locked_imm_E_xorb_0x7F(); 1008 do_locked_imm_E_xorb_0xF1(); 1009 do_locked_imm_E_xorw_0x7E(); 1010 do_locked_imm_E_xorw_0x9325(); 1011 do_locked_imm_E_xorl_0x7D(); 1012 do_locked_imm_E_xorl_0x31415927(); 1013 do_locked_imm_E_xorq_0x7D(); 1014 do_locked_imm_E_xorq_0x31415927(); 1015 // 4 * 7 + 8 * 7 == 84 1016 1017 do_locked_unary_E_decb(); 1018 do_locked_unary_E_decw(); 1019 do_locked_unary_E_decl(); 1020 do_locked_unary_E_decq(); 1021 1022 do_locked_unary_E_incb(); 1023 do_locked_unary_E_incw(); 1024 do_locked_unary_E_incl(); 1025 do_locked_unary_E_incq(); 1026 1027 do_locked_unary_E_negb(); 1028 do_locked_unary_E_negw(); 1029 do_locked_unary_E_negl(); 1030 do_locked_unary_E_negq(); 1031 1032 do_locked_unary_E_notb(); 1033 do_locked_unary_E_notw(); 1034 do_locked_unary_E_notl(); 1035 do_locked_unary_E_notq(); 1036 // 100 1037 1038 do_bt_G_E_tests(); 1039 // 109 1040 do_bt_imm_E_tests(); 1041 // 118 1042 1043 // So there should be 118 lock-prefixed instructions in the 1044 // disassembly of this compilation unit. 
1045 // confirm with 1046 // objdump -d ./amd64locked | grep lock | grep -v do_lock | grep -v elf64 | wc 1047 1048 1049 { UInt crcExpd = 0x1F677629; 1050 theCRC = crcFinalise( theCRC ); 1051 if (theCRC == crcExpd) { 1052 printf("amd64locked: PASS: CRCs actual 0x%08X expected 0x%08X\n", 1053 theCRC, crcExpd); 1054 } else { 1055 printf("amd64locked: FAIL: CRCs actual 0x%08X expected 0x%08X\n", 1056 theCRC, crcExpd); 1057 printf("amd64locked: set #define VERBOSE 1 to diagnose\n"); 1058 } 1059 } 1060 1061 return 0; 1062} 1063