1dbebecc2bf00530ce09b3658641d1514d807aeeenjn 2dbebecc2bf00530ce09b3658641d1514d807aeeenjn.globl _start 3dbebecc2bf00530ce09b3658641d1514d807aeeenjn 4dbebecc2bf00530ce09b3658641d1514d807aeeenjn_start: 5dbebecc2bf00530ce09b3658641d1514d807aeeenjn # This code tests for the fldcw "load floating point command word" 6dbebecc2bf00530ce09b3658641d1514d807aeeenjn # instruction. On most x86 processors the retired_instruction 7dbebecc2bf00530ce09b3658641d1514d807aeeenjn # performance counter counts this as one instruction. However, 8dbebecc2bf00530ce09b3658641d1514d807aeeenjn # on Pentium 4 systems it counts as two. Therefore this can 9dbebecc2bf00530ce09b3658641d1514d807aeeenjn # affect BBV results on such a system. 10dbebecc2bf00530ce09b3658641d1514d807aeeenjn # fldcw is most often used to set the rouding mode when doing 11dbebecc2bf00530ce09b3658641d1514d807aeeenjn # floating point to integer conversions 12dbebecc2bf00530ce09b3658641d1514d807aeeenjn 13dbebecc2bf00530ce09b3658641d1514d807aeeenjn # It is encoded as "d9 /5" which means 14dbebecc2bf00530ce09b3658641d1514d807aeeenjn # 1101 1001 xx10 1yyy 15dbebecc2bf00530ce09b3658641d1514d807aeeenjn # Where xx is the "mod" which will be 00, 01, or 10 indicating offset 16dbebecc2bf00530ce09b3658641d1514d807aeeenjn # and yyy is the register field 17dbebecc2bf00530ce09b3658641d1514d807aeeenjn 18dbebecc2bf00530ce09b3658641d1514d807aeeenjn 19dbebecc2bf00530ce09b3658641d1514d807aeeenjn 20dbebecc2bf00530ce09b3658641d1514d807aeeenjn # these are instructions with similar encodings to fldcw 21dbebecc2bf00530ce09b3658641d1514d807aeeenjn # that can cause false positives if the test isn't explicit enough 22dbebecc2bf00530ce09b3658641d1514d807aeeenjnsimilar: 23dbebecc2bf00530ce09b3658641d1514d807aeeenjn fld1 # d9 e8 24dbebecc2bf00530ce09b3658641d1514d807aeeenjn fldl2t # d9 e9 25dbebecc2bf00530ce09b3658641d1514d807aeeenjn fldl2e # d9 ea 26dbebecc2bf00530ce09b3658641d1514d807aeeenjn fldpi # d9 eb 27dbebecc2bf00530ce09b3658641d1514d807aeeenjn fldlg2 # d9 ec 28dbebecc2bf00530ce09b3658641d1514d807aeeenjn fldln2 # d9 ed 29dbebecc2bf00530ce09b3658641d1514d807aeeenjn fldz # d9 ee 30dbebecc2bf00530ce09b3658641d1514d807aeeenjn 31dbebecc2bf00530ce09b3658641d1514d807aeeenjn # check some varied ways of calling fldcw 32dbebecc2bf00530ce09b3658641d1514d807aeeenjn 33dbebecc2bf00530ce09b3658641d1514d807aeeenjn 34dbebecc2bf00530ce09b3658641d1514d807aeeenjn # offset on stack 35dbebecc2bf00530ce09b3658641d1514d807aeeenjnstack: 36dbebecc2bf00530ce09b3658641d1514d807aeeenjn sub $4,%esp # allocate space on stack 37dbebecc2bf00530ce09b3658641d1514d807aeeenjn fnstcw 2(%esp) 38dbebecc2bf00530ce09b3658641d1514d807aeeenjn fldcw 2(%esp) 39dbebecc2bf00530ce09b3658641d1514d807aeeenjn add $4,%esp # restore stack 40dbebecc2bf00530ce09b3658641d1514d807aeeenjn 41dbebecc2bf00530ce09b3658641d1514d807aeeenjn # 32-bit register 42dbebecc2bf00530ce09b3658641d1514d807aeeenjn 43dbebecc2bf00530ce09b3658641d1514d807aeeenjn fnstcw cw 44dbebecc2bf00530ce09b3658641d1514d807aeeenjn mov $cw,%eax 45dbebecc2bf00530ce09b3658641d1514d807aeeenjn fldcw 0(%eax) # eax 46dbebecc2bf00530ce09b3658641d1514d807aeeenjn mov $cw,%ebx 47dbebecc2bf00530ce09b3658641d1514d807aeeenjn fldcw 0(%ebx) # ebx 48dbebecc2bf00530ce09b3658641d1514d807aeeenjn mov $cw,%ecx 49dbebecc2bf00530ce09b3658641d1514d807aeeenjn fldcw 0(%ecx) # ecx 50dbebecc2bf00530ce09b3658641d1514d807aeeenjn mov $cw,%edx 51dbebecc2bf00530ce09b3658641d1514d807aeeenjn fldcw 0(%edx) # edx 52dbebecc2bf00530ce09b3658641d1514d807aeeenjn 53dbebecc2bf00530ce09b3658641d1514d807aeeenjn # register + 8-bit offset 54dbebecc2bf00530ce09b3658641d1514d807aeeenjneight_bit: 55dbebecc2bf00530ce09b3658641d1514d807aeeenjn mov $cw,%eax 56dbebecc2bf00530ce09b3658641d1514d807aeeenjn sub $32,%eax 57dbebecc2bf00530ce09b3658641d1514d807aeeenjn 58dbebecc2bf00530ce09b3658641d1514d807aeeenjn fldcw 32(%eax) # eax + 8 bit offset 59dbebecc2bf00530ce09b3658641d1514d807aeeenjn mov %eax,%ebx 60dbebecc2bf00530ce09b3658641d1514d807aeeenjn fldcw 32(%ebx) # ebx + 8 bit offset 61dbebecc2bf00530ce09b3658641d1514d807aeeenjn mov %eax,%ecx 62dbebecc2bf00530ce09b3658641d1514d807aeeenjn fldcw 32(%ecx) # ecx + 8 bit offset 63dbebecc2bf00530ce09b3658641d1514d807aeeenjn mov %eax,%edx 64dbebecc2bf00530ce09b3658641d1514d807aeeenjn fldcw 32(%edx) # edx + 8 bit offset 65dbebecc2bf00530ce09b3658641d1514d807aeeenjn 66dbebecc2bf00530ce09b3658641d1514d807aeeenjn # register + 32-bit offset 67dbebecc2bf00530ce09b3658641d1514d807aeeenjnthirtytwo_bit: 68dbebecc2bf00530ce09b3658641d1514d807aeeenjn mov $cw,%eax 69dbebecc2bf00530ce09b3658641d1514d807aeeenjn sub $30000,%eax 70dbebecc2bf00530ce09b3658641d1514d807aeeenjn 71dbebecc2bf00530ce09b3658641d1514d807aeeenjn fldcw 30000(%eax) # eax + 16 bit offset 72dbebecc2bf00530ce09b3658641d1514d807aeeenjn mov %eax,%ebx 73dbebecc2bf00530ce09b3658641d1514d807aeeenjn fldcw 30000(%ebx) # ebx + 16 bit offset 74dbebecc2bf00530ce09b3658641d1514d807aeeenjn mov %eax,%ecx 75dbebecc2bf00530ce09b3658641d1514d807aeeenjn fldcw 30000(%ecx) # ecx + 16 bit offset 76dbebecc2bf00530ce09b3658641d1514d807aeeenjn mov %eax,%edx 77dbebecc2bf00530ce09b3658641d1514d807aeeenjn fldcw 30000(%edx) # edx + 16 bit offset 78dbebecc2bf00530ce09b3658641d1514d807aeeenjn 79dbebecc2bf00530ce09b3658641d1514d807aeeenjn # check an fp/integer conversion 80dbebecc2bf00530ce09b3658641d1514d807aeeenjn # in a loop to give a bigger count 81dbebecc2bf00530ce09b3658641d1514d807aeeenjn 82dbebecc2bf00530ce09b3658641d1514d807aeeenjn mov $1024,%ecx 83dbebecc2bf00530ce09b3658641d1514d807aeeenjnbig_loop: 84dbebecc2bf00530ce09b3658641d1514d807aeeenjn 85dbebecc2bf00530ce09b3658641d1514d807aeeenjn fldl three # load value onto fp stack 86dbebecc2bf00530ce09b3658641d1514d807aeeenjn fnstcw saved_cw # store control word to mem 87dbebecc2bf00530ce09b3658641d1514d807aeeenjn movzwl saved_cw, %eax # load cw from mem, zero extending 88dbebecc2bf00530ce09b3658641d1514d807aeeenjn movb $12, %ah # set cw for "round to zero" 89dbebecc2bf00530ce09b3658641d1514d807aeeenjn movw %ax, cw # store back to memory 90dbebecc2bf00530ce09b3658641d1514d807aeeenjn fldcw cw # save new rounding mode 91dbebecc2bf00530ce09b3658641d1514d807aeeenjn fistpl result # save stack value as integer to mem 92dbebecc2bf00530ce09b3658641d1514d807aeeenjn fldcw saved_cw # restore old cw 93dbebecc2bf00530ce09b3658641d1514d807aeeenjn 94dbebecc2bf00530ce09b3658641d1514d807aeeenjn loop big_loop # loop to make the count more obvious 95dbebecc2bf00530ce09b3658641d1514d807aeeenjn 96dbebecc2bf00530ce09b3658641d1514d807aeeenjn movl result, %ebx # sanity check to see if the 97dbebecc2bf00530ce09b3658641d1514d807aeeenjn cmp $3,%ebx # result is the expected one 98dbebecc2bf00530ce09b3658641d1514d807aeeenjn je exit 99dbebecc2bf00530ce09b3658641d1514d807aeeenjn 100dbebecc2bf00530ce09b3658641d1514d807aeeenjnprint_error: 101dbebecc2bf00530ce09b3658641d1514d807aeeenjn mov $4,%eax # write syscall 1028eb8bab992e3998c33770b0cdb16059a8b918a06sewardj#if defined(VGO_darwin) 103dbebecc2bf00530ce09b3658641d1514d807aeeenjn pushl $22 1048eb8bab992e3998c33770b0cdb16059a8b918a06sewardj pushl $error 1058eb8bab992e3998c33770b0cdb16059a8b918a06sewardj pushl $1 1068eb8bab992e3998c33770b0cdb16059a8b918a06sewardj int $0x80 1078eb8bab992e3998c33770b0cdb16059a8b918a06sewardj#elif defined(VGO_linux) 108dbebecc2bf00530ce09b3658641d1514d807aeeenjn mov $1,%ebx # stdout 109dbebecc2bf00530ce09b3658641d1514d807aeeenjn mov $error,%ecx # string 110dbebecc2bf00530ce09b3658641d1514d807aeeenjn mov $22,%edx # length of string 111dbebecc2bf00530ce09b3658641d1514d807aeeenjn int $0x80 1128eb8bab992e3998c33770b0cdb16059a8b918a06sewardj#elif defined(VGO_solaris) 1138eb8bab992e3998c33770b0cdb16059a8b918a06sewardj pushl $22 1148eb8bab992e3998c33770b0cdb16059a8b918a06sewardj pushl $error 1158eb8bab992e3998c33770b0cdb16059a8b918a06sewardj pushl $1 1168eb8bab992e3998c33770b0cdb16059a8b918a06sewardj int $0x91 1178eb8bab992e3998c33770b0cdb16059a8b918a06sewardj#else 1188eb8bab992e3998c33770b0cdb16059a8b918a06sewardj# error "Unknown OS" 1198eb8bab992e3998c33770b0cdb16059a8b918a06sewardj#endif 1208eb8bab992e3998c33770b0cdb16059a8b918a06sewardj 121dbebecc2bf00530ce09b3658641d1514d807aeeenjnexit: 1228eb8bab992e3998c33770b0cdb16059a8b918a06sewardj movl $1, %eax # SYSCALL_EXIT 1238eb8bab992e3998c33770b0cdb16059a8b918a06sewardj#if defined(VGO_darwin) 124dbebecc2bf00530ce09b3658641d1514d807aeeenjn pushl result 1258eb8bab992e3998c33770b0cdb16059a8b918a06sewardj int $0x80 1268eb8bab992e3998c33770b0cdb16059a8b918a06sewardj#elif defined(VGO_linux) 127dbebecc2bf00530ce09b3658641d1514d807aeeenjn movl result, %ebx # load converted value 128dbebecc2bf00530ce09b3658641d1514d807aeeenjn int $0x80 1298eb8bab992e3998c33770b0cdb16059a8b918a06sewardj#elif defined(VGO_solaris) 1308eb8bab992e3998c33770b0cdb16059a8b918a06sewardj pushl result 1318eb8bab992e3998c33770b0cdb16059a8b918a06sewardj int $0x91 1328eb8bab992e3998c33770b0cdb16059a8b918a06sewardj#else 1338eb8bab992e3998c33770b0cdb16059a8b918a06sewardj# error "Unknown OS" 1348eb8bab992e3998c33770b0cdb16059a8b918a06sewardj#endif 135dbebecc2bf00530ce09b3658641d1514d807aeeenjn 136dbebecc2bf00530ce09b3658641d1514d807aeeenjn 137dbebecc2bf00530ce09b3658641d1514d807aeeenjn.data 138dbebecc2bf00530ce09b3658641d1514d807aeeenjnsaved_cw: .long 0 139dbebecc2bf00530ce09b3658641d1514d807aeeenjncw: .long 0 140dbebecc2bf00530ce09b3658641d1514d807aeeenjnresult: .long 0 141dbebecc2bf00530ce09b3658641d1514d807aeeenjnthree: .long 0 # a floating point 3.0 142dbebecc2bf00530ce09b3658641d1514d807aeeenjn .long 1074266112 1438eb8bab992e3998c33770b0cdb16059a8b918a06sewardjerror: .ascii "Error! Wrong result!\n\0" 144