# Test program for the x87 "fldcw" (load FPU control word) instruction.
#
# Syntax : GNU as, AT&T operand order (source, destination).
# Target : x86-64; entry point is _start and the kernel is reached
#          directly through the `syscall` instruction.
#
# NOTE: the instruction stream itself is the payload of this test (see
# the comments in _start about retired-instruction counts), so the
# instruction sequence and encodings below should not be "optimised"
# or restructured.

	.equ	SYS_WRITE, 1		# syscall number used for write
	.equ	SYS_EXIT, 60		# syscall number used for exit
	.equ	STDOUT, 1		# file descriptor the error text goes to
	.equ	LOOP_COUNT, 1024	# iterations of big_loop

.globl _start

_start:
	# This code tests for the fldcw "load floating point control word"
	#   instruction.  On most x86 processors the retired_instruction
	#   performance counter counts this as one instruction.  However,
	#   on Pentium 4 systems it counts as two.  Therefore this can
	#   affect BBV results on such a system.
	# fldcw is most often used to set the rounding mode when doing
	#   floating point to integer conversions

	# It is encoded as "d9 /5" which means
	#   1101 1001 xx10 1yyy
	# Where xx is the "mod" which will be 00, 01, or 10 indicating offset
	#   and yyy is the register field

	# these are instructions with similar encodings to fldcw
	# that can cause false positives if the test isn't explicit enough
	# (each one also pushes a constant onto the x87 register stack)
similar:
	fld1				# d9 e8
	fldl2t				# d9 e9
	fldl2e				# d9 ea
	fldpi				# d9 eb
	fldlg2				# d9 ec
	fldln2				# d9 ed
	fldz				# d9 ee

	# check some varied ways of calling fldcw

	# offset on stack
stack:
	sub	$8,%rsp			# allocate space on stack
	fnstcw	2(%rsp)			# store current control word ...
	fldcw	2(%rsp)			# ... and load the same value back
	add	$8,%rsp			# restore stack

	# 64-bit register
sixtyfour_reg:
	fnstcw	cw			# cw = current control word
	mov	$cw,%rax
	fldcw	0(%rax)			# rax
	mov	$cw,%rbx
	fldcw	0(%rbx)			# rbx
	mov	$cw,%rcx
	fldcw	0(%rcx)			# rcx
	mov	$cw,%rdx
	fldcw	0(%rdx)			# rdx

	# 32-bit register

	# Note!  The assembler that comes with SuSE 9.1
	#        cannot assemble 32-bit fldcw on 64-bit systems
	#        Hence the need to hand-code them
	#
	# Each hand-coded form is: 0x67 (address-size prefix), 0xd9
	# (opcode), ModRM byte, then the displacement bytes if any.
	# Because the address is formed from a 32-bit register, the
	# address of cw has to be representable in 32 bits.


thirtytwo_reg:
	fnstcw	cw			# cw = current control word
	mov	$cw,%eax

#	fldcw	0(%eax)			# eax
	.byte	0x67,0xd9,0x28

	mov	$cw,%ebx

#	fldcw	0(%ebx)			# ebx
	.byte	0x67,0xd9,0x2b

	mov	$cw,%ecx

#	fldcw	0(%ecx)			# ecx
	.byte	0x67,0xd9,0x29

	mov	$cw,%edx

#	fldcw	0(%edx)			# edx
	.byte	0x67,0xd9,0x2a

	# register + 8-bit offset
eight_bit:
	mov	$cw,%eax
	sub	$32,%eax		# bias the base so base+32 == cw

#	fldcw	32(%eax)		# eax + 8 bit offset
	.byte	0x67,0xd9,0x68,0x20

	mov	%eax,%ebx
#	fldcw	32(%ebx)		# ebx + 8 bit offset
	.byte	0x67,0xd9,0x6b,0x20

	mov	%eax,%ecx

#	fldcw	32(%ecx)		# ecx + 8 bit offset
	.byte	0x67,0xd9,0x69,0x20

	mov	%eax,%edx

#	fldcw	32(%edx)		# edx + 8 bit offset
	.byte	0x67,0xd9,0x6a,0x20


	# register + 32-bit offset
thirtytwo_bit:
	mov	$cw,%eax
	sub	$30000,%eax		# bias the base so base+30000 == cw

#	fldcw	30000(%eax)		# eax + 32 bit offset
	.byte	0x67,0xd9,0xa8,0x30,0x75,0x00,0x00

	mov	%eax,%ebx

#	fldcw	30000(%ebx)		# ebx + 32 bit offset
	.byte	0x67,0xd9,0xab,0x30,0x75,0x00,0x00

	mov	%eax,%ecx

#	fldcw	30000(%ecx)		# ecx + 32 bit offset
	.byte	0x67,0xd9,0xa9,0x30,0x75,0x00,0x00

	mov	%eax,%edx

#	fldcw	30000(%edx)		# edx + 32 bit offset
	.byte	0x67,0xd9,0xaa,0x30,0x75,0x00,0x00

	# check an fp/integer conversion
	# in a loop to give a bigger count

	mov	$LOOP_COUNT,%rcx	# rcx = iteration counter for `loop`
big_loop:

	fldl	three			# load value onto fp stack
	fnstcw	saved_cw		# store control word to mem
	movzwl	saved_cw, %eax		# load cw from mem, zero extending
	movb	$12, %ah		# set cw for "round to zero"
					# (12 = 0x0c replaces bits 8-15 of
					#  the word, so the rounding-control
					#  field, bits 10-11, becomes 11b)
	movw	%ax, cw			# store the 16-bit word back to memory
	fldcw	cw			# load new rounding mode
	fistpl	result			# save stack value as integer to mem
	fldcw	saved_cw		# restore old cw

	loop	big_loop		# loop to make the count more obvious

	movl	result, %ebx		# sanity check to see if the
	cmp	$3,%rbx			#  result is the expected one
	je	exit

print_error:
	mov	$SYS_WRITE,%rax		# write syscall
	mov	$STDOUT,%rdi		# stdout
	mov	$error,%rsi		# string
	mov	$error_len,%rdx		# length of string (without the NUL)
	syscall

	# The error path falls through to here as well, so the exit
	# status is 0 whether or not the sanity check passed.
exit:
	xor	%rdi, %rdi		# return 0
	mov	$SYS_EXIT, %rax		# SYSCALL_EXIT
	syscall



.data
saved_cw:	.long 0			# control word as found on loop entry
cw:		.long 0			# scratch control word used by the tests
result:		.long 0			# integer produced by fistpl
three:		.long 0			# a floating point 3.0
		.long 1074266112	#  (low dword, then high dword 0x40080000)
error:		.asciz "Error!  Wrong result!\n"
		.equ	error_len, . - error - 1	# bytes to write, excluding the NUL