# fldcw_check -- x86-64 Linux, GNU as (AT&T syntax), freestanding (_start).
#
# This code tests the fldcw "load floating point control word"
# instruction.  On most x86 processors the retired_instruction
# performance counter counts this as one instruction.  However,
# on Pentium 4 systems it counts as two.  Therefore this can
# affect BBV results on such a system.
#
# fldcw is most often used to set the rounding mode when doing
# floating point to integer conversions.
#
# It is encoded as "d9 /5" which means
#       1101 1001 xx10 1yyy
# where xx is the "mod" field (00, 01, or 10, selecting no displacement,
# an 8-bit displacement or a 32-bit displacement) and yyy is the
# register (r/m) field.
#
# NOTE: the exact instruction sequence below is what gets counted, so
# instructions must not be added, removed or substituted.

        .text
        .globl  _start

_start:

        # These instructions have encodings similar to fldcw and can
        # cause false positives if the detection is not explicit enough.
similar:
        fld1                            # d9 e8
        fldl2t                          # d9 e9
        fldl2e                          # d9 ea
        fldpi                           # d9 eb
        fldlg2                          # d9 ec
        fldln2                          # d9 ed
        fldz                            # d9 ee

        # Check some varied ways of calling fldcw.

        # Operand at an offset from the stack pointer.
stack:
        sub     $8,%rsp                 # allocate space on stack
        fnstcw  2(%rsp)                 # store current cw into the scratch slot
        fldcw   2(%rsp)                 # ... and load the same value back
        add     $8,%rsp                 # restore stack

        # Operand addressed through a 64 bit base register.
sixtyfour_reg:
        fnstcw  cw                      # cw = current control word
        mov     $cw,%rax
        fldcw   0(%rax)                 # rax
        mov     $cw,%rbx
        fldcw   0(%rbx)                 # rbx
        mov     $cw,%rcx
        fldcw   0(%rcx)                 # rcx
        mov     $cw,%rdx
        fldcw   0(%rdx)                 # rdx

        # Operand addressed through a 32 bit base register.
        #
        # Note!  The assembler that comes with SuSE 9.1
        #        cannot assemble 32-bit fldcw on 64-bit systems.
        #        Hence the need to hand-code them:
        #        0x67 is the address-size override prefix, followed
        #        by d9 and the ModRM byte (plus displacement, if any).
thirtytwo_reg:
        fnstcw  cw                      # cw = current control word
        mov     $cw,%eax
        # hand-coded: fldcw 0(%eax)
        .byte   0x67,0xd9,0x28

        mov     $cw,%ebx
        # hand-coded: fldcw 0(%ebx)
        .byte   0x67,0xd9,0x2b

        mov     $cw,%ecx
        # hand-coded: fldcw 0(%ecx)
        .byte   0x67,0xd9,0x29

        mov     $cw,%edx
        # hand-coded: fldcw 0(%edx)
        .byte   0x67,0xd9,0x2a

        # Register + 8-bit displacement.
eight_bit:
        mov     $cw,%eax
        sub     $32,%eax                # bias the base so that base+32 == cw

        # hand-coded: fldcw 32(%eax)
        .byte   0x67,0xd9,0x68,0x20

        mov     %eax,%ebx
        # hand-coded: fldcw 32(%ebx)
        .byte   0x67,0xd9,0x6b,0x20

        mov     %eax,%ecx
        # hand-coded: fldcw 32(%ecx)
        .byte   0x67,0xd9,0x69,0x20

        mov     %eax,%edx
        # hand-coded: fldcw 32(%edx)
        .byte   0x67,0xd9,0x6a,0x20

        # Register + 32-bit displacement (0x00007530 == 30000).
thirtytwo_bit:
        mov     $cw,%eax
        sub     $30000,%eax             # bias the base so that base+30000 == cw

        # hand-coded: fldcw 30000(%eax)
        .byte   0x67,0xd9,0xa8,0x30,0x75,0x00,0x00

        mov     %eax,%ebx
        # hand-coded: fldcw 30000(%ebx)
        .byte   0x67,0xd9,0xab,0x30,0x75,0x00,0x00

        mov     %eax,%ecx
        # hand-coded: fldcw 30000(%ecx)
        .byte   0x67,0xd9,0xa9,0x30,0x75,0x00,0x00

        mov     %eax,%edx
        # hand-coded: fldcw 30000(%edx)
        .byte   0x67,0xd9,0xaa,0x30,0x75,0x00,0x00

        # Check an fp/integer conversion
        # in a loop to give a bigger count.

        mov     $1024,%rcx              # rcx = iteration count consumed by "loop"
big_loop:

        fldl    three                   # load value onto fp stack
        fnstcw  saved_cw                # store control word to mem
        movzwl  saved_cw, %eax          # load cw from mem, zero extending
        movb    $12, %ah                # cw bits 10-11 = 11: "round to zero"
        movw    %ax, cw                 # store modified cw back to memory
        fldcw   cw                      # load new rounding mode
        fistpl  result                  # pop stack value as integer to mem
        fldcw   saved_cw                # restore old cw

        loop    big_loop                # loop to make the count more obvious

        movl    result, %ebx            # sanity check to see if the
        cmp     $3,%rbx                 # result is the expected one
        je      exit

        # Report a wrong conversion result on stdout.  Execution then
        # falls through to exit, so the exit status is 0 in both cases.
print_error:
        mov     $1,%rax                 # write syscall
        mov     $1,%rdi                 # stdout
        mov     $error,%rsi             # string
        mov     $error_len,%rdx         # length of string (without the NUL)
        syscall

exit:
        xor     %rdi, %rdi              # return 0
        mov     $60, %rax               # SYSCALL_EXIT
        syscall



        .data
saved_cw:       .long 0                 # control word as found on loop entry
cw:             .long 0                 # scratch control word used by the tests
result:         .long 0                 # integer produced by fistpl
three:          .long 0                 # a floating point 3.0 (low dword)
                .long 1074266112        # 0x40080000 (high dword)
error:          .asciz "Error! Wrong result!\n"
        # Message length computed by the assembler so it always matches
        # the string above; the -1 drops the NUL added by .asciz.
        .equ    error_len, . - error - 1