1
.globl _start

_start:
	# This code tests for the fldcw "load floating point control word"
	#   instruction.  On most x86 processors the retired_instruction
	#   performance counter counts this as one instruction.  However,
	#   on Pentium 4 systems it counts as two.  Therefore this can
	#   affect BBV results on such a system.
	# fldcw is most often used to set the rounding mode when doing
	#   floating point to integer conversions.
	#
	# Syntax: GNU as, AT&T operand order (source, destination).
	# Execution starts at _start and runs straight through the
	# labelled sections below; the labels only mark the test cases.

	# It is encoded as "d9 /5" which means
	#   1101 1001 xx10 1yyy
	# Where xx is the ModRM "mod" field, which will be 00, 01, or 10
	#   (no, 8-bit, or 32-bit displacement), 101 is the /5 opcode
	#   extension, and yyy is the r/m field naming the base register.

	# these are instructions with similar encodings to fldcw
	# that can cause false positives if the test isn't explicit enough
similar:
	# x87 constant loads.  They share the d9 opcode byte with fldcw and
	# their second byte also has 101 in bits 5-3, but its top two bits
	# are 11 (register form), whereas fldcw needs 00, 01 or 10.
	# Each one pushes a value on the x87 register stack and nothing in
	# this file pops them, so seven of the eight stack slots stay
	# occupied from here on.
	fld1				# d9 e8
	fldl2t				# d9 e9
	fldl2e				# d9 ea
	fldpi				# d9 eb
	fldlg2				# d9 ec
	fldln2				# d9 ed
	fldz				# d9 ee
28
29	# check some varied ways of calling fldcw
30
31	# offset on stack
stack:
	# fldcw through a stack-relative address.  The control word that is
	# loaded is the one just stored, so the FPU state does not change.
	sub	$8,%rsp			# reserve 8 bytes of scratch space
	fnstcw	2(%rsp)			# store the current control word at rsp+2
	fldcw	2(%rsp)			# load it back from rsp+2
	add	$8,%rsp			# release the scratch space
37
38	# 64-bit register
sixtyfour_reg:
	# fldcw through each of rax/rbx/rcx/rdx used as a 64-bit base.
	# cw is first filled with the current control word, so every
	# fldcw below reloads the value the FPU already has.
	fnstcw	cw
	mov	$cw,%rax		# address of cw as an immediate
	fldcw	0(%rax)			# rax
	mov	$cw,%rbx
	fldcw	0(%rbx)			# rbx
	mov	$cw,%rcx
	fldcw	0(%rcx)			# rcx
	mov	$cw,%rdx
	fldcw	0(%rdx)			# rdx
49
50	# 32-bit register
51
52	# Note!  The assembler that comes with SuSE 9.1
53	#        cannot assemble 32-bit fldcw on 64-bit systems
54	#        Hence the need to hand-code them
55
56
thirtytwo_reg:
	# fldcw through a 32-bit base register, hand-assembled.
	# Byte layout of each case:
	#   0x67            address-size prefix (32-bit addressing)
	#   0xd9            opcode
	#   00 101 rrr      ModRM: no displacement, /5, base register rrr
	# The 32-bit moves assume cw sits at an address that fits in
	# 32 bits.
	fnstcw	cw			# cw = current control word
	mov	$cw,%eax

#	fldcw	0(%eax)			# eax
	.byte	0x67,0xd9,0x28

	mov	$cw,%ebx

#	fldcw	0(%ebx)			# ebx
	.byte	0x67,0xd9,0x2b

	mov	$cw,%ecx

#	fldcw	0(%ecx)			# ecx
	.byte	0x67,0xd9,0x29

	mov	$cw,%edx

#	fldcw	0(%edx)			# edx
	.byte	0x67,0xd9,0x2a
78
79	# register + 8-bit offset
eight_bit:
	# fldcw through a 32-bit base register plus an 8-bit displacement,
	# hand-assembled as: 0x67 prefix, 0xd9 opcode, ModRM 01 101 rrr,
	# then the displacement byte 0x20 (= 32).
	mov	$cw,%eax
	sub	$32,%eax		# bias the base so that 32(%reg) is cw

#	fldcw	32(%eax)		# eax + 8 bit offset
	.byte 0x67,0xd9,0x68,0x20

	mov	%eax,%ebx		# same biased base in ebx
#	fldcw	32(%ebx)		# ebx + 8 bit offset
	.byte	0x67,0xd9,0x6b,0x20

	mov	%eax,%ecx		# same biased base in ecx

#	fldcw	32(%ecx)		# ecx + 8 bit offset
	.byte	0x67,0xd9,0x69,0x20

	mov	%eax,%edx		# same biased base in edx

#	fldcw	32(%edx)		# edx + 8 bit offset
	.byte	0x67,0xd9,0x6a,0x20
100
101
102	# register + 32-bit offset
thirtytwo_bit:
	# fldcw through a 32-bit base register plus a 32-bit displacement,
	# hand-assembled as: 0x67 prefix, 0xd9 opcode, ModRM 10 101 rrr,
	# then the displacement 30000 (0x00007530) as four little-endian
	# bytes.
	mov	$cw,%eax
	sub	$30000,%eax		# bias the base so that 30000(%reg) is cw

#	fldcw	30000(%eax)		# eax + 32 bit offset
	.byte	0x67,0xd9,0xa8,0x30,0x75,0x00,0x00

	mov	%eax,%ebx		# same biased base in ebx

#	fldcw	30000(%ebx)		# ebx + 32 bit offset
	.byte	0x67,0xd9,0xab,0x30,0x75,0x00,0x00

	mov	%eax,%ecx		# same biased base in ecx

#	fldcw	30000(%ecx)		# ecx + 32 bit offset
	.byte	0x67,0xd9,0xa9,0x30,0x75,0x00,0x00

	mov	%eax,%edx		# same biased base in edx

#	fldcw	30000(%edx)		# edx + 32 bit offset
	.byte	0x67,0xd9,0xaa,0x30,0x75,0x00,0x00
124
125	# check an fp/integer conversion
126	# in a loop to give a bigger count
127
128	mov	$1024,%rcx
129big_loop:
130
131	fldl	three			# load value onto fp stack
132	fnstcw	saved_cw		# store control word to mem
133	movzwl	saved_cw, %eax		# load cw from mem, zero extending
134	movb	$12, %ah		# set cw for "round to zero"
135	movw	%ax, cw			# store back to memory
136	fldcw	cw   			# save new rounding mode
137	fistpl	result			# save stack value as integer to mem
138	fldcw	saved_cw		# restore old cw
139
140	loop	big_loop		# loop to make the count more obvious
141
142	movl	result, %ebx		# sanity check to see if the
143	cmp	$3,%rbx			# result is the expected one
144	je	exit
145
print_error:
	# Reached by fall-through when the converted result is not 3.
	# Prints the error message and exits with status 1 so that the
	# failure is visible in the exit code as well as in the output.
	mov	$1,%rax			# write syscall
	mov	$1,%rdi			# stdout
	mov	$error,%rsi		# string
	mov	$22,%rdx		# length of string (without the NUL)
	syscall

	mov	$1,%rdi			# exit status 1: test failed
	mov	$60,%rax		# exit syscall
	syscall
152
exit:
	# Terminate the process with status 0.  The sanity check jumps here
	# when the converted result equals 3.
	xor	%rdi, %rdi		# return 0
	mov	$60, %rax		# SYSCALL_EXIT
	syscall
157
158
159
.data
saved_cw:	.long	0		# control word stored at the top of each loop pass
cw:	.long	0			# control word operand for the fldcw tests
result:	.long	0			# 32-bit integer written by fistpl
three:	.double	3.0			# 64-bit float 3.0; same 8 bytes as
					#   .long 0 followed by .long 0x40080000
error:	.asciz  "Error!  Wrong result!\n"
167