# fldcw instruction-count test.
#
# Target:  x86-64 Linux, GNU as, AT&T syntax.
# Entry:   _start (freestanding, no libc); leaves through the exit syscall.
# Output:  nothing on success; writes the "error" string to stdout when the
#          fp to integer conversion below does not yield 3.
# Note:    the 32-bit sections move the address of cw into 32-bit registers
#          and address memory through them (0x67 prefix), so cw has to be
#          placed below 4 GiB for those loads to reach it.

.globl _start

_start:
	# This code tests the fldcw "load x87 FPU control word"
	#   instruction.  On most x86 processors the retired_instruction
	#   performance counter counts this as one instruction.  However,
	#   on Pentium 4 systems it counts as two.  Therefore this can
	#   affect BBV results on such a system.
	# fldcw is most often used to set the rounding mode when doing
	#   floating point to integer conversions.

	# It is encoded as "d9 /5" which means
	#   1101 1001 xx10 1yyy
	# Where xx is the "mod" field (00, 01 or 10, selecting no, 8-bit or
	#   32-bit displacement) and yyy is the base register field.

	# These instructions have encodings similar to fldcw (d9 followed
	# by a byte of the form 1110 1yyy) and can cause false positives
	# when a detector does not check the mod bits explicitly.
similar:
	fld1				# d9 e8
	fldl2t				# d9 e9
	fldl2e				# d9 ea
	fldpi				# d9 eb
	fldlg2				# d9 ec
	fldln2				# d9 ed
	fldz				# d9 ee

	# Check some varied ways of calling fldcw.

	# Operand on the stack, 8-bit displacement from rsp.
stack:
	sub	$8,%rsp			# allocate scratch space on stack
	fnstcw	2(%rsp)			# store the current control word ...
	fldcw	2(%rsp)			# ... and load the same value back
	add	$8,%rsp			# release scratch space

	# 64-bit base register, no displacement.
	# Each fldcw reloads the value fnstcw just stored, so the
	# control word itself is left unchanged.
sixtyfour_reg:
	fnstcw	cw
	mov	$cw,%rax
	fldcw	0(%rax)			# base = rax
	mov	$cw,%rbx
	fldcw	0(%rbx)			# base = rbx
	mov	$cw,%rcx
	fldcw	0(%rcx)			# base = rcx
	mov	$cw,%rdx
	fldcw	0(%rdx)			# base = rdx

	# 32-bit base register, no displacement.

	# Note!  The assembler that comes with SuSE 9.1
	#        cannot assemble 32-bit fldcw on 64-bit systems.
	#        Hence the need to hand-code them:
	#        0x67 (address-size prefix), 0xd9, then the ModRM byte.

thirtytwo_reg:
	fnstcw	cw

	mov	$cw,%eax
	.byte	0x67,0xd9,0x28		# fldcw 0(%eax)

	mov	$cw,%ebx
	.byte	0x67,0xd9,0x2b		# fldcw 0(%ebx)

	mov	$cw,%ecx
	.byte	0x67,0xd9,0x29		# fldcw 0(%ecx)

	mov	$cw,%edx
	.byte	0x67,0xd9,0x2a		# fldcw 0(%edx)

	# 32-bit base register + 8-bit displacement.
	# The base is biased by -32 so that base+32 lands on cw again.
eight_bit:
	mov	$cw,%eax
	sub	$32,%eax
	.byte	0x67,0xd9,0x68,0x20	# fldcw 32(%eax)

	mov	%eax,%ebx
	.byte	0x67,0xd9,0x6b,0x20	# fldcw 32(%ebx)

	mov	%eax,%ecx
	.byte	0x67,0xd9,0x69,0x20	# fldcw 32(%ecx)

	mov	%eax,%edx
	.byte	0x67,0xd9,0x6a,0x20	# fldcw 32(%edx)


	# 32-bit base register + 32-bit displacement.
	# The base is biased by -30000 (0x7530) so that base+30000 is cw.
thirtytwo_bit:
	mov	$cw,%eax
	sub	$30000,%eax
	.byte	0x67,0xd9,0xa8,0x30,0x75,0x00,0x00	# fldcw 30000(%eax)

	mov	%eax,%ebx
	.byte	0x67,0xd9,0xab,0x30,0x75,0x00,0x00	# fldcw 30000(%ebx)

	mov	%eax,%ecx
	.byte	0x67,0xd9,0xa9,0x30,0x75,0x00,0x00	# fldcw 30000(%ecx)

	mov	%eax,%edx
	.byte	0x67,0xd9,0xaa,0x30,0x75,0x00,0x00	# fldcw 30000(%edx)

	# Check an fp/integer conversion
	# in a loop to give a bigger count.
	# rcx = iterations remaining; two fldcw execute per iteration.

	mov	$1024,%rcx
big_loop:

	fldl	three			# push 3.0 onto the fp stack
	fnstcw	saved_cw		# store current control word to mem
	movzwl	saved_cw, %eax		# load cw from mem, zero extending
	movb	$12, %ah		# cw bits 8-15 = 0x0c: "round to zero"
	movw	%ax, cw			# store the 16-bit cw back to memory
	fldcw	cw			# load the new rounding mode
	fistpl	result			# pop fp stack, store as 32-bit integer
	fldcw	saved_cw		# restore old cw

	loop	big_loop		# loop to make the count more obvious

	movl	result, %ebx		# sanity check to see if the
	cmp	$3,%rbx			# result is the expected one
	je	exit

	# Report the mismatch, then fall through to exit; the exit
	# status is 0 on this path as well.
print_error:
	mov	$1,%rax			# write syscall
	mov	$1,%rdi			# stdout
	mov	$error,%rsi		# string
	mov	$22,%rdx		# length of string, NUL not written
	syscall

exit:
	xor	%rdi, %rdi		# return 0
	mov	$60, %rax		# SYSCALL_EXIT
	syscall



.data
saved_cw:	.long	0		# control word as found on loop entry
cw:		.long	0		# scratch control word given to fldcw
result:		.long	0		# integer produced by fistpl
three:		.long	0		# a floating point 3.0, low 32 bits
		.long	1074266112	#   high 32 bits (0x40080000)
error:		.asciz	"Error!  Wrong result!\n"
167