1dbebecc2bf00530ce09b3658641d1514d807aeeenjn
2dbebecc2bf00530ce09b3658641d1514d807aeeenjn.globl _start
3dbebecc2bf00530ce09b3658641d1514d807aeeenjn
4dbebecc2bf00530ce09b3658641d1514d807aeeenjn_start:
5dbebecc2bf00530ce09b3658641d1514d807aeeenjn        # This code tests for the fldcw "load floating point control word"
6dbebecc2bf00530ce09b3658641d1514d807aeeenjn	#   instruction.  On most x86 processors the retired_instruction
7dbebecc2bf00530ce09b3658641d1514d807aeeenjn	#   performance counter counts this as one instruction.  However,
8dbebecc2bf00530ce09b3658641d1514d807aeeenjn	#   on Pentium 4 systems it counts as two.  Therefore this can
9dbebecc2bf00530ce09b3658641d1514d807aeeenjn	#   affect BBV results on such a system.
10dbebecc2bf00530ce09b3658641d1514d807aeeenjn	# fldcw is most often used to set the rounding mode when doing
11dbebecc2bf00530ce09b3658641d1514d807aeeenjn	#   floating point to integer conversions
12dbebecc2bf00530ce09b3658641d1514d807aeeenjn
13dbebecc2bf00530ce09b3658641d1514d807aeeenjn	# It is encoded as "d9 /5" which means
14dbebecc2bf00530ce09b3658641d1514d807aeeenjn	#   1101 1001 xx10 1yyy
15dbebecc2bf00530ce09b3658641d1514d807aeeenjn	# Where xx is the "mod" which will be 00, 01, or 10 indicating offset
16dbebecc2bf00530ce09b3658641d1514d807aeeenjn	#   and yyy is the register field
17dbebecc2bf00530ce09b3658641d1514d807aeeenjn
18dbebecc2bf00530ce09b3658641d1514d807aeeenjn
19dbebecc2bf00530ce09b3658641d1514d807aeeenjn
20dbebecc2bf00530ce09b3658641d1514d807aeeenjn        # these are instructions with similar encodings to fldcw
21dbebecc2bf00530ce09b3658641d1514d807aeeenjn	# that can cause false positives if the test isn't explicit enough
22dbebecc2bf00530ce09b3658641d1514d807aeeenjnsimilar:
23dbebecc2bf00530ce09b3658641d1514d807aeeenjn        fld1   	   	       		# d9 e8  push +1.0
24dbebecc2bf00530ce09b3658641d1514d807aeeenjn	fldl2t				# d9 e9  push log2(10)
25dbebecc2bf00530ce09b3658641d1514d807aeeenjn	fldl2e				# d9 ea  push log2(e)
26dbebecc2bf00530ce09b3658641d1514d807aeeenjn	fldpi				# d9 eb  push pi
27dbebecc2bf00530ce09b3658641d1514d807aeeenjn	fldlg2				# d9 ec  push log10(2)
28dbebecc2bf00530ce09b3658641d1514d807aeeenjn	fldln2				# d9 ed  push ln(2)
29dbebecc2bf00530ce09b3658641d1514d807aeeenjn	fldz				# d9 ee  push +0.0; these seven values are never popped in this file
30dbebecc2bf00530ce09b3658641d1514d807aeeenjn
31dbebecc2bf00530ce09b3658641d1514d807aeeenjn	# check some varied ways of calling fldcw
32dbebecc2bf00530ce09b3658641d1514d807aeeenjn
33dbebecc2bf00530ce09b3658641d1514d807aeeenjn
34dbebecc2bf00530ce09b3658641d1514d807aeeenjn	# offset on stack: esp-relative operand (esp as base needs a SIB byte)
35dbebecc2bf00530ce09b3658641d1514d807aeeenjnstack:
36dbebecc2bf00530ce09b3658641d1514d807aeeenjn	sub	$4,%esp			# allocate space on stack
37dbebecc2bf00530ce09b3658641d1514d807aeeenjn	fnstcw	2(%esp)			# store current control word at esp+2
38dbebecc2bf00530ce09b3658641d1514d807aeeenjn	fldcw	2(%esp)			# reload the value just stored, so the cw is unchanged
39dbebecc2bf00530ce09b3658641d1514d807aeeenjn	add	$4,%esp			# restore stack
40dbebecc2bf00530ce09b3658641d1514d807aeeenjn
41dbebecc2bf00530ce09b3658641d1514d807aeeenjn	# 32-bit register: register-indirect fldcw through eax, ebx, ecx and edx
42dbebecc2bf00530ce09b3658641d1514d807aeeenjn
43dbebecc2bf00530ce09b3658641d1514d807aeeenjn	fnstcw	cw			# store current control word into cw so the reloads change nothing
44dbebecc2bf00530ce09b3658641d1514d807aeeenjn	mov	$cw,%eax		# eax = address of cw
45dbebecc2bf00530ce09b3658641d1514d807aeeenjn	fldcw	0(%eax)			# eax
46dbebecc2bf00530ce09b3658641d1514d807aeeenjn	mov	$cw,%ebx		# ebx = address of cw
47dbebecc2bf00530ce09b3658641d1514d807aeeenjn	fldcw	0(%ebx)			# ebx
48dbebecc2bf00530ce09b3658641d1514d807aeeenjn	mov	$cw,%ecx		# ecx = address of cw
49dbebecc2bf00530ce09b3658641d1514d807aeeenjn	fldcw	0(%ecx)			# ecx
50dbebecc2bf00530ce09b3658641d1514d807aeeenjn	mov	$cw,%edx		# edx = address of cw
51dbebecc2bf00530ce09b3658641d1514d807aeeenjn	fldcw	0(%edx)			# edx
52dbebecc2bf00530ce09b3658641d1514d807aeeenjn
53dbebecc2bf00530ce09b3658641d1514d807aeeenjn	# register + 8-bit offset
54dbebecc2bf00530ce09b3658641d1514d807aeeenjneight_bit:
55dbebecc2bf00530ce09b3658641d1514d807aeeenjn	mov	$cw,%eax		# eax = address of cw
56dbebecc2bf00530ce09b3658641d1514d807aeeenjn	sub	$32,%eax		# bias by -32 so that 32(%reg) addresses cw
57dbebecc2bf00530ce09b3658641d1514d807aeeenjn
58dbebecc2bf00530ce09b3658641d1514d807aeeenjn	fldcw	32(%eax)		# eax + 8 bit offset
59dbebecc2bf00530ce09b3658641d1514d807aeeenjn	mov	%eax,%ebx		# copy the biased pointer
60dbebecc2bf00530ce09b3658641d1514d807aeeenjn	fldcw	32(%ebx)		# ebx + 8 bit offset
61dbebecc2bf00530ce09b3658641d1514d807aeeenjn	mov	%eax,%ecx		# copy the biased pointer
62dbebecc2bf00530ce09b3658641d1514d807aeeenjn	fldcw	32(%ecx)		# ecx + 8 bit offset
63dbebecc2bf00530ce09b3658641d1514d807aeeenjn	mov	%eax,%edx		# copy the biased pointer
64dbebecc2bf00530ce09b3658641d1514d807aeeenjn	fldcw	32(%edx)		# edx + 8 bit offset
65dbebecc2bf00530ce09b3658641d1514d807aeeenjn
66dbebecc2bf00530ce09b3658641d1514d807aeeenjn	# register + 32-bit offset (30000 does not fit in a signed byte)
67dbebecc2bf00530ce09b3658641d1514d807aeeenjnthirtytwo_bit:
68dbebecc2bf00530ce09b3658641d1514d807aeeenjn	mov	$cw,%eax		# eax = address of cw
69dbebecc2bf00530ce09b3658641d1514d807aeeenjn	sub	$30000,%eax		# bias by -30000 so that 30000(%reg) addresses cw
70dbebecc2bf00530ce09b3658641d1514d807aeeenjn
71dbebecc2bf00530ce09b3658641d1514d807aeeenjn	fldcw	30000(%eax)		# eax + 32 bit offset
72dbebecc2bf00530ce09b3658641d1514d807aeeenjn	mov	%eax,%ebx		# copy the biased pointer
73dbebecc2bf00530ce09b3658641d1514d807aeeenjn	fldcw	30000(%ebx)		# ebx + 32 bit offset
74dbebecc2bf00530ce09b3658641d1514d807aeeenjn	mov	%eax,%ecx		# copy the biased pointer
75dbebecc2bf00530ce09b3658641d1514d807aeeenjn	fldcw	30000(%ecx)		# ecx + 32 bit offset
76dbebecc2bf00530ce09b3658641d1514d807aeeenjn	mov	%eax,%edx		# copy the biased pointer
77dbebecc2bf00530ce09b3658641d1514d807aeeenjn	fldcw	30000(%edx)		# edx + 32 bit offset
78dbebecc2bf00530ce09b3658641d1514d807aeeenjn
79dbebecc2bf00530ce09b3658641d1514d807aeeenjn	# check an fp/integer conversion
80dbebecc2bf00530ce09b3658641d1514d807aeeenjn	# in a loop to give a bigger count
81dbebecc2bf00530ce09b3658641d1514d807aeeenjn
82dbebecc2bf00530ce09b3658641d1514d807aeeenjn	mov	$1024,%ecx		# loop counter: 1024 iterations, two fldcw each
83dbebecc2bf00530ce09b3658641d1514d807aeeenjnbig_loop:
84dbebecc2bf00530ce09b3658641d1514d807aeeenjn
85dbebecc2bf00530ce09b3658641d1514d807aeeenjn	fldl	three			# load value onto fp stack
86dbebecc2bf00530ce09b3658641d1514d807aeeenjn	fnstcw	saved_cw		# store control word to mem
87dbebecc2bf00530ce09b3658641d1514d807aeeenjn	movzwl	saved_cw, %eax		# load cw from mem, zero extending
88dbebecc2bf00530ce09b3658641d1514d807aeeenjn	movb	$12, %ah		# set cw for "round to zero": whole high byte becomes 0x0c (RC = 11, PC = 00)
89dbebecc2bf00530ce09b3658641d1514d807aeeenjn	movw	%ax, cw			# store back to memory
90dbebecc2bf00530ce09b3658641d1514d807aeeenjn	fldcw	cw   			# load new rounding mode
91dbebecc2bf00530ce09b3658641d1514d807aeeenjn	fistpl	result			# pop stack value and store it as a 32-bit integer to mem
92dbebecc2bf00530ce09b3658641d1514d807aeeenjn	fldcw	saved_cw		# restore old cw
93dbebecc2bf00530ce09b3658641d1514d807aeeenjn
94dbebecc2bf00530ce09b3658641d1514d807aeeenjn	loop	big_loop		# decrement ecx and repeat while non-zero, to make the count more obvious
95dbebecc2bf00530ce09b3658641d1514d807aeeenjn
96dbebecc2bf00530ce09b3658641d1514d807aeeenjn	movl	result, %ebx		# sanity check to see if the
97dbebecc2bf00530ce09b3658641d1514d807aeeenjn	cmp	$3,%ebx			# result is the expected one
98dbebecc2bf00530ce09b3658641d1514d807aeeenjn	je	exit			# expected value: skip the error message
99dbebecc2bf00530ce09b3658641d1514d807aeeenjn
100dbebecc2bf00530ce09b3658641d1514d807aeeenjnprint_error:			# reached when result is not 3; falls through to exit afterwards
101dbebecc2bf00530ce09b3658641d1514d807aeeenjn	mov 	$4,%eax			# write syscall
1028eb8bab992e3998c33770b0cdb16059a8b918a06sewardj#if defined(VGO_darwin)
103dbebecc2bf00530ce09b3658641d1514d807aeeenjn	pushl	$22			# length of string
1048eb8bab992e3998c33770b0cdb16059a8b918a06sewardj	pushl	$error			# string
1058eb8bab992e3998c33770b0cdb16059a8b918a06sewardj	pushl	$1			# stdout
1068eb8bab992e3998c33770b0cdb16059a8b918a06sewardj	int 	$0x80			# NOTE(review): BSD-style int $0x80 normally expects one extra word on top of the stacked args - confirm
1078eb8bab992e3998c33770b0cdb16059a8b918a06sewardj#elif defined(VGO_linux)
108dbebecc2bf00530ce09b3658641d1514d807aeeenjn	mov	$1,%ebx			# stdout
109dbebecc2bf00530ce09b3658641d1514d807aeeenjn	mov	$error,%ecx		# string
110dbebecc2bf00530ce09b3658641d1514d807aeeenjn	mov 	$22,%edx		# length of string
111dbebecc2bf00530ce09b3658641d1514d807aeeenjn	int 	$0x80			# arguments are passed in registers on this path
1128eb8bab992e3998c33770b0cdb16059a8b918a06sewardj#elif defined(VGO_solaris)
1138eb8bab992e3998c33770b0cdb16059a8b918a06sewardj	pushl	$22			# length of string
1148eb8bab992e3998c33770b0cdb16059a8b918a06sewardj	pushl	$error			# string
1158eb8bab992e3998c33770b0cdb16059a8b918a06sewardj	pushl	$1			# stdout
1168eb8bab992e3998c33770b0cdb16059a8b918a06sewardj	int 	$0x91			# this path traps through vector 0x91 instead of 0x80
1178eb8bab992e3998c33770b0cdb16059a8b918a06sewardj#else
1188eb8bab992e3998c33770b0cdb16059a8b918a06sewardj#  error "Unknown OS"
1198eb8bab992e3998c33770b0cdb16059a8b918a06sewardj#endif
1208eb8bab992e3998c33770b0cdb16059a8b918a06sewardj
121dbebecc2bf00530ce09b3658641d1514d807aeeenjnexit:				# terminate the process, passing result as the exit status argument
1228eb8bab992e3998c33770b0cdb16059a8b918a06sewardj	movl	$1,	%eax		# SYSCALL_EXIT
1238eb8bab992e3998c33770b0cdb16059a8b918a06sewardj#if defined(VGO_darwin)
124dbebecc2bf00530ce09b3658641d1514d807aeeenjn	pushl	result			# push converted value as the exit status argument
1258eb8bab992e3998c33770b0cdb16059a8b918a06sewardj	int	$0x80
1268eb8bab992e3998c33770b0cdb16059a8b918a06sewardj#elif defined(VGO_linux)
127dbebecc2bf00530ce09b3658641d1514d807aeeenjn	movl	result, %ebx		# load converted value
128dbebecc2bf00530ce09b3658641d1514d807aeeenjn	int	$0x80
1298eb8bab992e3998c33770b0cdb16059a8b918a06sewardj#elif defined(VGO_solaris)
1308eb8bab992e3998c33770b0cdb16059a8b918a06sewardj	pushl	result			# push converted value as the exit status argument
1318eb8bab992e3998c33770b0cdb16059a8b918a06sewardj	int	$0x91
1328eb8bab992e3998c33770b0cdb16059a8b918a06sewardj#else
1338eb8bab992e3998c33770b0cdb16059a8b918a06sewardj#  error "Unknown OS"
1348eb8bab992e3998c33770b0cdb16059a8b918a06sewardj#endif
135dbebecc2bf00530ce09b3658641d1514d807aeeenjn
136dbebecc2bf00530ce09b3658641d1514d807aeeenjn
137dbebecc2bf00530ce09b3658641d1514d807aeeenjn.data
138dbebecc2bf00530ce09b3658641d1514d807aeeenjnsaved_cw:	.long 0			# control word as stored by fnstcw in big_loop (low 16 bits used)
139dbebecc2bf00530ce09b3658641d1514d807aeeenjncw:  	.long	0			# scratch control word that the fldcw tests load (low 16 bits used)
140dbebecc2bf00530ce09b3658641d1514d807aeeenjnresult: .long	0			# integer written by fistpl; checked against 3
141dbebecc2bf00530ce09b3658641d1514d807aeeenjnthree:	.long	0			# a floating point 3.0
142dbebecc2bf00530ce09b3658641d1514d807aeeenjn	.long	1074266112		# high dword 0x40080000 of the 64-bit double 3.0
1438eb8bab992e3998c33770b0cdb16059a8b918a06sewardjerror:	.ascii  "Error!  Wrong result!\n\0"	# print_error writes 22 bytes; the trailing NUL is not written
144