19ad441ffec97db647fee3725b3424284fb913e14Howard Hinnant// This file is dual licensed under the MIT and the University of Illinois Open
29ad441ffec97db647fee3725b3424284fb913e14Howard Hinnant// Source Licenses. See LICENSE.TXT for details.
3b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar
419336a2d6b9b375ac106125950f4ff09742d1aecDaniel Dunbar#include "../assembly.h"
519336a2d6b9b375ac106125950f4ff09742d1aecDaniel Dunbar
6b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar// di_int __divdi3(di_int a, di_int b);
7b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar
8b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar// result = a / b.
9b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar// both inputs and the output are 64-bit signed integers.
10b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar// This will do whatever the underlying hardware is set to do on division by zero.
11b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar// No other exceptions are generated, as the divide cannot overflow.
12b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar//
13b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar// This is targeted at 32-bit x86 *only*, as this can be done directly in hardware
14b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar// on x86_64.  The performance goal is ~40 cycles per divide, which is faster than
15b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar// currently possible via simulation of integer divides on the x87 unit.
16b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar//
17b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar// Stephen Canon, December 2008
18b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar
19b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar#ifdef __i386__
20b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar
21b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar.text
222d1fdb26e458c4ddc04155c1d421bced3ba90cd0Stephen Hines.balign 4
23b4b1e8c5085cf83a50242057775a33ae4323d402Daniel DunbarDEFINE_COMPILERRT_FUNCTION(__divdi3)
24b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar
25b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar/* This is currently implemented by wrapping the unsigned divide up in an absolute
26b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar   value, then restoring the correct sign at the end of the computation.  This could
27b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar   certainly be improved upon. */
28b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar
29b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	pushl		%esi
30b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	movl	 20(%esp),			%edx	// high word of b
31b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	movl	 16(%esp),			%eax	// low word of b
32b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	movl		%edx,			%ecx
33b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	sarl		$31,			%ecx	// (b < 0) ? -1 : 0
34b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	xorl		%ecx,			%eax
35b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	xorl		%ecx,			%edx	// EDX:EAX = (b < 0) ? not(b) : b
36b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	subl		%ecx,			%eax
37b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	sbbl		%ecx,			%edx	// EDX:EAX = abs(b)
38b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	movl		%edx,		 20(%esp)
39b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	movl		%eax,		 16(%esp)	// store abs(b) back to stack
40b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	movl		%ecx,			%esi	// set aside sign of b
41b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar
42b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	movl	 12(%esp),			%edx	// high word of b
43b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	movl	  8(%esp),			%eax	// low word of b
44b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	movl		%edx,			%ecx
45b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	sarl		$31,			%ecx	// (a < 0) ? -1 : 0
46b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	xorl		%ecx,			%eax
47b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	xorl		%ecx,			%edx	// EDX:EAX = (a < 0) ? not(a) : a
48b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	subl		%ecx,			%eax
49b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	sbbl		%ecx,			%edx	// EDX:EAX = abs(a)
50b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	movl		%edx,		 12(%esp)
51b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	movl		%eax,		  8(%esp)	// store abs(a) back to stack
52b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	xorl		%ecx,			%esi	// sign of result = (sign of a) ^ (sign of b)
53b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar
54b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	pushl		%ebx
55b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	movl	 24(%esp),			%ebx	// Find the index i of the leading bit in b.
56b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	bsrl		%ebx,			%ecx	// If the high word of b is zero, jump to
57b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	jz			9f						// the code to handle that special case [9].
58b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar
59b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	/* High word of b is known to be non-zero on this branch */
60b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar
61b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	movl	 20(%esp),			%eax	// Construct bhi, containing bits [1+i:32+i] of b
62b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar
63b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	shrl		%cl,			%eax	// Practically, this means that bhi is given by:
64b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	shrl		%eax					//
65b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	notl		%ecx					//		bhi = (high word of b) << (31 - i) |
66b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	shll		%cl,			%ebx	//			  (low word of b) >> (1 + i)
67b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	orl			%eax,			%ebx	//
68b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	movl	 16(%esp),			%edx	// Load the high and low words of a, and jump
69b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	movl	 12(%esp),			%eax	// to [1] if the high word is larger than bhi
70b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	cmpl		%ebx,			%edx	// to avoid overflowing the upcoming divide.
71b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	jae			1f
72b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar
73b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	/* High word of a is greater than or equal to (b >> (1 + i)) on this branch */
74b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar
75b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	divl		%ebx					// eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r
76b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar
77b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	pushl		%edi
78b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	notl		%ecx
79b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	shrl		%eax
80b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	shrl		%cl,			%eax	// q = qs >> (1 + i)
81b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	movl		%eax,			%edi
82b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	mull	 24(%esp)					// q*blo
83b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	movl	 16(%esp),			%ebx
84b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	movl	 20(%esp),			%ecx	// ECX:EBX = a
85b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	subl		%eax,			%ebx
86b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	sbbl		%edx,			%ecx	// ECX:EBX = a - q*blo
87b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	movl	 28(%esp),			%eax
88b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	imull		%edi,			%eax	// q*bhi
89b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	subl		%eax,			%ecx	// ECX:EBX = a - q*b
90b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	sbbl		$0,				%edi	// decrement q if remainder is negative
91b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	xorl		%edx,			%edx
92b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	movl		%edi,			%eax
93b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar
94b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	addl		%esi,			%eax	// Restore correct sign to result
95b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	adcl		%esi,			%edx
96b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	xorl		%esi,			%eax
97b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	xorl		%esi,			%edx
98b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	popl		%edi					// Restore callee-save registers
99b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	popl		%ebx
100b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	popl		%esi
101b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	retl								// Return
102b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar
103b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar
104b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar1:	/* High word of a is greater than or equal to (b >> (1 + i)) on this branch */
105b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar
106b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	subl		%ebx,			%edx	// subtract bhi from ahi so that divide will not
107b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	divl		%ebx					// overflow, and find q and r such that
108b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar										//
109b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar										//		ahi:alo = (1:q)*bhi + r
110b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar										//
111b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar										// Note that q is a number in (31-i).(1+i)
112b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar										// fix point.
113b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar
114b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	pushl		%edi
115b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	notl		%ecx
116b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	shrl		%eax
117b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	orl			$0x80000000,	%eax
118b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	shrl		%cl,			%eax	// q = (1:qs) >> (1 + i)
119b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	movl		%eax,			%edi
120b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	mull	 24(%esp)					// q*blo
121b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	movl	 16(%esp),			%ebx
122b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	movl	 20(%esp),			%ecx	// ECX:EBX = a
123b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	subl		%eax,			%ebx
124b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	sbbl		%edx,			%ecx	// ECX:EBX = a - q*blo
125b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	movl	 28(%esp),			%eax
126b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	imull		%edi,			%eax	// q*bhi
127b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	subl		%eax,			%ecx	// ECX:EBX = a - q*b
128b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	sbbl		$0,				%edi	// decrement q if remainder is negative
129b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	xorl		%edx,			%edx
130b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	movl		%edi,			%eax
131b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar
132b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	addl		%esi,			%eax	// Restore correct sign to result
133b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	adcl		%esi,			%edx
134b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	xorl		%esi,			%eax
135b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	xorl		%esi,			%edx
136b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	popl		%edi					// Restore callee-save registers
137b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	popl		%ebx
138b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	popl		%esi
139b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	retl								// Return
140b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar
141b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar
142b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar9:	/* High word of b is zero on this branch */
143b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar
144b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	movl	 16(%esp),			%eax	// Find qhi and rhi such that
145b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	movl	 20(%esp),			%ecx	//
146b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	xorl		%edx,			%edx	//		ahi = qhi*b + rhi	with	0 ≤ rhi < b
147b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	divl		%ecx					//
148b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	movl		%eax,			%ebx	//
149b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	movl	 12(%esp),			%eax	// Find qlo such that
150b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	divl		%ecx					//
151b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	movl		%ebx,			%edx	//		rhi:alo = qlo*b + rlo  with 0 ≤ rlo < b
152b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar
153b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	addl		%esi,			%eax	// Restore correct sign to result
154b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	adcl		%esi,			%edx
155b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	xorl		%esi,			%eax
156b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	xorl		%esi,			%edx
157b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	popl		%ebx					// Restore callee-save registers
158b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	popl		%esi
159b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar	retl								// Return
1602d1fdb26e458c4ddc04155c1d421bced3ba90cd0Stephen HinesEND_COMPILERRT_FUNCTION(__divdi3)
161b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar
162b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar#endif // __i386__
163