19ad441ffec97db647fee3725b3424284fb913e14Howard Hinnant// This file is dual licensed under the MIT and the University of Illinois Open 29ad441ffec97db647fee3725b3424284fb913e14Howard Hinnant// Source Licenses. See LICENSE.TXT for details. 3b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar 419336a2d6b9b375ac106125950f4ff09742d1aecDaniel Dunbar#include "../assembly.h" 519336a2d6b9b375ac106125950f4ff09742d1aecDaniel Dunbar 6b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar// di_int __divdi3(di_int a, di_int b); 7b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar 8b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar// result = a / b. 9b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar// both inputs and the output are 64-bit signed integers. 10b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar// This will do whatever the underlying hardware is set to do on division by zero. 11b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar// No other exceptions are generated, as the divide cannot overflow. 12b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar// 13b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar// This is targeted at 32-bit x86 *only*, as this can be done directly in hardware 14b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar// on x86_64. The performance goal is ~40 cycles per divide, which is faster than 15b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar// currently possible via simulation of integer divides on the x87 unit. 16b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar// 17b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar// Stephen Canon, December 2008 18b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar 19b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar#ifdef __i386__ 20b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar 21b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar.text 222d1fdb26e458c4ddc04155c1d421bced3ba90cd0Stephen Hines.balign 4 23b4b1e8c5085cf83a50242057775a33ae4323d402Daniel DunbarDEFINE_COMPILERRT_FUNCTION(__divdi3) 24b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar 25b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar/* This is currently implemented by wrapping the unsigned divide up in an absolute 26b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar value, then restoring the correct sign at the end of the computation. This could 27b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar certainly be improved upon. */ 28b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar 29b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar pushl %esi 30b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar movl 20(%esp), %edx // high word of b 31b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar movl 16(%esp), %eax // low word of b 32b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar movl %edx, %ecx 33b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar sarl $31, %ecx // (b < 0) ? -1 : 0 34b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar xorl %ecx, %eax 35b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar xorl %ecx, %edx // EDX:EAX = (b < 0) ? not(b) : b 36b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar subl %ecx, %eax 37b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar sbbl %ecx, %edx // EDX:EAX = abs(b) 38b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar movl %edx, 20(%esp) 39b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar movl %eax, 16(%esp) // store abs(b) back to stack 40b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar movl %ecx, %esi // set aside sign of b 41b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar 42b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar movl 12(%esp), %edx // high word of b 43b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar movl 8(%esp), %eax // low word of b 44b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar movl %edx, %ecx 45b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar sarl $31, %ecx // (a < 0) ? -1 : 0 46b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar xorl %ecx, %eax 47b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar xorl %ecx, %edx // EDX:EAX = (a < 0) ? not(a) : a 48b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar subl %ecx, %eax 49b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar sbbl %ecx, %edx // EDX:EAX = abs(a) 50b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar movl %edx, 12(%esp) 51b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar movl %eax, 8(%esp) // store abs(a) back to stack 52b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar xorl %ecx, %esi // sign of result = (sign of a) ^ (sign of b) 53b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar 54b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar pushl %ebx 55b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar movl 24(%esp), %ebx // Find the index i of the leading bit in b. 56b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar bsrl %ebx, %ecx // If the high word of b is zero, jump to 57b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar jz 9f // the code to handle that special case [9]. 58b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar 59b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar /* High word of b is known to be non-zero on this branch */ 60b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar 61b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar movl 20(%esp), %eax // Construct bhi, containing bits [1+i:32+i] of b 62b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar 63b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar shrl %cl, %eax // Practically, this means that bhi is given by: 64b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar shrl %eax // 65b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar notl %ecx // bhi = (high word of b) << (31 - i) | 66b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar shll %cl, %ebx // (low word of b) >> (1 + i) 67b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar orl %eax, %ebx // 68b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar movl 16(%esp), %edx // Load the high and low words of a, and jump 69b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar movl 12(%esp), %eax // to [1] if the high word is larger than bhi 70b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar cmpl %ebx, %edx // to avoid overflowing the upcoming divide. 71b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar jae 1f 72b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar 73b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */ 74b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar 75b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar divl %ebx // eax <-- qs, edx <-- r such that ahi:alo = bs*qs + r 76b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar 77b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar pushl %edi 78b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar notl %ecx 79b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar shrl %eax 80b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar shrl %cl, %eax // q = qs >> (1 + i) 81b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar movl %eax, %edi 82b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar mull 24(%esp) // q*blo 83b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar movl 16(%esp), %ebx 84b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar movl 20(%esp), %ecx // ECX:EBX = a 85b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar subl %eax, %ebx 86b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar sbbl %edx, %ecx // ECX:EBX = a - q*blo 87b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar movl 28(%esp), %eax 88b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar imull %edi, %eax // q*bhi 89b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar subl %eax, %ecx // ECX:EBX = a - q*b 90b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar sbbl $0, %edi // decrement q if remainder is negative 91b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar xorl %edx, %edx 92b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar movl %edi, %eax 93b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar 94b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar addl %esi, %eax // Restore correct sign to result 95b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar adcl %esi, %edx 96b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar xorl %esi, %eax 97b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar xorl %esi, %edx 98b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar popl %edi // Restore callee-save registers 99b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar popl %ebx 100b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar popl %esi 101b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar retl // Return 102b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar 103b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar 104b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar1: /* High word of a is greater than or equal to (b >> (1 + i)) on this branch */ 105b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar 106b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar subl %ebx, %edx // subtract bhi from ahi so that divide will not 107b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar divl %ebx // overflow, and find q and r such that 108b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar // 109b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar // ahi:alo = (1:q)*bhi + r 110b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar // 111b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar // Note that q is a number in (31-i).(1+i) 112b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar // fix point. 113b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar 114b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar pushl %edi 115b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar notl %ecx 116b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar shrl %eax 117b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar orl $0x80000000, %eax 118b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar shrl %cl, %eax // q = (1:qs) >> (1 + i) 119b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar movl %eax, %edi 120b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar mull 24(%esp) // q*blo 121b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar movl 16(%esp), %ebx 122b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar movl 20(%esp), %ecx // ECX:EBX = a 123b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar subl %eax, %ebx 124b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar sbbl %edx, %ecx // ECX:EBX = a - q*blo 125b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar movl 28(%esp), %eax 126b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar imull %edi, %eax // q*bhi 127b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar subl %eax, %ecx // ECX:EBX = a - q*b 128b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar sbbl $0, %edi // decrement q if remainder is negative 129b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar xorl %edx, %edx 130b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar movl %edi, %eax 131b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar 132b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar addl %esi, %eax // Restore correct sign to result 133b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar adcl %esi, %edx 134b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar xorl %esi, %eax 135b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar xorl %esi, %edx 136b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar popl %edi // Restore callee-save registers 137b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar popl %ebx 138b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar popl %esi 139b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar retl // Return 140b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar 141b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar 142b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar9: /* High word of b is zero on this branch */ 143b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar 144b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar movl 16(%esp), %eax // Find qhi and rhi such that 145b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar movl 20(%esp), %ecx // 146b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar xorl %edx, %edx // ahi = qhi*b + rhi with 0 ≤ rhi < b 147b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar divl %ecx // 148b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar movl %eax, %ebx // 149b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar movl 12(%esp), %eax // Find qlo such that 150b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar divl %ecx // 151b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar movl %ebx, %edx // rhi:alo = qlo*b + rlo with 0 ≤ rlo < b 152b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar 153b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar addl %esi, %eax // Restore correct sign to result 154b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar adcl %esi, %edx 155b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar xorl %esi, %eax 156b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar xorl %esi, %edx 157b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar popl %ebx // Restore callee-save registers 158b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar popl %esi 159b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar retl // Return 1602d1fdb26e458c4ddc04155c1d421bced3ba90cd0Stephen HinesEND_COMPILERRT_FUNCTION(__divdi3) 161b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar 162b3a6901e66f55b35aa9e01bcb24134e6a65ea004Daniel Dunbar#endif // __i386__ 163