16bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon/*===-- udivsi3.S - 32-bit unsigned integer divide ------------------------===// 26bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon * 36bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon * The LLVM Compiler Infrastructure 46bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon * 56bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon * This file is dual licensed under the MIT and the University of Illinois Open 66bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon * Source Licenses. See LICENSE.TXT for details. 76bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon * 86bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon *===----------------------------------------------------------------------===// 96bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon * 106bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon * This file implements the __udivsi3 (32-bit unsigned integer divide) 116bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon * function for the ARM architecture. A naive digit-by-digit computation is 126bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon * employed for simplicity. 136bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon * 146bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon *===----------------------------------------------------------------------===*/ 156bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon 166bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon#include "../assembly.h" 176bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon 186bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon#define ESTABLISH_FRAME \ 196bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon push {r7, lr} ;\ 206bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon mov r7, sp 216bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon#define CLEAR_FRAME_AND_RETURN \ 226bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon pop {r7, pc} 236bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon 246bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon#define a r0 256bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon#define b r1 266bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon#define r r2 276bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon#define i r3 286bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon#define q ip 296bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon#define one lr 306bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon 316bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon.syntax unified 326bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon.align 3 3337b97d1cf4501b94347e0b4e880f4b25825a289fAnton Korobeynikov// Ok, APCS and AAPCS agree on 32 bit args, so it's safe to use the same routine. 3437b97d1cf4501b94347e0b4e880f4b25825a289fAnton KorobeynikovDEFINE_AEABI_FUNCTION_ALIAS(__aeabi_uidiv, __udivsi3) 356bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen CanonDEFINE_COMPILERRT_FUNCTION(__udivsi3) 366bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon// We use a simple digit by digit algorithm; before we get into the actual 376bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon// divide loop, we must calculate the left-shift amount necessary to align 386bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon// the MSB of the divisor with that of the dividend (If this shift is 396bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon// negative, then the result is zero, and we early out). We also conjure a 406bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon// bit mask of 1 to use in constructing the quotient, and initialize the 416bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon// quotient to zero. 426bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon ESTABLISH_FRAME 436bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon clz r2, a 446bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon tst b, b // detect divide-by-zero 456bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon clz r3, b 466bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon mov q, #0 47e1f95ca8cf82f755d6c62a6dc5b745f2e5bdf7baAnton Korobeynikov beq LOCAL_LABEL(return) // return 0 if b is zero. 486bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon mov one, #1 496bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon subs i, r3, r2 50e1f95ca8cf82f755d6c62a6dc5b745f2e5bdf7baAnton Korobeynikov blt LOCAL_LABEL(return) // return 0 if MSB(a) < MSB(b) 516bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon 52e1f95ca8cf82f755d6c62a6dc5b745f2e5bdf7baAnton KorobeynikovLOCAL_LABEL(mainLoop): 536bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon// This loop basically implements the following: 546bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon// 556bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon// do { 566bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon// if (a >= b << i) { 576bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon// a -= b << i; 586bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon// q |= 1 << i; 596bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon// if (a == 0) break; 606bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon// } 616bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon// } while (--i) 626bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon// 636bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon// Note that this does not perform the final iteration (i == 0); by doing it 646bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon// this way, we can merge the two branches which is a substantial win for 656bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon// such a tight loop on current ARM architectures. 666bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon subs r, a, b, lsl i 676bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon orrhs q, q,one, lsl i 686bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon movhs a, r 696bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon subsne i, i, #1 70e1f95ca8cf82f755d6c62a6dc5b745f2e5bdf7baAnton Korobeynikov bhi LOCAL_LABEL(mainLoop) 716bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon 726bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon// Do the final test subtraction and update of quotient (i == 0), as it is 736bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon// not performed in the main loop. 746bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon subs r, a, b 756bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon orrhs q, #1 766bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon 77e1f95ca8cf82f755d6c62a6dc5b745f2e5bdf7baAnton KorobeynikovLOCAL_LABEL(return): 786bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon// Move the quotient to r0 and return. 796bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon mov r0, q 806bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon CLEAR_FRAME_AND_RETURN 81