16bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon/*===-- udivsi3.S - 32-bit unsigned integer divide ------------------------===//
26bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon *
36bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon *                     The LLVM Compiler Infrastructure
46bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon *
56bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon * This file is dual licensed under the MIT and the University of Illinois Open
66bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon * Source Licenses. See LICENSE.TXT for details.
76bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon *
86bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon *===----------------------------------------------------------------------===//
96bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon *
106bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon * This file implements the __udivsi3 (32-bit unsigned integer divide)
116bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon * function for the ARM architecture.  A naive digit-by-digit computation is
126bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon * employed for simplicity.
136bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon *
146bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon *===----------------------------------------------------------------------===*/
156bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon
166bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon#include "../assembly.h"
176bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon
186bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon#define ESTABLISH_FRAME \
196bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon    push   {r7, lr}    ;\
206bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon    mov     r7,     sp
216bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon#define CLEAR_FRAME_AND_RETURN \
226bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon    pop    {r7, pc}
236bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon
246bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon#define a r0
256bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon#define b r1
266bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon#define r r2
276bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon#define i r3
286bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon#define q ip
296bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon#define one lr
306bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon
316bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon.syntax unified
326bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon.align 3
3337b97d1cf4501b94347e0b4e880f4b25825a289fAnton Korobeynikov// Ok, APCS and AAPCS agree on 32 bit args, so it's safe to use the same routine.
3437b97d1cf4501b94347e0b4e880f4b25825a289fAnton KorobeynikovDEFINE_AEABI_FUNCTION_ALIAS(__aeabi_uidiv, __udivsi3)
356bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen CanonDEFINE_COMPILERRT_FUNCTION(__udivsi3)
366bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon//  We use a simple digit by digit algorithm; before we get into the actual
376bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon//  divide loop, we must calculate the left-shift amount necessary to align
386bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon//  the MSB of the divisor with that of the dividend (If this shift is
396bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon//  negative, then the result is zero, and we early out). We also conjure a
406bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon//  bit mask of 1 to use in constructing the quotient, and initialize the
416bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon//  quotient to zero.
426bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon    ESTABLISH_FRAME
436bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon    clz     r2,     a
446bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon    tst     b,      b   // detect divide-by-zero
456bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon    clz     r3,     b
466bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon    mov     q,      #0
47e1f95ca8cf82f755d6c62a6dc5b745f2e5bdf7baAnton Korobeynikov    beq     LOCAL_LABEL(return)    // return 0 if b is zero.
486bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon    mov     one,    #1
496bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon    subs    i,      r3, r2
50e1f95ca8cf82f755d6c62a6dc5b745f2e5bdf7baAnton Korobeynikov    blt     LOCAL_LABEL(return)    // return 0 if MSB(a) < MSB(b)
516bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon
52e1f95ca8cf82f755d6c62a6dc5b745f2e5bdf7baAnton KorobeynikovLOCAL_LABEL(mainLoop):
536bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon//  This loop basically implements the following:
546bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon//
556bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon//  do {
566bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon//      if (a >= b << i) {
576bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon//          a -= b << i;
586bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon//          q |= 1 << i;
596bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon//          if (a == 0) break;
606bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon//      }
616bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon//  } while (--i)
626bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon//
636bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon//  Note that this does not perform the final iteration (i == 0); by doing it
646bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon//  this way, we can merge the two branches which is a substantial win for
656bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon//  such a tight loop on current ARM architectures.
666bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon    subs    r,      a,  b, lsl i
676bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon    orrhs   q,      q,one, lsl i
686bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon    movhs   a,      r
696bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon    subsne  i,      i, #1
70e1f95ca8cf82f755d6c62a6dc5b745f2e5bdf7baAnton Korobeynikov    bhi     LOCAL_LABEL(mainLoop)
716bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon
726bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon//  Do the final test subtraction and update of quotient (i == 0), as it is
736bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon//  not performed in the main loop.
746bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon    subs    r,      a,  b
756bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon    orrhs   q,      #1
766bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon
77e1f95ca8cf82f755d6c62a6dc5b745f2e5bdf7baAnton KorobeynikovLOCAL_LABEL(return):
786bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon//  Move the quotient to r0 and return.
796bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon    mov     r0,     q
806bbe0bb0856f7e7cabe11bc0a10c268db034142bStephen Canon    CLEAR_FRAME_AND_RETURN
81