/*===-- udivmodsi4.S - 32-bit unsigned integer divide and modulus ---------===//
 *
 *                     The LLVM Compiler Infrastructure
 *
 * This file is dual licensed under the MIT and the University of Illinois Open
 * Source Licenses. See LICENSE.TXT for details.
 *
 *===----------------------------------------------------------------------===//
 *
 * This file implements the __udivmodsi4 (32-bit unsigned integer divide and
 * modulus) function for the ARM architecture.  A naive digit-by-digit
 * computation is employed for simplicity.
 *
 *===----------------------------------------------------------------------===*/
15
16#include "../assembly.h"
17
//  Prologue for the software divide path.  Saves r4 (used as a scratch
//  register by the loop), r7 and lr, then sets r7 to sp + 4.  Because push
//  stores the lowest-numbered register at the lowest address, sp + 4 is the
//  slot holding the saved r7, so r7 serves as the frame pointer here.
//  Saving lr also frees it for use as a general-purpose register.
#define ESTABLISH_FRAME    \
    push   {r4, r7, lr}   ;\
    add     r7,     sp, #4
//  Matching epilogue: restores r4 and r7 and returns to the caller by
//  popping the saved lr value straight into pc.
#define CLEAR_FRAME_AND_RETURN \
    pop    {r4, r7, pc}
23
//  Symbolic register names used by the software divide path:
//    a    r0  dividend on entry, running remainder afterwards
//    b    r1  divisor
//    i    r3  current shift amount, which is also the quotient bit index
//    r    r4  trial difference a - (b << i)
//    q    ip  quotient under construction
//    one  lr  the constant 1, shifted into place to set quotient bits
//  r4 and lr are only usable because ESTABLISH_FRAME saves them first.
#define a r0
#define b r1
#define i r3
#define r r4
#define q ip
#define one lr
30
.syntax unified
.align 3
//  __udivmodsi4: 32-bit unsigned divide with remainder.
//
//  In:   r0 = dividend
//        r1 = divisor
//        r2 = address that receives the remainder (always written)
//  Out:  r0 = quotient
//
//  Division by zero returns a quotient of 0 and stores the unmodified
//  dividend as the remainder, on both code paths below.
DEFINE_COMPILERRT_FUNCTION(__udivmodsi4)
#if __ARM_ARCH_7S__
//  Hardware divide is available: one udiv produces the quotient and one mls
//  recovers the remainder as dividend - quotient * divisor.
    tst     r1, r1
    beq     LOCAL_LABEL(divzero)
    mov     r3, r0              // keep the dividend, r0 is about to hold q
    udiv    r0, r3, r1          // r0 = dividend / divisor
    mls     r1, r0, r1, r3      // r1 = dividend - q * divisor
    str     r1, [r2]
    bx      lr
LOCAL_LABEL(divzero):
//  Divisor is zero.  Store the dividend as the remainder before returning 0
//  so that the word at [r2] is never left uninitialised and the result
//  matches what the software path produces for the same input.
    str     r0, [r2]
    mov     r0, #0
    bx      lr
#else
//  We use a simple digit by digit algorithm; before we get into the actual
//  divide loop, we must calculate the left-shift amount necessary to align
//  the MSB of the divisor with that of the dividend (if this shift is
//  negative, then the result is zero, and we early out). We also conjure a
//  bit mask of 1 to use in constructing the quotient, and initialize the
//  quotient to zero.
    ESTABLISH_FRAME
    clz     r4,     a
    tst     b,      b   // detect divide-by-zero; clz and mov leave flags alone
    clz     r3,     b
    mov     q,      #0
    beq     LOCAL_LABEL(return)    // return 0 if b is zero.
    mov     one,    #1
    subs    i,      r3, r4         // i = clz(b) - clz(a)
    blt     LOCAL_LABEL(return)    // return 0 if MSB(a) < MSB(b)

LOCAL_LABEL(mainLoop):
//  This loop basically implements the following:
//
//  do {
//      if (a >= b << i) {
//          a -= b << i;
//          q |= 1 << i;
//          if (a == 0) break;
//      }
//  } while (--i)
//
//  Note that this does not perform the final iteration (i == 0); by doing it
//  this way, we can merge the two branches which is a substantial win for
//  such a tight loop on current ARM architectures.
//
//  How the single branch covers both exits: the first subs leaves C set when
//  a >= b << i and Z set when they are equal.  If Z is set, the conditional
//  decrement is skipped, the flags stay at C = 1 and Z = 1, and bhi falls
//  through with a already zero.  Otherwise the decrement rewrites the flags
//  and bhi loops only while the new i is greater than zero.  When i starts
//  at 0 the decrement borrows (C clear), so the loop also exits; the tail
//  below then repeats the i == 0 step, which is harmless because a is
//  already smaller than b if the loop body subtracted.
    subs    r,      a,  b, lsl i
    orrhs   q,      q,one, lsl i
    movhs   a,      r
    subsne  i,      i, #1
    bhi     LOCAL_LABEL(mainLoop)

//  Do the final test subtraction and update of quotient (i == 0), as it is
//  not performed in the main loop.
    subs    r,      a,  b
    orrhs   q,      #1
    movhs   a,      r

LOCAL_LABEL(return):
//  Store the remainder, and move the quotient to r0, then return.
    str     a,     [r2]
    mov     r0,     q
    CLEAR_FRAME_AND_RETURN
#endif
94