/*===-- udivmodsi4.S - 32-bit unsigned integer divide and modulus ---------===//
 *
 *                     The LLVM Compiler Infrastructure
 *
 * This file is dual licensed under the MIT and the University of Illinois Open
 * Source Licenses. See LICENSE.TXT for details.
 *
 *===----------------------------------------------------------------------===//
 *
 * This file implements the __udivmodsi4 (32-bit unsigned integer divide and
 * modulus) function for the ARM 32-bit architecture.
 *
 *===----------------------------------------------------------------------===*/

#include "../assembly.h"

	.syntax unified
	.text

/*
 * __udivmodsi4 - 32-bit unsigned divide producing quotient and remainder.
 *
 * C-level view (assuming the standard ARM procedure call convention):
 *   unsigned __udivmodsi4(unsigned numerator, unsigned denominator,
 *                         unsigned *remainder);
 *
 * In:     r0 = numerator
 *         r1 = denominator
 *         r2 = address the remainder is stored to
 * Out:    r0 = quotient, [r2] = remainder
 * Clobb:  r3, ip, flags; additionally r1 on the hardware-divide path.
 *         The path without CLZ borrows r4 and restores it from the stack
 *         before jumping into the division blocks.
 *
 * Division by zero: r0 is set to 0 and nothing is stored to [r2]. With
 * __ARM_EABI__ defined control is then transferred to __aeabi_idiv0 with a
 * plain branch (lr untouched); otherwise the function returns 0.
 */

/*
 * Size in bytes of one expansion of block(): three instructions of four
 * bytes each. The computed jump into the unrolled division relies on it.
 */
#define	DIV_BLOCK_BYTES	12

	.p2align 2
DEFINE_COMPILERRT_FUNCTION(__udivmodsi4)
#if __ARM_ARCH_EXT_IDIV__
	/* Hardware divide: quotient from udiv, remainder = n - q * d. */
	tst     r1, r1
	beq     LOCAL_LABEL(divby0)
	mov 	r3, r0			/* keep the numerator for the mls below */
	udiv	r0, r3, r1		/* r0 = quotient */
	mls 	r1, r0, r1, r3		/* r1 = r3 - r0 * r1 = remainder */
	str 	r1, [r2]
	bx  	lr
#else
	cmp	r1, #1
	bcc	LOCAL_LABEL(divby0)	/* r1 == 0 */
	beq	LOCAL_LABEL(divby1)	/* r1 == 1 */
	cmp	r0, r1
	bcc	LOCAL_LABEL(quotient0)	/* r0 < r1 */
	/*
	 * Implement division using binary long division algorithm.
	 *
	 * r0 is the numerator, r1 the denominator. Here r0 >= r1 >= 2.
	 *
	 * The code before the jump computes a shift I such that (r1 << I)
	 * does not overflow and r0 < 2 * (r1 << I):
	 *   - with CLZ, I = clz(r1) - clz(r0), so r0 and (r1 << I) have the
	 *     highest bit set in the same position;
	 *   - without CLZ, a binary search finds the largest I for which
	 *     (r0 >> I) >= r1, i.e. (r1 << I) <= r0.
	 * At the time of the jump, ip := .Ldiv0block - DIV_BLOCK_BYTES * I.
	 * This depends on the fixed instruction size of block.
	 *
	 * block(shift) implements the test-and-update-quotient core.
	 * It assumes (r1 << shift) can be computed without overflow and
	 * that r0 < 2 * (r1 << shift). The quotient is accumulated in r3,
	 * the running remainder stays in r0.
	 */

#  ifdef __ARM_FEATURE_CLZ
	clz	ip, r0
	clz	r3, r1
	/* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */
	sub	r3, r3, ip		/* r3 = I */
	adr	ip, LOCAL_LABEL(div0block)
	/* ip -= DIV_BLOCK_BYTES * I, computed as 4 * I + 8 * I. */
	sub	ip, ip, r3, lsl #2
	sub	ip, ip, r3, lsl #3
	mov	r3, #0			/* quotient accumulator starts at 0 */
	bx	ip
#  else
	/* r4 is used as scratch below; save it (sp moves down by 8 bytes). */
	str	r4, [sp, #-8]!

	mov	r4, r0
	adr	ip, LOCAL_LABEL(div0block)

	/*
	 * Binary search for I, one bit of I per step (16, 8, 4, 2, 1).
	 * Whenever (r4 >> step) >= r1 the step is taken: r4 is replaced by
	 * the shifted value and ip moves back by that many blocks.
	 */
	lsr	r3, r4, #16
	cmp	r3, r1
	movhs	r4, r3
	subhs	ip, ip, #(16 * DIV_BLOCK_BYTES)

	lsr	r3, r4, #8
	cmp	r3, r1
	movhs	r4, r3
	subhs	ip, ip, #(8 * DIV_BLOCK_BYTES)

	lsr	r3, r4, #4
	cmp	r3, r1
	movhs	r4, r3
	subhs	ip, ip, #(4 * DIV_BLOCK_BYTES)

	lsr	r3, r4, #2
	cmp	r3, r1
	movhs	r4, r3
	subhs	ip, ip, #(2 * DIV_BLOCK_BYTES)

	/* Last block, no need to update r3 or r4. */
	cmp	r1, r4, lsr #1
	subls	ip, ip, #(1 * DIV_BLOCK_BYTES)

	ldr	r4, [sp], #8	/* restore r4, we are done with it. */
	mov	r3, #0		/* quotient accumulator starts at 0 */

	JMP(ip)
#  endif

/* '#' cannot appear literally inside a function-like macro body. */
#define	IMM	#

/*
 * One long-division step: if r0 >= (r1 << shift), set bit 'shift' of the
 * quotient in r3 and subtract (r1 << shift) from r0.
 */
#define block(shift) \
	cmp	r0, r1, lsl IMM shift; \
	addhs	r3, r3, IMM (1 << shift); \
	subhs	r0, r0, r1, lsl IMM shift

	block(31)
	block(30)
	block(29)
	block(28)
	block(27)
	block(26)
	block(25)
	block(24)
	block(23)
	block(22)
	block(21)
	block(20)
	block(19)
	block(18)
	block(17)
	block(16)
	block(15)
	block(14)
	block(13)
	block(12)
	block(11)
	block(10)
	block(9)
	block(8)
	block(7)
	block(6)
	block(5)
	block(4)
	block(3)
	block(2)
	block(1)
LOCAL_LABEL(div0block):
	block(0)

	/* r0 now holds the remainder, r3 the quotient. */
	str	r0, [r2]
	mov	r0, r3
	JMP(lr)

LOCAL_LABEL(quotient0):
	/* numerator < denominator: quotient 0, remainder = numerator. */
	str	r0, [r2]
	mov	r0, #0
	JMP(lr)

LOCAL_LABEL(divby1):
	/* denominator == 1: quotient = numerator (already in r0), remainder 0. */
	mov	r3, #0
	str	r3, [r2]
	JMP(lr)
#endif /* __ARM_ARCH_EXT_IDIV__ */

LOCAL_LABEL(divby0):
	/* Quotient 0; the remainder slot at [r2] is left untouched. */
	mov	r0, #0
#ifdef __ARM_EABI__
	b	__aeabi_idiv0
#else
	JMP(lr)
#endif

END_COMPILERRT_FUNCTION(__udivmodsi4)