/*===-- udivmodsi4.S - 32-bit unsigned integer divide and modulus ---------===//
 *
 *                     The LLVM Compiler Infrastructure
 *
 * This file is dual licensed under the MIT and the University of Illinois Open
 * Source Licenses. See LICENSE.TXT for details.
 *
 *===----------------------------------------------------------------------===//
 *
 * This file implements the __udivmodsi4 (32-bit unsigned integer divide and
 * modulus) function for the ARM 32-bit architecture.
 *
 *===----------------------------------------------------------------------===*/

#include "../assembly.h"

	.syntax unified
	.text

	.p2align 2
/*
 * __udivmodsi4
 *
 * In:    r0 = numerator
 *        r1 = denominator
 *        r2 = address the 32-bit remainder is stored to
 * Out:   r0 = quotient; the remainder is written to [r2]
 * Uses:  r1 (overwritten on the hardware-divide path), r3, ip and the
 *        condition flags as scratch. r4 is used on the path without CLZ,
 *        but is saved to and restored from the stack.
 *
 * Division by zero: r0 is set to 0 and, when __ARM_EABI__ is defined,
 * control is transferred to __aeabi_idiv0; otherwise the function returns 0.
 * Nothing is stored to [r2] in that case.
 */
DEFINE_COMPILERRT_FUNCTION(__udivmodsi4)
#if __ARM_ARCH_EXT_IDIV__
	/* Hardware divide: quotient from udiv, remainder from multiply-subtract. */
	tst	r1, r1
	beq	LOCAL_LABEL(divby0)
	mov	r3, r0			/* keep the numerator, r0 becomes the quotient */
	udiv	r0, r3, r1
	mls	r1, r0, r1, r3		/* r1 = numerator - quotient * denominator */
	str	r1, [r2]
	bx	lr
#else
	/* Dispatch the trivial cases first: r1 == 0, r1 == 1 and r0 < r1. */
	cmp	r1, #1
	bcc	LOCAL_LABEL(divby0)
	beq	LOCAL_LABEL(divby1)
	cmp	r0, r1
	bcc	LOCAL_LABEL(quotient0)
	/*
	 * Implement division using binary long division algorithm.
	 *
	 * r0 is the numerator, r1 the denominator.
	 *
	 * The code before JMP computes the correct shift I, so that
	 * r0 and (r1 << I) have the highest bit set in the same position.
	 * At the time of JMP, ip := .Ldiv0block - 12 * I.
	 * This depends on the fixed instruction size of block.
	 *
	 * block(shift) implements the test-and-update-quotient core.
	 * It assumes (r1 << shift) can be computed without overflow and
	 * that r0 < 2 * (r1 << shift). The quotient is stored in r3.
	 */

#  ifdef __ARM_FEATURE_CLZ
	clz	ip, r0
	clz	r3, r1
	/* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */
	sub	r3, r3, ip		/* r3 = I */
	adr	ip, LOCAL_LABEL(div0block)
	/* ip -= 12 * I, done as 4 * I followed by 8 * I. */
	sub	ip, ip, r3, lsl #2
	sub	ip, ip, r3, lsl #3
	mov	r3, #0			/* clear the quotient accumulator */
	bx	ip
#  else
	/*
	 * No CLZ: find I with a five-step binary search (16, 8, 4, 2, 1).
	 * r4 holds the numerator shifted right by the steps accepted so far;
	 * each accepted step of size k moves ip back by k blocks of 12 bytes.
	 */
	str	r4, [sp, #-8]!

	mov	r4, r0
	adr	ip, LOCAL_LABEL(div0block)

	lsr	r3, r4, #16
	cmp	r3, r1
	movhs	r4, r3
	subhs	ip, ip, #(16 * 12)

	lsr	r3, r4, #8
	cmp	r3, r1
	movhs	r4, r3
	subhs	ip, ip, #(8 * 12)

	lsr	r3, r4, #4
	cmp	r3, r1
	movhs	r4, r3
	subhs	ip, ip, #(4 * 12)

	lsr	r3, r4, #2
	cmp	r3, r1
	movhs	r4, r3
	subhs	ip, ip, #(2 * 12)

	/* Last block, no need to update r3 or r4. */
	cmp	r1, r4, lsr #1
	subls	ip, ip, #(1 * 12)

	ldr	r4, [sp], #8	/* restore r4, we are done with it. */
	mov	r3, #0

	JMP(ip)
#  endif

/* IMM lets the '#' immediate prefix be emitted from inside a macro body. */
#define	IMM	#

/*
 * One long-division step; expands to three instructions (the 12 bytes the
 * computed jump above relies on):
 *   if (r0 >= (r1 << shift)) { r3 += 1 << shift; r0 -= r1 << shift; }
 */
#define block(shift) \
	cmp	r0, r1, lsl IMM shift; \
	addhs	r3, r3, IMM (1 << shift); \
	subhs	r0, r0, r1, lsl IMM shift

	block(31)
	block(30)
	block(29)
	block(28)
	block(27)
	block(26)
	block(25)
	block(24)
	block(23)
	block(22)
	block(21)
	block(20)
	block(19)
	block(18)
	block(17)
	block(16)
	block(15)
	block(14)
	block(13)
	block(12)
	block(11)
	block(10)
	block(9)
	block(8)
	block(7)
	block(6)
	block(5)
	block(4)
	block(3)
	block(2)
	block(1)
LOCAL_LABEL(div0block):
	block(0)

	/* r0 now holds the remainder and r3 the quotient. */
	str	r0, [r2]
	mov	r0, r3
	JMP(lr)

LOCAL_LABEL(quotient0):
	/* Numerator < denominator: remainder = numerator, quotient = 0. */
	str	r0, [r2]
	mov	r0, #0
	JMP(lr)

LOCAL_LABEL(divby1):
	/* Denominator == 1: remainder = 0, quotient = numerator (r0 untouched). */
	mov	r3, #0
	str	r3, [r2]
	JMP(lr)
#endif /* __ARM_ARCH_EXT_IDIV__ */

LOCAL_LABEL(divby0):
	/* Denominator == 0: yield 0; [r2] is not written on this path. */
	mov	r0, #0
#ifdef __ARM_EABI__
	b	__aeabi_idiv0
#else
	JMP(lr)
#endif

END_COMPILERRT_FUNCTION(__udivmodsi4)