/* ===-- clzdi2.S - Implement __clzdi2 -------------------------------------===
 *
 *               The LLVM Compiler Infrastructure
 *
 * This file is dual licensed under the MIT and the University of Illinois Open
 * Source Licenses. See LICENSE.TXT for details.
 *
 * ===----------------------------------------------------------------------===
 *
 * This file implements count leading zeros for 64-bit arguments.
 *
 * ===----------------------------------------------------------------------===
 */
#include "../assembly.h"

	/* Unified ARM/Thumb assembler syntax; the code below goes into .text. */
	.syntax unified
	.text
#if __ARM_ARCH_ISA_THUMB == 2
	/*
	 * Thumb-2 is available: assemble what follows as Thumb code.
	 * NOTE(review): whether the function symbol is also marked as a Thumb
	 * function depends on DEFINE_COMPILERRT_FUNCTION in assembly.h - confirm.
	 */
	.thumb
#endif


	/* Align the next location to a 2^2 = 4 byte boundary. */
	.p2align	2
/*
 * __clzdi2 - count the leading zero bits of a 64-bit value n.
 *
 * In:    n in r0/r1.  The upper 32 bits are in r0 when __ARMEB__ is defined
 *        and in r1 otherwise.
 * Out:   r0 = number of leading zero bits of n.
 * Clobb: condition flags; the path without CLZ also overwrites r1 and r2.
 *
 * n == 0: the CLZ path yields 64 (32 from CLZ of the zero lower word, plus
 * 32).  The fallback path assumes n != 0; for n == 0 it would yield 63.
 */
DEFINE_COMPILERRT_FUNCTION(__clzdi2)
#ifdef __ARM_FEATURE_CLZ
	/*
	 * CLZ instruction available:
	 *   upper word != 0  ->  clz(upper)
	 *   upper word == 0  ->  clz(lower) + 32
	 * "itee ne" puts the three conditional instructions (ne, eq, eq) into
	 * one IT block.
	 */
#ifdef __ARMEB__
	cmp	r0, 0
	itee ne
	clzne	r0, r0
	clzeq	r0, r1
	addeq	r0, r0, 32
#else
	cmp	r1, 0
	itee ne
	clzne	r0, r1
	clzeq	r0, r0
	addeq	r0, r0, 32
#endif
	/* JMP comes from assembly.h; presumably a return through lr. */
	JMP(lr)
#else
	/* Assumption: n != 0 */

	/*
	 * Register roles on this path:
	 * r0: the 32-bit word being examined: the upper half of n if that is
	 *     non-zero, otherwise the lower half; narrowed by each BLOCK below
	 * r1: on entry one half of n (consumed by the selection below), then
	 *     1 + the number of leading zeros counted so far
	 * r2: scratch register for shifted r0
	 */
#ifdef __ARMEB__
	cmp	r0, 0
	moveq	r0, r1
#else
	cmp	r1, 0
	movne	r0, r1
#endif
	/*
	 * The flags are still those of the cmp above (the conditional mov did
	 * not set them): NE means the upper half was non-zero.
	 *   NE: r1 = 1       (no zeros skipped yet, plus the bias of 1)
	 *   EQ: r1 = 33      (32 zeros of the upper half, plus the bias of 1)
	 */
	movne	r1, 1
	moveq	r1, 33

	/*
	 * Basic block:
	 * if ((r0 >> SHIFT) == 0)
	 *   r1 += SHIFT;
	 * else
	 *   r0 >>= SHIFT;
	 * for descending powers of two as SHIFT.
	 *
	 * lsrs sets the flags tested by the movne/addeq that follow it.  The
	 * immediate is written without '#': inside a function-like macro the C
	 * preprocessor would treat "#shift" as the stringification operator.
	 */
#define BLOCK(shift) \
	lsrs	r2, r0, shift; \
	movne	r0, r2; \
	addeq	r1, shift

	BLOCK(16)
	BLOCK(8)
	BLOCK(4)
	BLOCK(2)

	/*
	 * The basic block invariants at this point are (r0 >> 2) == 0 and
	 * r0 != 0. This means 1 <= r0 <= 3 and 0 <= (r0 >> 1) <= 1.
	 *
	 * r0 | (r0 >> 1) == 0 | (r0 >> 1) == 1 | -(r0 >> 1) | 1 - (r0 >> 1)
	 * ---+----------------+----------------+------------+--------------
	 * 1  | 1              | 0              | 0          | 1
	 * 2  | 0              | 1              | -1         | 0
	 * 3  | 0              | 1              | -1         | 0
	 *
	 * The last column is the number of leading zeros still to be added.
	 * r1 already carries a bias of 1 from its initial value, so
	 * r1 - (r0 >> 1) is the final count.
	 */
	sub	r0, r1, r0, lsr #1

	JMP(lr)
#endif // __ARM_FEATURE_CLZ
END_COMPILERRT_FUNCTION(__clzdi2)
94