@ Written by Wilco Dijkstra, 1996.
@ Refer to NOTICE file at the root of git project.
@
@ Minor modifications in code style for WebRTC, 2012.
@ Output is bit-exact with the reference C code in spl_sqrt_floor.c.

@ Input :             r0 32 bit unsigned integer
@ Output:             r0 = INT (SQRT (r0)), precision is 16 bits
@ Registers touched:  r1, r2 (the condition flags are also modified by cmp)

.global WebRtcSpl_SqrtFloor
@ Type the symbol as a function so ELF linkers can interwork calls into this
@ routine (it returns with bx lr). ELF-only, like .fnstart/.fnend below.
.type   WebRtcSpl_SqrtFloor, %function

.align  2
@ r0 = floor(sqrt(r0)) for a 32-bit unsigned r0, computed one result bit per
@ step, most significant bit first.
@   r0  in: operand; during the steps: remainder still to be accounted for
@   r1  constant 3 << 30; adding it after each shift of r2 restores the 01
@       pattern in bits 31:30 of r2
@   r2  bits 31:30 hold the pattern 01, the low bits hold the root bits found
@       so far; rotating it right by 2 * i yields the trial value for step i
WebRtcSpl_SqrtFloor:
.fnstart
  mov    r1, #3 << 30
  mov    r2, #1 << 30

  @ unroll for i = 0 .. 15
  @ Each step: cmp sets C when the remainder is >= the trial value; subhs then
  @ removes the trial value from the remainder, and adc shifts r2 left by one,
  @ inserting C as the new root bit (neither subhs nor adc alters the flags).

  cmp    r0, r2, ror #2 * 0
  subhs  r0, r0, r2, ror #2 * 0
  adc    r2, r1, r2, lsl #1

  cmp    r0, r2, ror #2 * 1
  subhs  r0, r0, r2, ror #2 * 1
  adc    r2, r1, r2, lsl #1

  cmp    r0, r2, ror #2 * 2
  subhs  r0, r0, r2, ror #2 * 2
  adc    r2, r1, r2, lsl #1

  cmp    r0, r2, ror #2 * 3
  subhs  r0, r0, r2, ror #2 * 3
  adc    r2, r1, r2, lsl #1

  cmp    r0, r2, ror #2 * 4
  subhs  r0, r0, r2, ror #2 * 4
  adc    r2, r1, r2, lsl #1

  cmp    r0, r2, ror #2 * 5
  subhs  r0, r0, r2, ror #2 * 5
  adc    r2, r1, r2, lsl #1

  cmp    r0, r2, ror #2 * 6
  subhs  r0, r0, r2, ror #2 * 6
  adc    r2, r1, r2, lsl #1

  cmp    r0, r2, ror #2 * 7
  subhs  r0, r0, r2, ror #2 * 7
  adc    r2, r1, r2, lsl #1

  cmp    r0, r2, ror #2 * 8
  subhs  r0, r0, r2, ror #2 * 8
  adc    r2, r1, r2, lsl #1

  cmp    r0, r2, ror #2 * 9
  subhs  r0, r0, r2, ror #2 * 9
  adc    r2, r1, r2, lsl #1

  cmp    r0, r2, ror #2 * 10
  subhs  r0, r0, r2, ror #2 * 10
  adc    r2, r1, r2, lsl #1

  cmp    r0, r2, ror #2 * 11
  subhs  r0, r0, r2, ror #2 * 11
  adc    r2, r1, r2, lsl #1

  cmp    r0, r2, ror #2 * 12
  subhs  r0, r0, r2, ror #2 * 12
  adc    r2, r1, r2, lsl #1

  cmp    r0, r2, ror #2 * 13
  subhs  r0, r0, r2, ror #2 * 13
  adc    r2, r1, r2, lsl #1

  cmp    r0, r2, ror #2 * 14
  subhs  r0, r0, r2, ror #2 * 14
  adc    r2, r1, r2, lsl #1

  cmp    r0, r2, ror #2 * 15
  subhs  r0, r0, r2, ror #2 * 15
  adc    r2, r1, r2, lsl #1

  @ Clear the 01 marker in bits 31:30 of r2; what remains is the 16-bit root.
  bic    r0, r2, #3 << 30  @ for rounding add: cmp r0, r2  adc r2, #1
  bx lr

.fnend