1/*
2 * Written by Wilco Dijkstra, 1996. The following email exchange establishes the
3 * license.
4 *
5 * From: Wilco Dijkstra <Wilco.Dijkstra@ntlworld.com>
6 * Date: Fri, Jun 24, 2011 at 3:20 AM
7 * Subject: Re: sqrt routine
8 * To: Kevin Ma <kma@google.com>
9 * Hi Kevin,
10 * Thanks for asking. Those routines are public domain (originally posted to
11 * comp.sys.arm a long time ago), so you can use them freely for any purpose.
12 * Cheers,
13 * Wilco
14 *
15 * ----- Original Message -----
16 * From: "Kevin Ma" <kma@google.com>
17 * To: <Wilco.Dijkstra@ntlworld.com>
18 * Sent: Thursday, June 23, 2011 11:44 PM
19 * Subject: Fwd: sqrt routine
20 * Hi Wilco,
21 * I saw your sqrt routine from several web sites, including
22 * http://www.finesse.demon.co.uk/steven/sqrt.html.
23 * Just wonder if there's any copyright information with your Successive
24 * approximation routines, or if I can freely use it for any purpose.
25 * Thanks.
26 * Kevin
27 */
28
29// Minor modifications in code style for WebRTC, 2012.
30// Code optimizations for MIPS, 2013.
31
32#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
33
34/*
35 * Algorithm:
36 * Successive approximation of the equation (root + delta) ^ 2 = N
37 * until delta < 1. If delta < 1 we have the integer part of SQRT (N).
38 * Use delta = 2^i for i = 15 .. 0.
39 *
 * Output precision is 16 bits. Note that for large input values (close to
 * 0x7FFFFFFF), bit 15 (the highest bit of the low 16-bit half word) of the
 * result is set; it is part of the magnitude, not a sign bit. Take care
 * when casting the output to int16_t.
44 *
45 * If the input value is negative, it returns 0.
46 */
47
48
/*
 * Returns floor(sqrt(value)) for value >= 0 and 0 for negative input.
 *
 * Sixteen successive-approximation steps are performed, for N = 15 down to 0:
 *
 *   try = (root + (1 << N)) << N;
 *   if (value >= try) { value -= try; root |= 2 << N; }
 *
 * so that root ends up holding twice the integer square root. On MIPS the
 * steps are fully unrolled and made branch-free with slt/movz; on any other
 * target an equivalent portable C loop is used.
 */
int32_t WebRtcSpl_SqrtFloor(int32_t value)
{
#if defined(__mips__) || defined(__mips)
  int32_t root = 0, tmp1, tmp2, tmp3, tmp4;

  /*
   * Register roles within every step:
   *   tmp1 - trial value (root + (1 << N)) << N
   *   tmp2 - 1 if value < trial (step rejected), 0 otherwise
   *   tmp3 - value - trial, committed to value only when tmp2 == 0
   *   tmp4 - root | (2 << N), committed to root only when tmp2 == 0
   *
   * Every subtraction uses the non-trapping subu: for negative inputs the
   * difference may wrap, but it is then discarded by movz. The trapping
   * "sub" would raise an Integer Overflow exception for
   * value < -0x40000000 in the first step.
   */
  __asm __volatile(
    ".set   push                                       \n\t"
    ".set   noreorder                                  \n\t"

    /* N = 15: root is still 0, so trial = 0x8000 << 15 = 0x40000000. */
    "lui    %[tmp1],      0x4000                       \n\t"
    "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
    "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
    "lui    %[tmp1],      0x1                          \n\t"
    "or     %[tmp4],      %[root],      %[tmp1]        \n\t"
    "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
    "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"

    /* N = 14 */
    "addiu  %[tmp1],      $0,           0x4000         \n\t"
    "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
    "sll    %[tmp1],      14                           \n\t"
    "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
    "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
    "ori    %[tmp4],      %[root],      0x8000         \n\t"
    "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
    "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"

    /* N = 13 */
    "addiu  %[tmp1],      $0,           0x2000         \n\t"
    "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
    "sll    %[tmp1],      13                           \n\t"
    "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
    "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
    "ori    %[tmp4],      %[root],      0x4000         \n\t"
    "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
    "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"

    /* N = 12 */
    "addiu  %[tmp1],      $0,           0x1000         \n\t"
    "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
    "sll    %[tmp1],      12                           \n\t"
    "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
    "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
    "ori    %[tmp4],      %[root],      0x2000         \n\t"
    "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
    "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"

    /* N = 11 */
    "addiu  %[tmp1],      $0,           0x800          \n\t"
    "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
    "sll    %[tmp1],      11                           \n\t"
    "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
    "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
    "ori    %[tmp4],      %[root],      0x1000         \n\t"
    "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
    "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"

    /* N = 10 */
    "addiu  %[tmp1],      $0,           0x400          \n\t"
    "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
    "sll    %[tmp1],      10                           \n\t"
    "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
    "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
    "ori    %[tmp4],      %[root],      0x800          \n\t"
    "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
    "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"

    /* N = 9 */
    "addiu  %[tmp1],      $0,           0x200          \n\t"
    "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
    "sll    %[tmp1],      9                            \n\t"
    "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
    "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
    "ori    %[tmp4],      %[root],      0x400          \n\t"
    "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
    "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"

    /* N = 8 */
    "addiu  %[tmp1],      $0,           0x100          \n\t"
    "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
    "sll    %[tmp1],      8                            \n\t"
    "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
    "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
    "ori    %[tmp4],      %[root],      0x200          \n\t"
    "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
    "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"

    /* N = 7 */
    "addiu  %[tmp1],      $0,           0x80           \n\t"
    "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
    "sll    %[tmp1],      7                            \n\t"
    "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
    "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
    "ori    %[tmp4],      %[root],      0x100          \n\t"
    "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
    "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"

    /* N = 6 */
    "addiu  %[tmp1],      $0,           0x40           \n\t"
    "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
    "sll    %[tmp1],      6                            \n\t"
    "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
    "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
    "ori    %[tmp4],      %[root],      0x80           \n\t"
    "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
    "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"

    /* N = 5 */
    "addiu  %[tmp1],      $0,           0x20           \n\t"
    "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
    "sll    %[tmp1],      5                            \n\t"
    "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
    "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
    "ori    %[tmp4],      %[root],      0x40           \n\t"
    "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
    "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"

    /* N = 4 */
    "addiu  %[tmp1],      $0,           0x10           \n\t"
    "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
    "sll    %[tmp1],      4                            \n\t"
    "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
    "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
    "ori    %[tmp4],      %[root],      0x20           \n\t"
    "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
    "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"

    /* N = 3 */
    "addiu  %[tmp1],      $0,           0x8            \n\t"
    "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
    "sll    %[tmp1],      3                            \n\t"
    "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
    "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
    "ori    %[tmp4],      %[root],      0x10           \n\t"
    "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
    "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"

    /* N = 2 */
    "addiu  %[tmp1],      $0,           0x4            \n\t"
    "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
    "sll    %[tmp1],      2                            \n\t"
    "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
    "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
    "ori    %[tmp4],      %[root],      0x8            \n\t"
    "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
    "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"

    /* N = 1 */
    "addiu  %[tmp1],      $0,           0x2            \n\t"
    "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
    "sll    %[tmp1],      1                            \n\t"
    "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
    "subu   %[tmp3],      %[value],     %[tmp1]        \n\t"
    "ori    %[tmp4],      %[root],      0x4            \n\t"
    "movz   %[value],     %[tmp3],      %[tmp2]        \n\t"
    "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"

    /* N = 0: no shift needed, and value is not used afterwards, so only */
    /* root is updated.                                                  */
    "addiu  %[tmp1],      $0,           0x1            \n\t"
    "addu   %[tmp1],      %[tmp1],      %[root]        \n\t"
    "slt    %[tmp2],      %[value],     %[tmp1]        \n\t"
    "ori    %[tmp4],      %[root],      0x2            \n\t"
    "movz   %[root],      %[tmp4],      %[tmp2]        \n\t"

    ".set   pop                                        \n\t"

    : [root] "+r" (root), [value] "+r" (value),
      [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2),
      [tmp3] "=&r" (tmp3), [tmp4] "=&r" (tmp4)
    :
  );

  /* root holds twice the integer square root. */
  return root >> 1;
#else
  /* Portable equivalent of the unrolled MIPS sequence above. */
  int32_t root = 0;
  int bit;

  for (bit = 15; bit >= 0; --bit)
  {
    /* The trial never exceeds 0x50000000, so the shift cannot overflow. */
    const int32_t trial = (root + ((int32_t)1 << bit)) << bit;

    if (value >= trial)
    {
      value -= trial;
      root |= (int32_t)2 << bit;
    }
  }

  /* root holds twice the integer square root. */
  return root >> 1;
#endif
}
207
208