1/* 2 * Written by Wilco Dijkstra, 1996. The following email exchange establishes the 3 * license. 4 * 5 * From: Wilco Dijkstra <Wilco.Dijkstra@ntlworld.com> 6 * Date: Fri, Jun 24, 2011 at 3:20 AM 7 * Subject: Re: sqrt routine 8 * To: Kevin Ma <kma@google.com> 9 * Hi Kevin, 10 * Thanks for asking. Those routines are public domain (originally posted to 11 * comp.sys.arm a long time ago), so you can use them freely for any purpose. 12 * Cheers, 13 * Wilco 14 * 15 * ----- Original Message ----- 16 * From: "Kevin Ma" <kma@google.com> 17 * To: <Wilco.Dijkstra@ntlworld.com> 18 * Sent: Thursday, June 23, 2011 11:44 PM 19 * Subject: Fwd: sqrt routine 20 * Hi Wilco, 21 * I saw your sqrt routine from several web sites, including 22 * http://www.finesse.demon.co.uk/steven/sqrt.html. 23 * Just wonder if there's any copyright information with your Successive 24 * approximation routines, or if I can freely use it for any purpose. 25 * Thanks. 26 * Kevin 27 */ 28 29// Minor modifications in code style for WebRTC, 2012. 30// Code optimizations for MIPS, 2013. 31 32#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 33 34/* 35 * Algorithm: 36 * Successive approximation of the equation (root + delta) ^ 2 = N 37 * until delta < 1. If delta < 1 we have the integer part of SQRT (N). 38 * Use delta = 2^i for i = 15 .. 0. 39 * 40 * Output precision is 16 bits. Note for large input values (close to 41 * 0x7FFFFFFF), bit 15 (the highest bit of the low 16-bit half word) 42 * contains the MSB information (a non-sign value). Do with caution 43 * if you need to cast the output to int16_t type. 44 * 45 * If the input value is negative, it returns 0. 46 */ 47 48 49int32_t WebRtcSpl_SqrtFloor(int32_t value) 50{ 51 int32_t root = 0, tmp1, tmp2, tmp3, tmp4; 52 53 __asm __volatile( 54 ".set push \n\t" 55 ".set noreorder \n\t" 56 57 "lui %[tmp1], 0x4000 \n\t" 58 "slt %[tmp2], %[value], %[tmp1] \n\t" 59 "sub %[tmp3], %[value], %[tmp1] \n\t" 60 "lui %[tmp1], 0x1 \n\t" 61 "or %[tmp4], %[root], %[tmp1] \n\t" 62 "movz %[value], %[tmp3], %[tmp2] \n\t" 63 "movz %[root], %[tmp4], %[tmp2] \n\t" 64 65 "addiu %[tmp1], $0, 0x4000 \n\t" 66 "addu %[tmp1], %[tmp1], %[root] \n\t" 67 "sll %[tmp1], 14 \n\t" 68 "slt %[tmp2], %[value], %[tmp1] \n\t" 69 "subu %[tmp3], %[value], %[tmp1] \n\t" 70 "ori %[tmp4], %[root], 0x8000 \n\t" 71 "movz %[value], %[tmp3], %[tmp2] \n\t" 72 "movz %[root], %[tmp4], %[tmp2] \n\t" 73 74 "addiu %[tmp1], $0, 0x2000 \n\t" 75 "addu %[tmp1], %[tmp1], %[root] \n\t" 76 "sll %[tmp1], 13 \n\t" 77 "slt %[tmp2], %[value], %[tmp1] \n\t" 78 "subu %[tmp3], %[value], %[tmp1] \n\t" 79 "ori %[tmp4], %[root], 0x4000 \n\t" 80 "movz %[value], %[tmp3], %[tmp2] \n\t" 81 "movz %[root], %[tmp4], %[tmp2] \n\t" 82 83 "addiu %[tmp1], $0, 0x1000 \n\t" 84 "addu %[tmp1], %[tmp1], %[root] \n\t" 85 "sll %[tmp1], 12 \n\t" 86 "slt %[tmp2], %[value], %[tmp1] \n\t" 87 "subu %[tmp3], %[value], %[tmp1] \n\t" 88 "ori %[tmp4], %[root], 0x2000 \n\t" 89 "movz %[value], %[tmp3], %[tmp2] \n\t" 90 "movz %[root], %[tmp4], %[tmp2] \n\t" 91 92 "addiu %[tmp1], $0, 0x800 \n\t" 93 "addu %[tmp1], %[tmp1], %[root] \n\t" 94 "sll %[tmp1], 11 \n\t" 95 "slt %[tmp2], %[value], %[tmp1] \n\t" 96 "subu %[tmp3], %[value], %[tmp1] \n\t" 97 "ori %[tmp4], %[root], 0x1000 \n\t" 98 "movz %[value], %[tmp3], %[tmp2] \n\t" 99 "movz %[root], %[tmp4], %[tmp2] \n\t" 100 101 "addiu %[tmp1], $0, 0x400 \n\t" 102 "addu %[tmp1], %[tmp1], %[root] \n\t" 103 "sll %[tmp1], 10 \n\t" 104 "slt %[tmp2], %[value], %[tmp1] \n\t" 105 "subu %[tmp3], %[value], %[tmp1] \n\t" 106 "ori %[tmp4], %[root], 0x800 \n\t" 107 "movz %[value], %[tmp3], %[tmp2] \n\t" 108 "movz %[root], %[tmp4], %[tmp2] \n\t" 109 110 "addiu %[tmp1], $0, 0x200 \n\t" 111 "addu %[tmp1], %[tmp1], %[root] \n\t" 112 "sll %[tmp1], 9 \n\t" 113 "slt %[tmp2], %[value], %[tmp1] \n\t" 114 "subu %[tmp3], %[value], %[tmp1] \n\t" 115 "ori %[tmp4], %[root], 0x400 \n\t" 116 "movz %[value], %[tmp3], %[tmp2] \n\t" 117 "movz %[root], %[tmp4], %[tmp2] \n\t" 118 119 "addiu %[tmp1], $0, 0x100 \n\t" 120 "addu %[tmp1], %[tmp1], %[root] \n\t" 121 "sll %[tmp1], 8 \n\t" 122 "slt %[tmp2], %[value], %[tmp1] \n\t" 123 "subu %[tmp3], %[value], %[tmp1] \n\t" 124 "ori %[tmp4], %[root], 0x200 \n\t" 125 "movz %[value], %[tmp3], %[tmp2] \n\t" 126 "movz %[root], %[tmp4], %[tmp2] \n\t" 127 128 "addiu %[tmp1], $0, 0x80 \n\t" 129 "addu %[tmp1], %[tmp1], %[root] \n\t" 130 "sll %[tmp1], 7 \n\t" 131 "slt %[tmp2], %[value], %[tmp1] \n\t" 132 "subu %[tmp3], %[value], %[tmp1] \n\t" 133 "ori %[tmp4], %[root], 0x100 \n\t" 134 "movz %[value], %[tmp3], %[tmp2] \n\t" 135 "movz %[root], %[tmp4], %[tmp2] \n\t" 136 137 "addiu %[tmp1], $0, 0x40 \n\t" 138 "addu %[tmp1], %[tmp1], %[root] \n\t" 139 "sll %[tmp1], 6 \n\t" 140 "slt %[tmp2], %[value], %[tmp1] \n\t" 141 "subu %[tmp3], %[value], %[tmp1] \n\t" 142 "ori %[tmp4], %[root], 0x80 \n\t" 143 "movz %[value], %[tmp3], %[tmp2] \n\t" 144 "movz %[root], %[tmp4], %[tmp2] \n\t" 145 146 "addiu %[tmp1], $0, 0x20 \n\t" 147 "addu %[tmp1], %[tmp1], %[root] \n\t" 148 "sll %[tmp1], 5 \n\t" 149 "slt %[tmp2], %[value], %[tmp1] \n\t" 150 "subu %[tmp3], %[value], %[tmp1] \n\t" 151 "ori %[tmp4], %[root], 0x40 \n\t" 152 "movz %[value], %[tmp3], %[tmp2] \n\t" 153 "movz %[root], %[tmp4], %[tmp2] \n\t" 154 155 "addiu %[tmp1], $0, 0x10 \n\t" 156 "addu %[tmp1], %[tmp1], %[root] \n\t" 157 "sll %[tmp1], 4 \n\t" 158 "slt %[tmp2], %[value], %[tmp1] \n\t" 159 "subu %[tmp3], %[value], %[tmp1] \n\t" 160 "ori %[tmp4], %[root], 0x20 \n\t" 161 "movz %[value], %[tmp3], %[tmp2] \n\t" 162 "movz %[root], %[tmp4], %[tmp2] \n\t" 163 164 "addiu %[tmp1], $0, 0x8 \n\t" 165 "addu %[tmp1], %[tmp1], %[root] \n\t" 166 "sll %[tmp1], 3 \n\t" 167 "slt %[tmp2], %[value], %[tmp1] \n\t" 168 "subu %[tmp3], %[value], %[tmp1] \n\t" 169 "ori %[tmp4], %[root], 0x10 \n\t" 170 "movz %[value], %[tmp3], %[tmp2] \n\t" 171 "movz %[root], %[tmp4], %[tmp2] \n\t" 172 173 "addiu %[tmp1], $0, 0x4 \n\t" 174 "addu %[tmp1], %[tmp1], %[root] \n\t" 175 "sll %[tmp1], 2 \n\t" 176 "slt %[tmp2], %[value], %[tmp1] \n\t" 177 "subu %[tmp3], %[value], %[tmp1] \n\t" 178 "ori %[tmp4], %[root], 0x8 \n\t" 179 "movz %[value], %[tmp3], %[tmp2] \n\t" 180 "movz %[root], %[tmp4], %[tmp2] \n\t" 181 182 "addiu %[tmp1], $0, 0x2 \n\t" 183 "addu %[tmp1], %[tmp1], %[root] \n\t" 184 "sll %[tmp1], 1 \n\t" 185 "slt %[tmp2], %[value], %[tmp1] \n\t" 186 "subu %[tmp3], %[value], %[tmp1] \n\t" 187 "ori %[tmp4], %[root], 0x4 \n\t" 188 "movz %[value], %[tmp3], %[tmp2] \n\t" 189 "movz %[root], %[tmp4], %[tmp2] \n\t" 190 191 "addiu %[tmp1], $0, 0x1 \n\t" 192 "addu %[tmp1], %[tmp1], %[root] \n\t" 193 "slt %[tmp2], %[value], %[tmp1] \n\t" 194 "ori %[tmp4], %[root], 0x2 \n\t" 195 "movz %[root], %[tmp4], %[tmp2] \n\t" 196 197 ".set pop \n\t" 198 199 : [root] "+r" (root), [value] "+r" (value), 200 [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), 201 [tmp3] "=&r" (tmp3), [tmp4] "=&r" (tmp4) 202 : 203 ); 204 205 return root >> 1; 206} 207 208