1cf2cfa174ca878c144e17e9fc60ca8e9070d7dededisonn@google.com/*------------------------------------------------------------------------- 2cf2cfa174ca878c144e17e9fc60ca8e9070d7dededisonn@google.com * drawElements Base Portability Library 38cee797901763ab0922eb9ef484cfdcbc94bee54edisonn@google.com * ------------------------------------- 4cf2cfa174ca878c144e17e9fc60ca8e9070d7dededisonn@google.com * 5cf2cfa174ca878c144e17e9fc60ca8e9070d7dededisonn@google.com * Copyright 2014 The Android Open Source Project 6cf2cfa174ca878c144e17e9fc60ca8e9070d7dededisonn@google.com * 78cee797901763ab0922eb9ef484cfdcbc94bee54edisonn@google.com * Licensed under the Apache License, Version 2.0 (the "License"); 8cf2cfa174ca878c144e17e9fc60ca8e9070d7dededisonn@google.com * you may not use this file except in compliance with the License. 98cee797901763ab0922eb9ef484cfdcbc94bee54edisonn@google.com * You may obtain a copy of the License at 10cf2cfa174ca878c144e17e9fc60ca8e9070d7dededisonn@google.com * 118cee797901763ab0922eb9ef484cfdcbc94bee54edisonn@google.com * http://www.apache.org/licenses/LICENSE-2.0 128cee797901763ab0922eb9ef484cfdcbc94bee54edisonn@google.com * 138cee797901763ab0922eb9ef484cfdcbc94bee54edisonn@google.com * Unless required by applicable law or agreed to in writing, software 148cee797901763ab0922eb9ef484cfdcbc94bee54edisonn@google.com * distributed under the License is distributed on an "AS IS" BASIS, 158cee797901763ab0922eb9ef484cfdcbc94bee54edisonn@google.com * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 168cee797901763ab0922eb9ef484cfdcbc94bee54edisonn@google.com * See the License for the specific language governing permissions and 178cee797901763ab0922eb9ef484cfdcbc94bee54edisonn@google.com * limitations under the License. 188cee797901763ab0922eb9ef484cfdcbc94bee54edisonn@google.com * 198cee797901763ab0922eb9ef484cfdcbc94bee54edisonn@google.com *//*! 208cee797901763ab0922eb9ef484cfdcbc94bee54edisonn@google.com * \file 218cee797901763ab0922eb9ef484cfdcbc94bee54edisonn@google.com * \brief 32-bit integer math. 228cee797901763ab0922eb9ef484cfdcbc94bee54edisonn@google.com *//*--------------------------------------------------------------------*/ 238cee797901763ab0922eb9ef484cfdcbc94bee54edisonn@google.com 248cee797901763ab0922eb9ef484cfdcbc94bee54edisonn@google.com#include "deInt32.h" 258cee797901763ab0922eb9ef484cfdcbc94bee54edisonn@google.com 268cee797901763ab0922eb9ef484cfdcbc94bee54edisonn@google.comDE_BEGIN_EXTERN_C 278cee797901763ab0922eb9ef484cfdcbc94bee54edisonn@google.com 288cee797901763ab0922eb9ef484cfdcbc94bee54edisonn@google.comconst deInt8 g_clzLUT[256] = 298cee797901763ab0922eb9ef484cfdcbc94bee54edisonn@google.com{ 308cee797901763ab0922eb9ef484cfdcbc94bee54edisonn@google.com 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 318cee797901763ab0922eb9ef484cfdcbc94bee54edisonn@google.com 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 328cee797901763ab0922eb9ef484cfdcbc94bee54edisonn@google.com 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 338cee797901763ab0922eb9ef484cfdcbc94bee54edisonn@google.com 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 34 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 35 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 36 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 37 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 38 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 39 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 40 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 41 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 43 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 44 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 45 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 46}; 47 48/*--------------------------------------------------------------------*//*! 49 * \brief Compute the reciprocal of an integer. 50 * \param a Input value. 51 * \param rcp Pointer to resulting reciprocal "mantissa" value. 52 * \param exp Pointer to resulting exponent value. 53 * 54 * The returned value is split into an exponent part and a mantissa part for 55 * practical reasons. The actual reciprocal can be computed with the formula: 56 * result = exp2(exp) * rcp / (1<<DE_RCP_FRAC_BITS). 57 *//*--------------------------------------------------------------------*/ 58void deRcp32 (deUint32 a, deUint32* rcp, int* exp) 59{ 60 enum { RCP_LUT_BITS = 8 }; 61 static const deUint32 s_rcpLUT[1<<RCP_LUT_BITS] = 62 { 63 0x40000000, 0x3fc03fc0, 0x3f80fe03, 0x3f423954, 64 0x3f03f03f, 0x3ec62159, 0x3e88cb3c, 0x3e4bec88, 65 0x3e0f83e0, 0x3dd38ff0, 0x3d980f66, 0x3d5d00f5, 66 0x3d226357, 0x3ce8354b, 0x3cae7592, 0x3c7522f3, 67 0x3c3c3c3c, 0x3c03c03c, 0x3bcbadc7, 0x3b9403b9, 68 0x3b5cc0ed, 0x3b25e446, 0x3aef6ca9, 0x3ab95900, 69 0x3a83a83a, 0x3a4e5947, 0x3a196b1e, 0x39e4dcb8, 70 0x39b0ad12, 0x397cdb2c, 0x3949660a, 0x39164cb5, 71 0x38e38e38, 0x38b129a2, 0x387f1e03, 0x384d6a72, 72 0x381c0e07, 0x37eb07dd, 0x37ba5713, 0x3789facb, 73 0x3759f229, 0x372a3c56, 0x36fad87b, 0x36cbc5c7, 74 0x369d0369, 0x366e9095, 0x36406c80, 0x36129663, 75 0x35e50d79, 0x35b7d0ff, 0x358ae035, 0x355e3a5f, 76 0x3531dec0, 0x3505cca2, 0x34da034d, 0x34ae820e, 77 0x34834834, 0x3458550f, 0x342da7f2, 0x34034034, 78 0x33d91d2a, 0x33af3e2e, 0x3385a29d, 0x335c49d4, 79 0x33333333, 0x330a5e1b, 0x32e1c9f0, 0x32b97617, 80 0x329161f9, 0x32698cff, 0x3241f693, 0x321a9e24, 81 0x31f3831f, 0x31cca4f5, 0x31a6031a, 0x317f9d00, 82 0x3159721e, 0x313381ec, 0x310dcbe1, 0x30e84f79, 83 0x30c30c30, 0x309e0184, 0x30792ef5, 0x30549403, 84 0x30303030, 0x300c0300, 0x2fe80bfa, 0x2fc44aa2, 85 0x2fa0be82, 0x2f7d6724, 0x2f5a4411, 0x2f3754d7, 86 0x2f149902, 0x2ef21023, 0x2ecfb9c8, 0x2ead9584, 87 0x2e8ba2e8, 0x2e69e18a, 0x2e4850fe, 0x2e26f0db, 88 0x2e05c0b8, 0x2de4c02d, 0x2dc3eed6, 0x2da34c4d, 89 0x2d82d82d, 0x2d629215, 0x2d4279a2, 0x2d228e75, 90 0x2d02d02d, 0x2ce33e6c, 0x2cc3d8d4, 0x2ca49f0a, 91 0x2c8590b2, 0x2c66ad71, 0x2c47f4ee, 0x2c2966d0, 92 0x2c0b02c0, 0x2becc868, 0x2bceb771, 0x2bb0cf87, 93 0x2b931057, 0x2b75798c, 0x2b580ad6, 0x2b3ac3e2, 94 0x2b1da461, 0x2b00ac02, 0x2ae3da78, 0x2ac72f74, 95 0x2aaaaaaa, 0x2a8e4bcd, 0x2a721291, 0x2a55fead, 96 0x2a3a0fd5, 0x2a1e45c2, 0x2a02a02a, 0x29e71ec5, 97 0x29cbc14e, 0x29b0877d, 0x2995710e, 0x297a7dbb, 98 0x295fad40, 0x2944ff5a, 0x292a73c7, 0x29100a44, 99 0x28f5c28f, 0x28db9c68, 0x28c1978f, 0x28a7b3c5, 100 0x288df0ca, 0x28744e61, 0x285acc4b, 0x28416a4c, 101 0x28282828, 0x280f05a2, 0x27f6027f, 0x27dd1e85, 102 0x27c45979, 0x27abb323, 0x27932b48, 0x277ac1b2, 103 0x27627627, 0x274a4870, 0x27323858, 0x271a45a6, 104 0x27027027, 0x26eab7a3, 0x26d31be7, 0x26bb9cbf, 105 0x26a439f6, 0x268cf359, 0x2675c8b6, 0x265eb9da, 106 0x2647c694, 0x2630eeb1, 0x261a3202, 0x26039055, 107 0x25ed097b, 0x25d69d43, 0x25c04b80, 0x25aa1402, 108 0x2593f69b, 0x257df31c, 0x2568095a, 0x25523925, 109 0x253c8253, 0x2526e4b7, 0x25116025, 0x24fbf471, 110 0x24e6a171, 0x24d166f9, 0x24bc44e1, 0x24a73afd, 111 0x24924924, 0x247d6f2e, 0x2468acf1, 0x24540245, 112 0x243f6f02, 0x242af300, 0x24168e18, 0x24024024, 113 0x23ee08fb, 0x23d9e878, 0x23c5de76, 0x23b1eace, 114 0x239e0d5b, 0x238a45f8, 0x23769480, 0x2362f8cf, 115 0x234f72c2, 0x233c0233, 0x2328a701, 0x23156107, 116 0x23023023, 0x22ef1432, 0x22dc0d12, 0x22c91aa1, 117 0x22b63cbe, 0x22a37347, 0x2290be1c, 0x227e1d1a, 118 0x226b9022, 0x22591713, 0x2246b1ce, 0x22346033, 119 0x22222222, 0x220ff77c, 0x21fde021, 0x21ebdbf5, 120 0x21d9ead7, 0x21c80cab, 0x21b64151, 0x21a488ac, 121 0x2192e29f, 0x21814f0d, 0x216fcdd8, 0x215e5ee4, 122 0x214d0214, 0x213bb74d, 0x212a7e72, 0x21195766, 123 0x21084210, 0x20f73e53, 0x20e64c14, 0x20d56b38, 124 0x20c49ba5, 0x20b3dd40, 0x20a32fef, 0x20929398, 125 0x20820820, 0x20718d6f, 0x2061236a, 0x2050c9f8, 126 0x20408102, 0x2030486c, 0x20202020, 0x20100804 127 }; 128 129 DE_STATIC_ASSERT(DE_LENGTH_OF_ARRAY(s_rcpLUT) == (1<<RCP_LUT_BITS)); 130 131 int shift = deClz32(a); 132 deUint32 normalized = (deUint32)a << shift; /* Highest bit is always 1. */ 133 int lookupNdx = (normalized >> (31 - RCP_LUT_BITS)) & ((1<<RCP_LUT_BITS)-1); /* Discard high bit, leave 8 next highest bits to lowest bits of normalized. */ 134 deUint32 result; 135 deUint32 tmp; 136 137 /* Must be non-negative. */ 138 DE_ASSERT(a > 0); 139 140 /* First approximation from lookup table. */ 141 DE_ASSERT(lookupNdx >= 0 && lookupNdx < (1<<RCP_LUT_BITS)); 142 result = s_rcpLUT[lookupNdx]; 143 144 /* Newton-Raphson iteration for improved accuracy (has 16 bits of accuracy afterwards). */ 145 tmp = deSafeMuluAsr32(result, normalized, 31); 146 result = deSafeMuluAsr32(result, (2u<<DE_RCP_FRAC_BITS) - tmp, DE_RCP_FRAC_BITS); 147 148 /* Second Newton-Raphson iteration (now has full accuracy). */ 149 tmp = deSafeMuluAsr32(result, normalized, 31); 150 result = deSafeMuluAsr32(result, (2u<<DE_RCP_FRAC_BITS) - tmp, DE_RCP_FRAC_BITS); 151 152 /* Return results. */ 153 *rcp = result; 154 *exp = 31 - shift; 155} 156 157DE_END_EXTERN_C 158