1
2/*
3 * Copyright 2006 The Android Open Source Project
4 *
5 * Use of this source code is governed by a BSD-style license that can be
6 * found in the LICENSE file.
7 */
8
9
10#ifndef SkMath_DEFINED
11#define SkMath_DEFINED
12
13#include "SkTypes.h"
14
15// 64bit -> 32bit utilities
16
/**
 *  Reports whether a 64-bit signed value can be represented exactly in
 *  signed 32 bits, i.e. whether it survives a round trip through int32_t.
 */
static inline bool sk_64_isS32(int64_t value) {
    const int32_t narrowed = (int32_t)value;
    // The comparison widens narrowed back to 64 bits; if narrowing changed
    // the value, the two sides differ.
    return narrowed == value;
}
23
24/**
25 *  Return the 64bit argument as signed 32bits, asserting in debug that the arg
26 *  exactly fits in signed 32bits. In the release build, no checks are preformed
27 *  and the return value if the arg does not fit is undefined.
28 */
29static inline int32_t sk_64_asS32(int64_t value) {
30    SkASSERT(sk_64_isS32(value));
31    return (int32_t)value;
32}
33
// Multiplies two values as 64-bit integers. Because the parameters are
// int64_t, plain int arguments are widened at the call site, so the caller
// does not have to remember to write the cast by hand, e.g. (int64_t)a * b.
static inline int64_t sk_64_mul(int64_t a, int64_t b) {
    const int64_t product = a * b;
    return product;
}
40
41///////////////////////////////////////////////////////////////////////////////
42
43/**
44 *  Computes numer1 * numer2 / denom in full 64 intermediate precision.
45 *  It is an error for denom to be 0. There is no special handling if
46 *  the result overflows 32bits.
47 */
48static inline int32_t SkMulDiv(int32_t numer1, int32_t numer2, int32_t denom) {
49    SkASSERT(denom);
50
51    int64_t tmp = sk_64_mul(numer1, numer2) / denom;
52    return sk_64_asS32(tmp);
53}
54
/**
 *  Computes (numer << shift) / denom in full 64-bit intermediate precision.
 *  It is an error for denom to be 0. There is no special handling if
 *  the result overflows 32 bits.
 *  @param numer    The value that is shifted left and then divided
 *  @param denom    The divisor; must not be 0
 *  @param shift    The number of bits numer is shifted left before dividing
 *  @return the quotient as a signed 32-bit value
 */
int32_t SkDivBits(int32_t numer, int32_t denom, int shift);
61
/**
 *  Returns the integer square root of value, with a bias of bitBias.
 *  SkSqrt32 is defined in terms of this function with a bitBias of 15.
 */
int32_t SkSqrtBits(int32_t value, int bitBias);
66
/** Returns the integer square root of n, where n is treated as an SkFixed
 *  (16.16) value. Expands to SkSqrtBits(n, 15).
 */
#define SkSqrt32(n)         SkSqrtBits(n, 15)
70
//! Returns the number of leading zero bits (0...32) in the given 32-bit value.
//! SkCLZ maps to this function when no compiler intrinsic is selected.
int SkCLZ_portable(uint32_t);
73
// SkCLZ(mask) returns the number of leading zero bits in a 32-bit value and
// yields 32 when mask is 0. If SkCLZ is already defined it is left alone;
// otherwise a compiler intrinsic is used where one is available, with
// SkCLZ_portable as the fallback.
#ifndef SkCLZ
    #if defined(_MSC_VER) && _MSC_VER >= 1400
        #include <intrin.h>

        static inline int SkCLZ(uint32_t mask) {
            if (mask) {
                // _BitScanReverse writes through an unsigned long*. Using the
                // built-in type instead of DWORD keeps this header from
                // depending on <windows.h> having been included first.
                unsigned long index = 0;
                _BitScanReverse(&index, mask);
                // index is the position of the highest set bit (0..31);
                // XOR with 31 turns it into a leading-zero count.
                return (int)(index ^ 0x1F);
            } else {
                return 32;
            }
        }
    #elif defined(SK_CPU_ARM32) || defined(__GNUC__) || defined(__clang__)
        static inline int SkCLZ(uint32_t mask) {
            // __builtin_clz(0) is undefined, so we have to detect that case.
            return mask ? __builtin_clz(mask) : 32;
        }
    #else
        #define SkCLZ(x)    SkCLZ_portable(x)
    #endif
#endif
96
/**
 *  Clamps negative inputs to zero: returns 0 when value < 0, otherwise value.
 *  Done with a sign mask, so there are no compares or branches.
 */
static inline int SkClampPos(int value) {
    // With an arithmetic shift the mask is all ones for a negative value
    // and zero otherwise.
    const int signMask = value >> 31;
    return value & ~signMask;
}
103
104/** Given an integer and a positive (max) integer, return the value
105 *  pinned against 0 and max, inclusive.
106 *  @param value    The value we want returned pinned between [0...max]
107 *  @param max      The positive max value
108 *  @return 0 if value < 0, max if value > max, else value
109 */
110static inline int SkClampMax(int value, int max) {
111    // ensure that max is positive
112    SkASSERT(max >= 0);
113    if (value < 0) {
114        value = 0;
115    }
116    if (value > max) {
117        value = max;
118    }
119    return value;
120}
121
122/**
123 *  Returns the smallest power-of-2 that is >= the specified value. If value
124 *  is already a power of 2, then it is returned unchanged. It is undefined
125 *  if value is <= 0.
126 */
127static inline int SkNextPow2(int value) {
128    SkASSERT(value > 0);
129    return 1 << (32 - SkCLZ(value - 1));
130}
131
132/**
133 *  Returns the log2 of the specified value, were that value to be rounded up
134 *  to the next power of 2. It is undefined to pass 0. Examples:
135 *  SkNextLog2(1) -> 0
136 *  SkNextLog2(2) -> 1
137 *  SkNextLog2(3) -> 2
138 *  SkNextLog2(4) -> 2
139 *  SkNextLog2(5) -> 3
140 */
141static inline int SkNextLog2(uint32_t value) {
142    SkASSERT(value != 0);
143    return 32 - SkCLZ(value - 1);
144}
145
/**
 *  Reports whether value is a power of 2. There is no explicit check for
 *  value <= 0.
 */
static inline bool SkIsPow2(int value) {
    // Clearing the lowest set bit leaves nothing when at most one bit is set.
    const int lowestBitCleared = value & (value - 1);
    return lowestBitCleared == 0;
}
153
154///////////////////////////////////////////////////////////////////////////////
155
/**
 *  SkMulS16(a, b) multiplies a * b, but requires that a and b are both int16_t.
 *  With this requirement, we can generate faster instructions on some
 *  architectures.
 *
 *  One of three variants is selected at compile time:
 *    - SK_ARM_HAS_EDSP defined: an inline function built on the ARM smulbb
 *      instruction.
 *    - otherwise, SK_DEBUG defined: an inline function that asserts the
 *      operand range and then multiplies.
 *    - otherwise: a plain macro expanding to ((x) * (y)) with no checks.
 */
#ifdef SK_ARM_HAS_EDSP
    static inline int32_t SkMulS16(S16CPU x, S16CPU y) {
        // Each operand must round-trip through int16_t unchanged.
        SkASSERT((int16_t)x == x);
        SkASSERT((int16_t)y == y);
        int32_t product;
        // smulbb: signed multiply of the bottom 16 bits of each operand
        // (per the ARM instruction set reference).
        asm("smulbb %0, %1, %2 \n"
            : "=r"(product)
            : "r"(x), "r"(y)
            );
        return product;
    }
#else
    #ifdef SK_DEBUG
        // Function form so the operand-range assertions can run.
        static inline int32_t SkMulS16(S16CPU x, S16CPU y) {
            SkASSERT((int16_t)x == x);
            SkASSERT((int16_t)y == y);
            return x * y;
        }
    #else
        // No checks here: this is an ordinary multiply of the two operands.
        #define SkMulS16(x, y)  ((x) * (y))
    #endif
#endif
183
184/**
185 *  Return a*b/((1 << shift) - 1), rounding any fractional bits.
186 *  Only valid if a and b are unsigned and <= 32767 and shift is > 0 and <= 8
187 */
188static inline unsigned SkMul16ShiftRound(U16CPU a, U16CPU b, int shift) {
189    SkASSERT(a <= 32767);
190    SkASSERT(b <= 32767);
191    SkASSERT(shift > 0 && shift <= 8);
192    unsigned prod = SkMulS16(a, b) + (1 << (shift - 1));
193    return (prod + (prod >> shift)) >> shift;
194}
195
196/**
197 *  Return a*b/255, rounding any fractional bits.
198 *  Only valid if a and b are unsigned and <= 32767.
199 */
200static inline U8CPU SkMulDiv255Round(U16CPU a, U16CPU b) {
201    SkASSERT(a <= 32767);
202    SkASSERT(b <= 32767);
203    unsigned prod = SkMulS16(a, b) + 128;
204    return (prod + (prod >> 8)) >> 8;
205}
206
/**
 * Divides numer by denom, storing numer/denom into *div and numer%denom
 * into *mod, each converted to Out.
 */
template <typename In, typename Out>
inline void SkTDivMod(In numer, In denom, Out* div, Out* mod) {
#ifdef SK_CPU_ARM32
    // Written as in the #else branch, GCC does not fuse the two operations
    // into one divmod call; it emits a div call followed by a divmod call.
    // Deriving the remainder from the quotient is just as fast as calling
    // __aeabi_[u]idivmod manually, with prettier code.
    //
    // This benches as around 2x faster than the code in the #else branch.
    const In quotient = numer/denom;
    *div = static_cast<Out>(quotient);
    *mod = static_cast<Out>(numer - quotient*denom);
#else
    // On x86 both results come from a single idiv.
    *div = static_cast<Out>(numer/denom);
    *mod = static_cast<Out>(numer%denom);
#endif
}
228
229#endif
230