// Copyright 2015 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// fixedpoint.h: fixed-point arithmetic, with basic operations and
// a few math functions such as tanh.

// This is only used in output.h
// for some specific output pipeline stages (tanh); most of gemmlowp
// uses only plain integer arithmetic, not fixed-point arithmetic.
217b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang// At the most basic level, we distinguish between plain integer 227b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang// arithmetic and fixed-point arithmetic by the type of multiplication 237b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang// that is used: plain integer arithmetic uses plain (overflowing) 247b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang// integer multiplication, whereas fixed-point arithmetic uses 257b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang// "multiply-high" instructions, which means using only the most 267b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang// significant bits of the product, or equivalently, multiplying 277b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang// fixed-point numbers in the [-1 .. +1] interval. 287b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 297b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang#ifndef GEMMLOWP_INTERNAL_FIXEDPOINT_H_ 307b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang#define GEMMLOWP_INTERNAL_FIXEDPOINT_H_ 317b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 327b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang#include "common.h" 337b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 347b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang#include <limits> 357b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang#include <cassert> 367b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 377b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangnamespace gemmlowp { 387b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 397b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangtemplate <typename tIntegerType> 407b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao WangtIntegerType BitAnd(tIntegerType a, tIntegerType b) { 417b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return a & b; 427b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang} 437b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 447b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangtemplate <typename tIntegerType> 
457b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao WangtIntegerType BitOr(tIntegerType a, tIntegerType b) { 467b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return a | b; 477b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang} 487b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 497b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangtemplate <typename tIntegerType> 507b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao WangtIntegerType BitXor(tIntegerType a, tIntegerType b) { 517b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return a ^ b; 527b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang} 537b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 547b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangtemplate <typename tIntegerType> 557b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao WangtIntegerType BitNot(tIntegerType a) { 567b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return ~a; 577b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang} 587b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 597b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangtemplate <typename tIntegerType> 607b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao WangtIntegerType Add(tIntegerType a, tIntegerType b) { 617b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return a + b; 627b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang} 637b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 647b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangtemplate <typename tIntegerType> 657b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao WangtIntegerType Sub(tIntegerType a, tIntegerType b) { 667b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return a - b; 677b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang} 687b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 697b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangtemplate <typename tIntegerType> 707b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao WangtIntegerType Neg(tIntegerType a) { 717b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return -a; 727b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang} 
737b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 747b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangtemplate <typename tIntegerType> 757b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao WangtIntegerType ShiftLeft(tIntegerType a, int offset) { 767b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return a * (1 << offset); 777b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang} 787b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 797b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangtemplate <typename tIntegerType> 807b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao WangtIntegerType ShiftRight(tIntegerType a, int offset) { 817b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return a / (1 << offset); 827b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang} 837b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 847b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangtemplate <typename tIntegerType> 857b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao WangtIntegerType SelectUsingMask(tIntegerType if_mask, tIntegerType then_val, 867b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang tIntegerType else_val) { 877b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return BitXor(BitAnd(if_mask, then_val), BitAnd(BitNot(if_mask), else_val)); 887b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang} 897b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 907b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangtemplate <typename tIntegerType> 917b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao WangtIntegerType MaskIfNonZero(tIntegerType a) { 927b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang static const tIntegerType zero = 0; 937b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return a ? 
BitNot(zero) : zero; 947b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang} 957b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 967b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangtemplate <typename tIntegerType> 977b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao WangtIntegerType MaskIfZero(tIntegerType a) { 987b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return MaskIfNonZero<tIntegerType>(!a); 997b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang} 1007b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 1017b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangtemplate <typename tIntegerType> 1027b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao WangtIntegerType MaskIfEqual(tIntegerType a, tIntegerType b) { 1037b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return MaskIfNonZero<tIntegerType>(a == b); 1047b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang} 1057b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 1067b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangtemplate <typename tIntegerType> 1077b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao WangtIntegerType MaskIfNotEqual(tIntegerType a, tIntegerType b) { 1087b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return MaskIfNonZero<tIntegerType>(a != b); 1097b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang} 1107b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 1117b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangtemplate <typename tIntegerType> 1127b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao WangtIntegerType MaskIfGreaterThan(tIntegerType a, tIntegerType b) { 1137b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return MaskIfNonZero<tIntegerType>(a > b); 1147b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang} 1157b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 1167b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangtemplate <typename tIntegerType> 1177b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao WangtIntegerType MaskIfGreaterThanOrEqual(tIntegerType a, tIntegerType b) { 1187b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return 
MaskIfNonZero<tIntegerType>(a >= b); 1197b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang} 1207b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 1217b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangtemplate <typename tIntegerType> 1227b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao WangtIntegerType MaskIfLessThan(tIntegerType a, tIntegerType b) { 1237b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return MaskIfNonZero<tIntegerType>(a < b); 1247b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang} 1257b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 1267b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangtemplate <typename tIntegerType> 1277b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao WangtIntegerType MaskIfLessThanOrEqual(tIntegerType a, tIntegerType b) { 1287b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return MaskIfNonZero<tIntegerType>(a <= b); 1297b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang} 1307b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 1317b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangtemplate <typename tIntegerType> 1327b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangbool All(tIntegerType a) { 1337b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return a; 1347b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang} 1357b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 1367b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangtemplate <typename tIntegerType> 1377b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangbool Any(tIntegerType a) { 1387b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return a; 1397b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang} 1407b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 1417b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangtemplate <typename IntegerType> 1427b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao WangIntegerType RoundingHalfSum(IntegerType a, IntegerType b) { 1437b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang static_assert(std::is_same<IntegerType, void>::value, "unimplemented"); 1447b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 
return a; 1457b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang} 1467b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 1477b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangtemplate <> 1487b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wanginline int32_t RoundingHalfSum(int32_t a, int32_t b) { 1497b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang int64_t a64 = a; 1507b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang int64_t b64 = b; 1517b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang int64_t sum = a64 + b64; 1527b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang int64_t sign = sum >= 0 ? 1 : -1; 1537b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return static_cast<int32_t>((sum + sign) / 2); 1547b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang} 1557b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 1567b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangtemplate <typename IntegerType> 1577b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao WangIntegerType SaturatingRoundingDoublingHighMul(IntegerType a, IntegerType b) { 1587b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang static_assert(std::is_same<IntegerType, void>::value, "unimplemented"); 1597b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return a; 1607b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang} 1617b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 1627b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang// This function implements the same computation as the ARMv7 NEON VQRDMULH 1637b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang// instruction. 
1647b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangtemplate <> 1657b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wanginline int32_t SaturatingRoundingDoublingHighMul(int32_t a, int32_t b) { 1667b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang bool overflow = a == b && a == std::numeric_limits<int32_t>::min(); 1677b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang int64_t a_64(a); 1687b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang int64_t b_64(b); 1697b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang int64_t ab_64 = a_64 * b_64; 1707b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang int32_t nudge = ab_64 >= 0 ? (1 << 30) : (1 - (1 << 30)); 1717b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang int32_t ab_x2_high32 = static_cast<int32_t>((ab_64 + nudge) / (1ll << 31)); 1727b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return overflow ? std::numeric_limits<int32_t>::max() : ab_x2_high32; 1737b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang} 1747b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 1757b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangtemplate <int Exponent, typename IntegerType, 1767b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang int ExponentSign = (Exponent > 0 ? 1 : Exponent < 0 ? 
-1 : 0)> 1777b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangstruct ImplSaturatingRoundingMultiplyByPOT {}; 1787b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 1797b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangtemplate <int Exponent, typename IntegerType> 1807b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangstruct ImplSaturatingRoundingMultiplyByPOT<Exponent, IntegerType, 0> { 1817b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang static IntegerType eval(IntegerType x) { return x; } 1827b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang}; 1837b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 1847b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangtemplate <int Exponent> 1857b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangstruct ImplSaturatingRoundingMultiplyByPOT<Exponent, int32_t, 1> { 1867b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang static int32_t eval(int32_t x) { 1877b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang const int64_t min = std::numeric_limits<int32_t>::min(); 1887b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang const int64_t max = std::numeric_limits<int32_t>::max(); 1897b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return x >= (1 << (31 - Exponent)) ? max : x <= -(1 << (31 - Exponent)) 1907b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang ? 
min 1917b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang : x * (1 << Exponent); 1927b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang } 1937b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang}; 1947b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 1957b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangtemplate <int Exponent> 1967b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangstruct ImplSaturatingRoundingMultiplyByPOT<Exponent, int32_t, -1> { 1977b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang static int32_t eval(int32_t x) { 1987b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang int32_t b = (std::abs(x) & (1 << (-Exponent - 1))) >> (-Exponent - 1); 1997b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang int32_t nudge = x >= 0 ? b : -b; 2007b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return x / (1 << -Exponent) + nudge; 2017b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang } 2027b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang}; 2037b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 2047b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangtemplate <int Exponent, typename IntegerType> 2057b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao WangIntegerType SaturatingRoundingMultiplyByPOT(IntegerType x) { 2067b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return ImplSaturatingRoundingMultiplyByPOT<Exponent, IntegerType>::eval(x); 2077b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang} 2087b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 2097b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangtemplate <typename tIntegerType> 2107b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangstruct FixedPointRawTypeTraits {}; 2117b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 2127b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangtemplate <> 2137b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangstruct FixedPointRawTypeTraits<int32_t> { 2147b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang typedef int32_t ScalarRawType; 2157b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang static const int kLanes = 1; 
2167b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang}; 2177b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 2187b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangtemplate <typename tRawType> 2197b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao WangtRawType Dup(typename FixedPointRawTypeTraits<tRawType>::ScalarRawType x) { 2207b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return x; 2217b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang} 2227b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 2237b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangtemplate <typename tRawType, int tIntegerBits> 2247b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangclass FixedPoint { 2257b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang public: 2267b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang typedef tRawType RawType; 2277b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 2287b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang typedef FixedPointRawTypeTraits<RawType> RawTypeTraits; 2297b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang typedef typename RawTypeTraits::ScalarRawType ScalarRawType; 2307b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 2317b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang static const int kTotalBits = 8 * sizeof(ScalarRawType); 2327b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang static const int kIntegerBits = tIntegerBits; 2337b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang static const int kFractionalBits = kTotalBits - 1 - kIntegerBits; 2347b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang static_assert(kIntegerBits >= 0 && kIntegerBits < kTotalBits, 2357b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang "bad IntegerBits"); 2367b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 2377b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang typedef FixedPoint<ScalarRawType, kIntegerBits> ScalarFixedPointType; 2387b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 2397b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang static const ScalarRawType ScalarRawMin() { 
2407b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return std::numeric_limits<ScalarRawType>::min(); 2417b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang } 2427b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 2437b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang static const ScalarRawType ScalarRawMax() { 2447b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return std::numeric_limits<ScalarRawType>::max(); 2457b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang } 2467b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 2477b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang static const ScalarRawType RawMin() { 2487b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return VectorFromScalar(ScalarRawMin()); 2497b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang } 2507b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 2517b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang static const ScalarRawType RawMax() { 2527b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return VectorFromScalar(ScalarRawMax()); 2537b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang } 2547b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 2557b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang static FixedPoint FromRaw(RawType x) { 2567b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang FixedPoint retval; 2577b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang retval.raw() = x; 2587b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return retval; 2597b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang } 2607b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 2617b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang static FixedPoint FromScalarRaw(ScalarRawType x) { 2627b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang FixedPoint retval; 2637b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang retval.raw() = Dup<RawType>(x); 2647b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return retval; 2657b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang } 2667b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 
2677b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang static FixedPoint FromScalarFixedPoint(ScalarFixedPointType x) { 2687b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return FromScalarRaw(x.raw()); 2697b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang } 2707b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 2717b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang template <int Exponent> 2727b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang static FixedPoint ConstantPOT() { 2737b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang static const int kOffset = kFractionalBits + Exponent; 2747b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang static_assert( 2757b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang kOffset < 31, 2767b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang "Constant not exactly representable in this fixed-point format"); 2777b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return FromScalarRaw(ScalarRawType(1) << kOffset); 2787b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang } 2797b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 2807b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang static FixedPoint Zero() { return FromScalarRaw(0); } 2817b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 2827b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang static FixedPoint One() { 2837b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return FromScalarRaw(kIntegerBits == 0 2847b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang ? 
ScalarRawMax() 2857b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang : (ScalarRawType(1) << kFractionalBits)); 2867b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang } 2877b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 2887b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang RawType raw() const { return i_; } 2897b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang RawType& raw() { return i_; } 2907b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 2917b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang private: 2927b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang RawType i_; 2937b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang}; 2947b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 2957b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangtemplate <typename tRawType, int tIntegerBits_a, int tIntegerBits_b> 2967b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao WangFixedPoint<tRawType, tIntegerBits_a + tIntegerBits_b> operator*( 2977b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang FixedPoint<tRawType, tIntegerBits_a> a, 2987b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang FixedPoint<tRawType, tIntegerBits_b> b) { 2997b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang FixedPoint<tRawType, tIntegerBits_a + tIntegerBits_b> c; 3007b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang c.raw() = SaturatingRoundingDoublingHighMul(a.raw(), b.raw()); 3017b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return c; 3027b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang} 3037b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 3047b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangtemplate <int tExponent, typename tRawType, int tIntegerBits> 3057b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao WangFixedPoint<tRawType, tExponent + tIntegerBits> ExactMulByPot( 3067b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang FixedPoint<tRawType, tIntegerBits> a) { 3077b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang FixedPoint<tRawType, tExponent + tIntegerBits> c; 3087b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang c.raw() = a.raw(); 
3097b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return c; 3107b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang} 3117b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 3127b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangtemplate <int tExponent, typename tRawType, int tIntegerBits> 3137b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao WangFixedPoint<tRawType, tIntegerBits> SaturatingRoundingMultiplyByPOT( 3147b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang FixedPoint<tRawType, tIntegerBits> a) { 3157b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return FixedPoint<tRawType, tIntegerBits>::FromRaw( 3167b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang SaturatingRoundingMultiplyByPOT<tExponent>(a.raw())); 3177b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang} 3187b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 3197b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang#define MAKE_FIXEDPOINT_UNARY_FUNC(FuncName, ImplFuncName) \ 3207b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang template <typename tRawType, int tIntegerBits> \ 3217b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang FixedPoint<tRawType, tIntegerBits> FuncName( \ 3227b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang FixedPoint<tRawType, tIntegerBits> a) { \ 3237b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return FixedPoint<tRawType, tIntegerBits>::FromRaw(ImplFuncName(a.raw())); \ 3247b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang } 3257b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 3267b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang#define MAKE_FIXEDPOINT_BINARY_FUNC(FuncName, ImplFuncName) \ 3277b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang template <typename tRawType, int tIntegerBits> \ 3287b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang FixedPoint<tRawType, tIntegerBits> FuncName( \ 3297b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang FixedPoint<tRawType, tIntegerBits> a, \ 3307b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang FixedPoint<tRawType, tIntegerBits> b) { \ 
3317b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return FixedPoint<tRawType, tIntegerBits>::FromRaw( \ 3327b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang ImplFuncName(a.raw(), b.raw())); \ 3337b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang } 3347b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 3357b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao WangMAKE_FIXEDPOINT_UNARY_FUNC(operator-, Neg) 3367b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao WangMAKE_FIXEDPOINT_UNARY_FUNC(operator~, BitNot) 3377b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao WangMAKE_FIXEDPOINT_BINARY_FUNC(operator+, Add) 3387b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao WangMAKE_FIXEDPOINT_BINARY_FUNC(operator-, Sub) 3397b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao WangMAKE_FIXEDPOINT_BINARY_FUNC(operator&, BitAnd) 3407b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao WangMAKE_FIXEDPOINT_BINARY_FUNC(operator^, BitXor) 3417b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao WangMAKE_FIXEDPOINT_BINARY_FUNC(operator|, BitOr) 3427b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao WangMAKE_FIXEDPOINT_BINARY_FUNC(RoundingHalfSum, RoundingHalfSum) 3437b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 3447b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang#undef MAKE_FIXEDPOINT_UNARY_FUNC 3457b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang#undef MAKE_FIXEDPOINT_BINARY_FUNC 3467b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 3477b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang#define MAKE_FIXEDPOINT_UNARY_FUNC_RETURNING_RAW(FuncName) \ 3487b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang template <typename tRawType, int tIntegerBits> \ 3497b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang tRawType FuncName(FixedPoint<tRawType, tIntegerBits> a) { \ 3507b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return FuncName(a.raw()); \ 3517b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang } 3527b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 3537b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang#define 
MAKE_FIXEDPOINT_BINARY_FUNC_RETURNING_RAW(FuncName) \ 3547b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang template <typename tRawType, int tIntegerBits> \ 3557b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang tRawType FuncName(FixedPoint<tRawType, tIntegerBits> a, \ 3567b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang FixedPoint<tRawType, tIntegerBits> b) { \ 3577b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return FuncName(a.raw(), b.raw()); \ 3587b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang } 3597b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 3607b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao WangMAKE_FIXEDPOINT_UNARY_FUNC_RETURNING_RAW(MaskIfZero) 3617b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao WangMAKE_FIXEDPOINT_UNARY_FUNC_RETURNING_RAW(MaskIfNonZero) 3627b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao WangMAKE_FIXEDPOINT_BINARY_FUNC_RETURNING_RAW(MaskIfEqual) 3637b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao WangMAKE_FIXEDPOINT_BINARY_FUNC_RETURNING_RAW(MaskIfNotEqual) 3647b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao WangMAKE_FIXEDPOINT_BINARY_FUNC_RETURNING_RAW(MaskIfGreaterThan) 3657b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao WangMAKE_FIXEDPOINT_BINARY_FUNC_RETURNING_RAW(MaskIfGreaterThanOrEqual) 3667b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao WangMAKE_FIXEDPOINT_BINARY_FUNC_RETURNING_RAW(MaskIfLessThan) 3677b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao WangMAKE_FIXEDPOINT_BINARY_FUNC_RETURNING_RAW(MaskIfLessThanOrEqual) 3687b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 3697b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang#undef MAKE_FIXEDPOINT_UNARY_FUNC_RETURNING_RAW 3707b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang#undef MAKE_FIXEDPOINT_BINARY_FUNC_RETURNING_RAW 3717b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 3727b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangtemplate <typename tRawType, int tIntegerBits> 3737b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao WangFixedPoint<tRawType, tIntegerBits> SelectUsingMask( 
3747b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang tRawType if_mask, FixedPoint<tRawType, tIntegerBits> then_val, 3757b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang FixedPoint<tRawType, tIntegerBits> else_val) { 3767b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return FixedPoint<tRawType, tIntegerBits>::FromRaw( 3777b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang SelectUsingMask(if_mask, then_val.raw(), else_val.raw())); 3787b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang} 3797b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 3807b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangtemplate <typename tRawType, int tIntegerBits> 3817b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangbool operator==(FixedPoint<tRawType, tIntegerBits> a, 3827b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang FixedPoint<tRawType, tIntegerBits> b) { 3837b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return All(MaskIfEqual(a.raw(), b.raw())); 3847b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang} 3857b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 3867b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangtemplate <typename tRawType, int tIntegerBits> 3877b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangbool operator!=(FixedPoint<tRawType, tIntegerBits> a, 3887b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang FixedPoint<tRawType, tIntegerBits> b) { 3897b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return !(a == b); 3907b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang} 3917b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 3927b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangtemplate <typename tRawType, int tIntegerBits> 3937b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangdouble ToDouble(FixedPoint<tRawType, tIntegerBits> x) { 3947b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang static_assert(FixedPointRawTypeTraits<tRawType>::kLanes == 1, 3957b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang "not applicable to SIMD types"); 3967b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang typedef 
FixedPoint<tRawType, tIntegerBits> F; 3977b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return x.raw() / double(1ll << F::kFractionalBits); 3987b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang} 3997b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 4007b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangtemplate <typename tRawType, int tIntegerBits> 4017b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao WangFixedPoint<tRawType, tIntegerBits> ToFixedPoint(double x) { 4027b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang typedef FixedPoint<tRawType, tIntegerBits> F; 4037b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return F::FromScalarRaw(static_cast<int32_t>( 4047b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang std::min(std::max(round(x * double(1ll << F::kFractionalBits)), 4057b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang double(F::ScalarRawMin())), 4067b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang double(F::ScalarRawMax())))); 4077b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang} 4087b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 4097b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangtemplate <int tIntegerBitsDst, typename tRawType, int tIntegerBitsSrc> 4107b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao WangFixedPoint<tRawType, tIntegerBitsDst> Rescale( 4117b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang FixedPoint<tRawType, tIntegerBitsSrc> x) { 4127b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang static const int kExponent = tIntegerBitsSrc - tIntegerBitsDst; 4137b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang FixedPoint<tRawType, tIntegerBitsDst> result; 4147b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang result.raw() = SaturatingRoundingMultiplyByPOT<kExponent>(x.raw()); 4157b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return result; 4167b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang} 4177b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 4187b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang#ifdef GEMMLOWP_ENABLE_FIXEDPOINT_CONSTANTS_CHECKS 
4197b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangtemplate <typename FixedPointType> 4207b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao WangFixedPointType CheckedFixedPointConstant( 4217b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang typename FixedPointType::ScalarRawType raw_value, double double_value) { 4227b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang typedef typename FixedPointType::RawType RawType; 4237b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang static const int kIntegerBits = FixedPointType::kIntegerBits; 4247b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang FixedPointType ref = FixedPointType::FromScalarRaw(raw_value); 4257b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang FixedPointType check = ToFixedPoint<RawType, kIntegerBits>(double_value); 4267b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang assert(ref == check); 4277b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return ref; 4287b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang} 4297b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang#define GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(FixedPointType, ScalarRawValue, \ 4307b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang DoubleValue) \ 4317b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang (CheckedFixedPointConstant<FixedPointType>(ScalarRawValue, DoubleValue)) 4327b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 4337b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang#else 4347b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang#define GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(FixedPointType, ScalarRawValue, \ 4357b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang DoubleValue) \ 4367b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang (FixedPointType::FromScalarRaw(ScalarRawValue)) 4377b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang#endif 4387b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 4397b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangtemplate <typename tRawType> 4407b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao WangFixedPoint<tRawType, 0> 
exp_on_interval_between_negative_one_quarter_and_0_excl( 4417b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang FixedPoint<tRawType, 0> a) { 4427b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang typedef FixedPoint<tRawType, 0> F; 4437b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang const F constant_term = 4447b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F, 1895147668, std::exp(-1.0 / 8.0)); 4457b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang const F constant_1_over_3 = 4467b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F, 715827883, 1.0 / 3.0); 4477b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang // We're evaluating a Taylor expansion around -1/8, so we do the change of 4487b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang // variable: x = a + 1/8. 4497b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang // In fixed-point with 0 integer bits, 1/8 is represented by 1 << 28. 4507b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang F x = a + F::template ConstantPOT<-3>(); 4517b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang F x2 = x * x; 4527b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang F x3 = x2 * x; 4537b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang F x4 = x2 * x2; 4547b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang F x4_over_4 = SaturatingRoundingMultiplyByPOT<-2>(x4); 4557b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang F x4_over_24_plus_x3_over_6_plus_x2_over_2 = 4567b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang SaturatingRoundingMultiplyByPOT<-1>( 4577b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang ((x4_over_4 + x3) * constant_1_over_3) + x2); 4587b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return constant_term + 4597b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang constant_term * (x + x4_over_24_plus_x3_over_6_plus_x2_over_2); 4607b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang} 4617b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 4627b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao 
Wangtemplate <typename tRawType, int tIntegerBits> 4637b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao WangFixedPoint<tRawType, 0> exp_on_negative_values( 4647b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang FixedPoint<tRawType, tIntegerBits> a) { 4657b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang typedef FixedPoint<tRawType, tIntegerBits> InputF; 4667b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang typedef FixedPoint<tRawType, 0> ResultF; 4677b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang static const int kFractionalBits = InputF::kFractionalBits; 4687b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang static const int kIntegerBits = InputF::kIntegerBits; 4697b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang static const InputF kOneQuarter = InputF::template ConstantPOT<-2>(); 4707b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang InputF mask = kOneQuarter - InputF::FromScalarRaw(1); 4717b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang InputF a_mod_quarter_minus_one_quarter = (a & mask) - kOneQuarter; 4727b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang ResultF result = exp_on_interval_between_negative_one_quarter_and_0_excl( 4737b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang Rescale<0>(a_mod_quarter_minus_one_quarter)); 4747b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang tRawType remainder = (a_mod_quarter_minus_one_quarter - a).raw(); 4757b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 4767b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang#define GEMMLOWP_EXP_BARREL_SHIFTER(Exponent, FixedPointMultiplier) \ 4777b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang if (kIntegerBits > Exponent) { \ 4787b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang const ResultF kMultiplier = GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT( \ 4797b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang ResultF, FixedPointMultiplier, std::exp(-std::pow(2.0, Exponent))); \ 4807b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang result = SelectUsingMask( \ 4817b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 
MaskIfNonZero(BitAnd( \ 4827b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang remainder, Dup<tRawType>(1 << (kFractionalBits + Exponent)))), \ 4837b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang result * kMultiplier, result); \ 4847b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang } 4857b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 4867b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang GEMMLOWP_EXP_BARREL_SHIFTER(-2, 1672461947); 4877b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang GEMMLOWP_EXP_BARREL_SHIFTER(-1, 1302514674); 4887b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang GEMMLOWP_EXP_BARREL_SHIFTER(+0, 790015084); 4897b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang GEMMLOWP_EXP_BARREL_SHIFTER(+1, 290630308); 4907b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang GEMMLOWP_EXP_BARREL_SHIFTER(+2, 39332535); 4917b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang GEMMLOWP_EXP_BARREL_SHIFTER(+3, 720401); 4927b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang GEMMLOWP_EXP_BARREL_SHIFTER(+4, 242); 4937b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 4947b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang#undef GEMMLOWP_EXP_BARREL_SHIFTER 4957b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 4967b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang if (kIntegerBits > 5) { 4977b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang static const int b = kIntegerBits > 5 ? 
kFractionalBits + 5 : 0; 4987b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang const InputF clamp = 4997b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(InputF, -(1 << b), -32.0); 5007b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang result = SelectUsingMask(MaskIfLessThan(a, clamp), ResultF::Zero(), result); 5017b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang } 5027b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 5037b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang result = SelectUsingMask(MaskIfZero(a), ResultF::One(), result); 5047b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return result; 5057b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang} 5067b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 5077b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangtemplate <typename tRawType> 5087b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao WangFixedPoint<tRawType, 0> one_minus_x_over_one_plus_x_for_x_in_0_1( 5097b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang FixedPoint<tRawType, 0> a) { 5107b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang typedef FixedPoint<tRawType, 0> F0; 5117b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang typedef FixedPoint<tRawType, 2> F2; 5127b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang F0 half_denominator = RoundingHalfSum(a, F0::One()); 5137b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang const F2 constant_48_over_17 = 5147b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F2, 1515870810, 48.0 / 17.0); 5157b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang const F2 constant_neg_32_over_17 = 5167b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang GEMMLOWP_CHECKED_FIXEDPOINT_CONSTANT(F2, -1010580540, -32.0 / 17.0); 5177b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang F2 x = constant_48_over_17 + half_denominator * constant_neg_32_over_17; 5187b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang for (int i = 0; i < 3; i++) { 5197b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang F2 
half_denominator_times_x = half_denominator * x; 5207b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang F2 one_minus_half_denominator_times_x = 5217b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang F2::One() - half_denominator_times_x; 5227b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang x = x + Rescale<2>(x * one_minus_half_denominator_times_x); 5237b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang } 5247b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return Rescale<0>(x - F2::One()); 5257b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang} 5267b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 5277b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangtemplate <typename tRawType, int tIntegerBits> 5287b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao WangFixedPoint<tRawType, 0> neg_tanh_on_negative_values( 5297b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang FixedPoint<tRawType, tIntegerBits> a) { 5307b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return one_minus_x_over_one_plus_x_for_x_in_0_1( 5317b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang exp_on_negative_values(ExactMulByPot<1>(a))); 5327b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang} 5337b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 5347b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wangtemplate <typename tRawType, int tIntegerBits> 5357b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao WangFixedPoint<tRawType, 0> tanh(FixedPoint<tRawType, tIntegerBits> a) { 5367b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang typedef FixedPoint<tRawType, tIntegerBits> InputF; 5377b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang typedef FixedPoint<tRawType, 0> ResultF; 5387b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang tRawType mask_if_negative = MaskIfLessThan(a, InputF::Zero()); 5397b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang tRawType mask_if_zero = MaskIfZero(a); 5407b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang InputF n = SelectUsingMask(mask_if_negative, a, -a); 5417b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang ResultF t = 
neg_tanh_on_negative_values(n); 5427b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang return SelectUsingMask(mask_if_zero, ResultF::Zero(), 5437b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang SelectUsingMask(mask_if_negative, -t, t)); 5447b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang} 5457b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 5467b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang} // end namespace gemmlowp 5477b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 5487b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang#ifdef GEMMLOWP_NEON 5497b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang#include "fixedpoint_neon.h" 5507b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang#endif 5517b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang 5527b05d573cf2e0fd3a58e98cdbfc65153a83fd6f1Miao Wang#endif // GEMMLOWP_INTERNAL_FIXEDPOINT_H_ 553