138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/*---------------------------------------------------------------*/ 3752f90673ebbb6b2f55fc5e46606dea371313713sewardj/*--- begin host_generic_simd64.c ---*/ 438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/*---------------------------------------------------------------*/ 538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* 7752f90673ebbb6b2f55fc5e46606dea371313713sewardj This file is part of Valgrind, a dynamic binary instrumentation 8752f90673ebbb6b2f55fc5e46606dea371313713sewardj framework. 938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 1089ae8477745fd2a15453557d729a50e627325ee2sewardj Copyright (C) 2004-2013 OpenWorks LLP 11752f90673ebbb6b2f55fc5e46606dea371313713sewardj info@open-works.net 127bd6ffe203f3aa9e7b25f7eae40a9b9cf48710cfsewardj 13752f90673ebbb6b2f55fc5e46606dea371313713sewardj This program is free software; you can redistribute it and/or 14752f90673ebbb6b2f55fc5e46606dea371313713sewardj modify it under the terms of the GNU General Public License as 15752f90673ebbb6b2f55fc5e46606dea371313713sewardj published by the Free Software Foundation; either version 2 of the 16752f90673ebbb6b2f55fc5e46606dea371313713sewardj License, or (at your option) any later version. 177bd6ffe203f3aa9e7b25f7eae40a9b9cf48710cfsewardj 18752f90673ebbb6b2f55fc5e46606dea371313713sewardj This program is distributed in the hope that it will be useful, but 19752f90673ebbb6b2f55fc5e46606dea371313713sewardj WITHOUT ANY WARRANTY; without even the implied warranty of 20752f90673ebbb6b2f55fc5e46606dea371313713sewardj MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21752f90673ebbb6b2f55fc5e46606dea371313713sewardj General Public License for more details. 22752f90673ebbb6b2f55fc5e46606dea371313713sewardj 23752f90673ebbb6b2f55fc5e46606dea371313713sewardj You should have received a copy of the GNU General Public License 24752f90673ebbb6b2f55fc5e46606dea371313713sewardj along with this program; if not, write to the Free Software 25752f90673ebbb6b2f55fc5e46606dea371313713sewardj Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 267bd6ffe203f3aa9e7b25f7eae40a9b9cf48710cfsewardj 02110-1301, USA. 277bd6ffe203f3aa9e7b25f7eae40a9b9cf48710cfsewardj 28752f90673ebbb6b2f55fc5e46606dea371313713sewardj The GNU General Public License is contained in the file COPYING. 2938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 3038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj Neither the names of the U.S. Department of Energy nor the 3138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj University of California nor the names of its contributors may be 3238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj used to endorse or promote products derived from this software 3338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj without prior written permission. 3438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj*/ 3538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 3638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* Generic helper functions for doing 64-bit SIMD arithmetic in cases 3738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj where the instruction selectors cannot generate code in-line. 3838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj These are purely back-end entities and cannot be seen/referenced 398bde7f1c67483371551aac0d4019c24c919063f7sewardj from IR. There are also helpers for 32-bit arithmetic in here. */ 4038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 4138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj#include "libvex_basictypes.h" 428bde7f1c67483371551aac0d4019c24c919063f7sewardj#include "main_util.h" // LIKELY, UNLIKELY 43cef7d3e3df4796e35b4521158d9dc058f034aa87sewardj#include "host_generic_simd64.h" 4438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 4538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 4638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 4738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* Tuple/select functions for 32x2 vectors. */ 4838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 4938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline ULong mk32x2 ( UInt w1, UInt w0 ) { 5038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return (((ULong)w1) << 32) | ((ULong)w0); 5138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 5238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 5338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UInt sel32x2_1 ( ULong w64 ) { 54d19fc161147086f31126fef0955b426b4f843d02sewardj return 0xFFFFFFFF & toUInt(w64 >> 32); 5538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 5638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UInt sel32x2_0 ( ULong w64 ) { 57d19fc161147086f31126fef0955b426b4f843d02sewardj return 0xFFFFFFFF & toUInt(w64); 5838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 5938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 6038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 6138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* Tuple/select functions for 16x4 vectors. gcc is pretty hopeless 6238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj with 64-bit shifts so we give it a hand. */ 6338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 6438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline ULong mk16x4 ( UShort w3, UShort w2, 6538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UShort w1, UShort w0 ) { 6638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UInt hi32 = (((UInt)w3) << 16) | ((UInt)w2); 6738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UInt lo32 = (((UInt)w1) << 16) | ((UInt)w0); 6838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk32x2(hi32, lo32); 6938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 7038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 7138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UShort sel16x4_3 ( ULong w64 ) { 72d19fc161147086f31126fef0955b426b4f843d02sewardj UInt hi32 = toUInt(w64 >> 32); 73d19fc161147086f31126fef0955b426b4f843d02sewardj return toUShort(0xFFFF & (hi32 >> 16)); 7438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 7538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UShort sel16x4_2 ( ULong w64 ) { 76d19fc161147086f31126fef0955b426b4f843d02sewardj UInt hi32 = toUInt(w64 >> 32); 77d19fc161147086f31126fef0955b426b4f843d02sewardj return toUShort(0xFFFF & hi32); 7838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 7938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UShort sel16x4_1 ( ULong w64 ) { 8038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UInt lo32 = (UInt)w64; 81d19fc161147086f31126fef0955b426b4f843d02sewardj return toUShort(0xFFFF & (lo32 >> 16)); 8238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 8338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UShort sel16x4_0 ( ULong w64 ) { 8438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UInt lo32 = (UInt)w64; 85d19fc161147086f31126fef0955b426b4f843d02sewardj return toUShort(0xFFFF & lo32); 8638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 8738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 8838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 8938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* Tuple/select functions for 8x8 vectors. */ 9038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 9138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline ULong mk8x8 ( UChar w7, UChar w6, 9238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UChar w5, UChar w4, 9338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UChar w3, UChar w2, 94e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj UChar w1, UChar w0 ) { 9538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UInt hi32 = (((UInt)w7) << 24) | (((UInt)w6) << 16) 9638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj | (((UInt)w5) << 8) | (((UInt)w4) << 0); 9738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UInt lo32 = (((UInt)w3) << 24) | (((UInt)w2) << 16) 9838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj | (((UInt)w1) << 8) | (((UInt)w0) << 0); 9938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk32x2(hi32, lo32); 10038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 10138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 10238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar sel8x8_7 ( ULong w64 ) { 103d19fc161147086f31126fef0955b426b4f843d02sewardj UInt hi32 = toUInt(w64 >> 32); 104d19fc161147086f31126fef0955b426b4f843d02sewardj return toUChar(0xFF & (hi32 >> 24)); 10538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 10638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar sel8x8_6 ( ULong w64 ) { 107d19fc161147086f31126fef0955b426b4f843d02sewardj UInt hi32 = toUInt(w64 >> 32); 108d19fc161147086f31126fef0955b426b4f843d02sewardj return toUChar(0xFF & (hi32 >> 16)); 10938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 11038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar sel8x8_5 ( ULong w64 ) { 111d19fc161147086f31126fef0955b426b4f843d02sewardj UInt hi32 = toUInt(w64 >> 32); 112d19fc161147086f31126fef0955b426b4f843d02sewardj return toUChar(0xFF & (hi32 >> 8)); 11338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 11438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar sel8x8_4 ( ULong w64 ) { 115d19fc161147086f31126fef0955b426b4f843d02sewardj UInt hi32 = toUInt(w64 >> 32); 116d19fc161147086f31126fef0955b426b4f843d02sewardj return toUChar(0xFF & (hi32 >> 0)); 11738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 11838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar sel8x8_3 ( ULong w64 ) { 11938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UInt lo32 = (UInt)w64; 120d19fc161147086f31126fef0955b426b4f843d02sewardj return toUChar(0xFF & (lo32 >> 24)); 12138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 12238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar sel8x8_2 ( ULong w64 ) { 12338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UInt lo32 = (UInt)w64; 124d19fc161147086f31126fef0955b426b4f843d02sewardj return toUChar(0xFF & (lo32 >> 16)); 12538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 12638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar sel8x8_1 ( ULong w64 ) { 12738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UInt lo32 = (UInt)w64; 128d19fc161147086f31126fef0955b426b4f843d02sewardj return toUChar(0xFF & (lo32 >> 8)); 12938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 13038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar sel8x8_0 ( ULong w64 ) { 13138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UInt lo32 = (UInt)w64; 132d19fc161147086f31126fef0955b426b4f843d02sewardj return toUChar(0xFF & (lo32 >> 0)); 13338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 13438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 135d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardjstatic inline UChar index8x8 ( ULong w64, UChar ix ) { 136d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj ix &= 7; 137d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj return toUChar((w64 >> (8*ix)) & 0xFF); 138d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj} 139d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj 14038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 14138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* Scalar helpers. */ 14238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 14344ce46d5945ed83d96695d280510cc2a858894dcsewardjstatic inline Int qadd32S ( Int xx, Int yy ) 14444ce46d5945ed83d96695d280510cc2a858894dcsewardj{ 14544ce46d5945ed83d96695d280510cc2a858894dcsewardj Long t = ((Long)xx) + ((Long)yy); 14644ce46d5945ed83d96695d280510cc2a858894dcsewardj const Long loLim = -0x80000000LL; 14744ce46d5945ed83d96695d280510cc2a858894dcsewardj const Long hiLim = 0x7FFFFFFFLL; 14844ce46d5945ed83d96695d280510cc2a858894dcsewardj if (t < loLim) t = loLim; 14944ce46d5945ed83d96695d280510cc2a858894dcsewardj if (t > hiLim) t = hiLim; 15044ce46d5945ed83d96695d280510cc2a858894dcsewardj return (Int)t; 15144ce46d5945ed83d96695d280510cc2a858894dcsewardj} 15244ce46d5945ed83d96695d280510cc2a858894dcsewardj 15338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline Short qadd16S ( Short xx, Short yy ) 15438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 15538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj Int t = ((Int)xx) + ((Int)yy); 15638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj if (t < -32768) t = -32768; 15738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj if (t > 32767) t = 32767; 15838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return (Short)t; 15938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 16038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 16138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline Char qadd8S ( Char xx, Char yy ) 16238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 16338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj Int t = ((Int)xx) + ((Int)yy); 16438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj if (t < -128) t = -128; 16538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj if (t > 127) t = 127; 16638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return (Char)t; 16738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 16838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 16938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UShort qadd16U ( UShort xx, UShort yy ) 17038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 17138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UInt t = ((UInt)xx) + ((UInt)yy); 17238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj if (t > 0xFFFF) t = 0xFFFF; 17338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return (UShort)t; 17438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 17538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 17638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar qadd8U ( UChar xx, UChar yy ) 17738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 17838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UInt t = ((UInt)xx) + ((UInt)yy); 17938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj if (t > 0xFF) t = 0xFF; 18038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return (UChar)t; 18138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 18238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 18344ce46d5945ed83d96695d280510cc2a858894dcsewardjstatic inline Int qsub32S ( Int xx, Int yy ) 18444ce46d5945ed83d96695d280510cc2a858894dcsewardj{ 18544ce46d5945ed83d96695d280510cc2a858894dcsewardj Long t = ((Long)xx) - ((Long)yy); 18644ce46d5945ed83d96695d280510cc2a858894dcsewardj const Long loLim = -0x80000000LL; 18744ce46d5945ed83d96695d280510cc2a858894dcsewardj const Long hiLim = 0x7FFFFFFFLL; 18844ce46d5945ed83d96695d280510cc2a858894dcsewardj if (t < loLim) t = loLim; 18944ce46d5945ed83d96695d280510cc2a858894dcsewardj if (t > hiLim) t = hiLim; 19044ce46d5945ed83d96695d280510cc2a858894dcsewardj return (Int)t; 19144ce46d5945ed83d96695d280510cc2a858894dcsewardj} 19244ce46d5945ed83d96695d280510cc2a858894dcsewardj 19338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline Short qsub16S ( Short xx, Short yy ) 19438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 19538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj Int t = ((Int)xx) - ((Int)yy); 19638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj if (t < -32768) t = -32768; 19738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj if (t > 32767) t = 32767; 19838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return (Short)t; 19938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 20038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 20138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline Char qsub8S ( Char xx, Char yy ) 20238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 20338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj Int t = ((Int)xx) - ((Int)yy); 20438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj if (t < -128) t = -128; 20538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj if (t > 127) t = 127; 20638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return (Char)t; 20738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 20838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 20938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UShort qsub16U ( UShort xx, UShort yy ) 21038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 21138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj Int t = ((Int)xx) - ((Int)yy); 21238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj if (t < 0) t = 0; 21338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj if (t > 0xFFFF) t = 0xFFFF; 21438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return (UShort)t; 21538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 21638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 21738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar qsub8U ( UChar xx, UChar yy ) 21838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 21938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj Int t = ((Int)xx) - ((Int)yy); 22038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj if (t < 0) t = 0; 22138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj if (t > 0xFF) t = 0xFF; 22238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return (UChar)t; 22338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 22438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 22538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline Short mul16 ( Short xx, Short yy ) 22638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 22738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj Int t = ((Int)xx) * ((Int)yy); 22838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return (Short)t; 22938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 23038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 231d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardjstatic inline Int mul32 ( Int xx, Int yy ) 232d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj{ 233d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj Int t = ((Int)xx) * ((Int)yy); 234d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj return (Int)t; 235d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj} 236d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj 23738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline Short mulhi16S ( Short xx, Short yy ) 23838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 23938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj Int t = ((Int)xx) * ((Int)yy); 24038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj t >>=/*s*/ 16; 24138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return (Short)t; 24238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 24338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 24438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UShort mulhi16U ( UShort xx, UShort yy ) 24538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 24638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UInt t = ((UInt)xx) * ((UInt)yy); 24738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj t >>=/*u*/ 16; 24838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return (UShort)t; 24938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 25038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 25138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UInt cmpeq32 ( UInt xx, UInt yy ) 25238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 25338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return xx==yy ? 0xFFFFFFFF : 0; 25438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 25538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 25638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UShort cmpeq16 ( UShort xx, UShort yy ) 25738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 258d19fc161147086f31126fef0955b426b4f843d02sewardj return toUShort(xx==yy ? 0xFFFF : 0); 25938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 26038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 26138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar cmpeq8 ( UChar xx, UChar yy ) 26238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 263d19fc161147086f31126fef0955b426b4f843d02sewardj return toUChar(xx==yy ? 0xFF : 0); 26438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 26538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 26638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UInt cmpgt32S ( Int xx, Int yy ) 26738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 26838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return xx>yy ? 0xFFFFFFFF : 0; 26938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 27038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 27138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UShort cmpgt16S ( Short xx, Short yy ) 27238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 273d19fc161147086f31126fef0955b426b4f843d02sewardj return toUShort(xx>yy ? 0xFFFF : 0); 27438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 27538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 27638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar cmpgt8S ( Char xx, Char yy ) 27738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 278d19fc161147086f31126fef0955b426b4f843d02sewardj return toUChar(xx>yy ? 0xFF : 0); 27938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 28038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 2811806918ae2783af5808f00876581e01c7b650a0dsewardjstatic inline UInt cmpnez32 ( UInt xx ) 2821806918ae2783af5808f00876581e01c7b650a0dsewardj{ 2831806918ae2783af5808f00876581e01c7b650a0dsewardj return xx==0 ? 0 : 0xFFFFFFFF; 2841806918ae2783af5808f00876581e01c7b650a0dsewardj} 2851806918ae2783af5808f00876581e01c7b650a0dsewardj 2861806918ae2783af5808f00876581e01c7b650a0dsewardjstatic inline UShort cmpnez16 ( UShort xx ) 2871806918ae2783af5808f00876581e01c7b650a0dsewardj{ 288d19fc161147086f31126fef0955b426b4f843d02sewardj return toUShort(xx==0 ? 0 : 0xFFFF); 2891806918ae2783af5808f00876581e01c7b650a0dsewardj} 2901806918ae2783af5808f00876581e01c7b650a0dsewardj 2911806918ae2783af5808f00876581e01c7b650a0dsewardjstatic inline UChar cmpnez8 ( UChar xx ) 2921806918ae2783af5808f00876581e01c7b650a0dsewardj{ 293d19fc161147086f31126fef0955b426b4f843d02sewardj return toUChar(xx==0 ? 0 : 0xFF); 2941806918ae2783af5808f00876581e01c7b650a0dsewardj} 2951806918ae2783af5808f00876581e01c7b650a0dsewardj 296c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardjstatic inline Short qnarrow32Sto16S ( UInt xx0 ) 29738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 29838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj Int xx = (Int)xx0; 29938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj if (xx < -32768) xx = -32768; 30038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj if (xx > 32767) xx = 32767; 30138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return (Short)xx; 30238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 30338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 304c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardjstatic inline Char qnarrow16Sto8S ( UShort xx0 ) 30538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 30638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj Short xx = (Short)xx0; 30738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj if (xx < -128) xx = -128; 30838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj if (xx > 127) xx = 127; 30938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return (Char)xx; 31038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 31138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 312c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardjstatic inline UChar qnarrow16Sto8U ( UShort xx0 ) 31338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 31438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj Short xx = (Short)xx0; 31538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj if (xx < 0) xx = 0; 31638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj if (xx > 255) xx = 255; 31738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return (UChar)xx; 31838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 31938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 320ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardjstatic inline UShort narrow32to16 ( UInt xx ) 321ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj{ 322ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj return (UShort)xx; 323ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj} 324ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj 325ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardjstatic inline UChar narrow16to8 ( UShort xx ) 326ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj{ 327ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj return (UChar)xx; 328ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj} 329ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj 33038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* shifts: we don't care about out-of-range ones, since 33138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj that is dealt with at a higher level. */ 33238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 333d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardjstatic inline UChar shl8 ( UChar v, UInt n ) 334d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj{ 335d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj return toUChar(v << n); 336d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj} 337d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj 338d71ba837242cc470f622335b1c650bce8886a533sewardjstatic inline UChar sar8 ( UChar v, UInt n ) 339d71ba837242cc470f622335b1c650bce8886a533sewardj{ 340d71ba837242cc470f622335b1c650bce8886a533sewardj return toUChar(((Char)v) >> n); 341d71ba837242cc470f622335b1c650bce8886a533sewardj} 342d71ba837242cc470f622335b1c650bce8886a533sewardj 34338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UShort shl16 ( UShort v, UInt n ) 34438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 345d19fc161147086f31126fef0955b426b4f843d02sewardj return toUShort(v << n); 34638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 34738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 34838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UShort shr16 ( UShort v, UInt n ) 34938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 350d19fc161147086f31126fef0955b426b4f843d02sewardj return toUShort((((UShort)v) >> n)); 35138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 35238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 35338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UShort sar16 ( UShort v, UInt n ) 35438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 355d19fc161147086f31126fef0955b426b4f843d02sewardj return toUShort(((Short)v) >> n); 35638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 35738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 35838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UInt shl32 ( UInt v, UInt n ) 35938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 36038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return v << n; 36138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 36238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 36338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UInt shr32 ( UInt v, UInt n ) 36438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 36538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return (((UInt)v) >> n); 36638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 36738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 36838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UInt sar32 ( UInt v, UInt n ) 36938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 37038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return ((Int)v) >> n; 37138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 37238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 37338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar avg8U ( UChar xx, UChar yy ) 37438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 37538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UInt xxi = (UInt)xx; 37638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UInt yyi = (UInt)yy; 37738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UInt r = (xxi + yyi + 1) >> 1; 37838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return (UChar)r; 37938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 38038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 38138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UShort avg16U ( UShort xx, UShort yy ) 38238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 38338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UInt xxi = (UInt)xx; 38438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UInt yyi = (UInt)yy; 38538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UInt r = (xxi + yyi + 1) >> 1; 38638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return (UShort)r; 38738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 38838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 38938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline Short max16S ( Short xx, Short yy ) 39038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 391d19fc161147086f31126fef0955b426b4f843d02sewardj return toUShort((xx > yy) ? xx : yy); 39238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 39338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 39438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar max8U ( UChar xx, UChar yy ) 39538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 396d19fc161147086f31126fef0955b426b4f843d02sewardj return toUChar((xx > yy) ? xx : yy); 39738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 39838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 39938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline Short min16S ( Short xx, Short yy ) 40038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 401d19fc161147086f31126fef0955b426b4f843d02sewardj return toUShort((xx < yy) ? xx : yy); 40238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 40338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 40438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar min8U ( UChar xx, UChar yy ) 40538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 406d19fc161147086f31126fef0955b426b4f843d02sewardj return toUChar((xx < yy) ? xx : yy); 40738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 40838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 409e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline UShort hadd16U ( UShort xx, UShort yy ) 410e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 411e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj UInt xxi = (UInt)xx; 412e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj UInt yyi = (UInt)yy; 413e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj UInt r = (xxi + yyi) >> 1; 414e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return (UShort)r; 415e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 416e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 417e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline Short hadd16S ( Short xx, Short yy ) 418e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 419e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj Int xxi = (Int)xx; 420e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj Int yyi = (Int)yy; 421e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj Int r = (xxi + yyi) >> 1; 422e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return (Short)r; 423e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 424e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 425e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline UShort hsub16U ( UShort xx, UShort yy ) 426e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 427e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj UInt xxi = (UInt)xx; 428e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj UInt yyi = (UInt)yy; 429e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj UInt r = (xxi - yyi) >> 1; 430e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return (UShort)r; 431e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 432e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 433e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline Short hsub16S ( Short xx, Short yy ) 434e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 435e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj Int xxi = (Int)xx; 436e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj Int yyi = (Int)yy; 437e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj Int r = (xxi - yyi) >> 1; 438e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return (Short)r; 439e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 440e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 441e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline UChar hadd8U ( UChar xx, UChar yy ) 442e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 443e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj UInt xxi = (UInt)xx; 444e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj UInt yyi = (UInt)yy; 445e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj UInt r = (xxi + yyi) >> 1; 446e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return (UChar)r; 447e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 448e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 449e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline Char hadd8S ( Char xx, Char yy ) 450e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 451e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj Int xxi = (Int)xx; 452e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj Int yyi = (Int)yy; 453e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj Int r = (xxi + yyi) >> 1; 454e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return (Char)r; 455e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 456e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 457e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline UChar hsub8U ( UChar xx, UChar yy ) 458e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 459e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj UInt xxi = (UInt)xx; 460e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj UInt yyi = (UInt)yy; 461e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj UInt r = (xxi - yyi) >> 1; 462e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return (UChar)r; 463e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 464e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 465e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline Char hsub8S ( Char xx, Char yy ) 466e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 467e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj Int xxi = (Int)xx; 468e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj Int yyi = (Int)yy; 469e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj Int r = (xxi - yyi) >> 1; 470e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return (Char)r; 471e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 472e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 473310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardjstatic inline UInt absdiff8U ( UChar xx, UChar yy ) 474310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardj{ 475310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardj UInt xxu = (UChar)xx; 476310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardj UInt yyu = (UChar)yy; 477310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardj return xxu >= yyu ? xxu - yyu : yyu - xxu; 478310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardj} 479e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 48038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* ----------------------------------------------------- */ 48138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* Start of the externally visible functions. These simply 48238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj implement the corresponding IR primops. */ 48338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* ----------------------------------------------------- */ 48438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 48538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* ------------ Normal addition ------------ */ 48638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 48738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_Add32x2 ( ULong xx, ULong yy ) 48838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 48938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk32x2( 49038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel32x2_1(xx) + sel32x2_1(yy), 49138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel32x2_0(xx) + sel32x2_0(yy) 49238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 49338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 49438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 49538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_Add16x4 ( ULong xx, ULong yy ) 49638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 49738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk16x4( 498d19fc161147086f31126fef0955b426b4f843d02sewardj toUShort( sel16x4_3(xx) + sel16x4_3(yy) ), 499d19fc161147086f31126fef0955b426b4f843d02sewardj toUShort( sel16x4_2(xx) + sel16x4_2(yy) ), 500d19fc161147086f31126fef0955b426b4f843d02sewardj toUShort( sel16x4_1(xx) + sel16x4_1(yy) ), 501d19fc161147086f31126fef0955b426b4f843d02sewardj toUShort( sel16x4_0(xx) + sel16x4_0(yy) ) 50238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 50338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 50438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 50538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_Add8x8 ( ULong xx, ULong yy ) 50638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 50738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk8x8( 508d19fc161147086f31126fef0955b426b4f843d02sewardj toUChar( sel8x8_7(xx) + sel8x8_7(yy) ), 509d19fc161147086f31126fef0955b426b4f843d02sewardj toUChar( sel8x8_6(xx) + sel8x8_6(yy) ), 510d19fc161147086f31126fef0955b426b4f843d02sewardj toUChar( sel8x8_5(xx) + sel8x8_5(yy) ), 511d19fc161147086f31126fef0955b426b4f843d02sewardj toUChar( sel8x8_4(xx) + sel8x8_4(yy) ), 512d19fc161147086f31126fef0955b426b4f843d02sewardj toUChar( sel8x8_3(xx) + sel8x8_3(yy) ), 513d19fc161147086f31126fef0955b426b4f843d02sewardj toUChar( sel8x8_2(xx) + sel8x8_2(yy) ), 514d19fc161147086f31126fef0955b426b4f843d02sewardj toUChar( sel8x8_1(xx) + sel8x8_1(yy) ), 515d19fc161147086f31126fef0955b426b4f843d02sewardj toUChar( sel8x8_0(xx) + sel8x8_0(yy) ) 51638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 51738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 51838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 51938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* ------------ Saturating addition ------------ */ 52038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 52138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_QAdd16Sx4 ( ULong xx, ULong yy ) 52238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 52338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk16x4( 52438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qadd16S( sel16x4_3(xx), sel16x4_3(yy) ), 52538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qadd16S( sel16x4_2(xx), sel16x4_2(yy) ), 52638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qadd16S( sel16x4_1(xx), sel16x4_1(yy) ), 52738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qadd16S( sel16x4_0(xx), sel16x4_0(yy) ) 52838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 52938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 53038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 53138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_QAdd8Sx8 ( ULong xx, ULong yy ) 53238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 53338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk8x8( 53438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qadd8S( sel8x8_7(xx), sel8x8_7(yy) ), 53538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qadd8S( sel8x8_6(xx), sel8x8_6(yy) ), 53638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qadd8S( sel8x8_5(xx), sel8x8_5(yy) ), 53738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qadd8S( sel8x8_4(xx), sel8x8_4(yy) ), 53838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qadd8S( sel8x8_3(xx), sel8x8_3(yy) ), 53938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qadd8S( sel8x8_2(xx), sel8x8_2(yy) ), 54038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qadd8S( sel8x8_1(xx), sel8x8_1(yy) ), 54138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qadd8S( sel8x8_0(xx), sel8x8_0(yy) ) 54238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 54338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 54438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 54538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_QAdd16Ux4 ( ULong xx, ULong yy ) 54638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 54738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk16x4( 54838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qadd16U( sel16x4_3(xx), sel16x4_3(yy) ), 54938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qadd16U( sel16x4_2(xx), sel16x4_2(yy) ), 55038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qadd16U( sel16x4_1(xx), sel16x4_1(yy) ), 55138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qadd16U( sel16x4_0(xx), sel16x4_0(yy) ) 55238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 55338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 55438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 55538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_QAdd8Ux8 ( ULong xx, ULong yy ) 55638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 55738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk8x8( 55838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qadd8U( sel8x8_7(xx), sel8x8_7(yy) ), 55938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qadd8U( sel8x8_6(xx), sel8x8_6(yy) ), 56038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qadd8U( sel8x8_5(xx), sel8x8_5(yy) ), 56138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qadd8U( sel8x8_4(xx), sel8x8_4(yy) ), 56238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qadd8U( sel8x8_3(xx), sel8x8_3(yy) ), 56338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qadd8U( sel8x8_2(xx), sel8x8_2(yy) ), 56438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qadd8U( sel8x8_1(xx), sel8x8_1(yy) ), 56538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qadd8U( sel8x8_0(xx), sel8x8_0(yy) ) 56638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 56738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 56838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 56938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* ------------ Normal subtraction ------------ */ 57038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 57138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_Sub32x2 ( ULong xx, ULong yy ) 57238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 57338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk32x2( 57438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel32x2_1(xx) - sel32x2_1(yy), 57538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel32x2_0(xx) - sel32x2_0(yy) 57638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 57738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 57838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 57938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_Sub16x4 ( ULong xx, ULong yy ) 58038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 58138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk16x4( 582d19fc161147086f31126fef0955b426b4f843d02sewardj toUShort( sel16x4_3(xx) - sel16x4_3(yy) ), 583d19fc161147086f31126fef0955b426b4f843d02sewardj toUShort( sel16x4_2(xx) - sel16x4_2(yy) ), 584d19fc161147086f31126fef0955b426b4f843d02sewardj toUShort( sel16x4_1(xx) - sel16x4_1(yy) ), 585d19fc161147086f31126fef0955b426b4f843d02sewardj toUShort( sel16x4_0(xx) - sel16x4_0(yy) ) 58638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 58738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 58838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 58938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_Sub8x8 ( ULong xx, ULong yy ) 59038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 59138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk8x8( 592d19fc161147086f31126fef0955b426b4f843d02sewardj toUChar( sel8x8_7(xx) - sel8x8_7(yy) ), 593d19fc161147086f31126fef0955b426b4f843d02sewardj toUChar( sel8x8_6(xx) - sel8x8_6(yy) ), 594d19fc161147086f31126fef0955b426b4f843d02sewardj toUChar( sel8x8_5(xx) - sel8x8_5(yy) ), 595d19fc161147086f31126fef0955b426b4f843d02sewardj toUChar( sel8x8_4(xx) - sel8x8_4(yy) ), 596d19fc161147086f31126fef0955b426b4f843d02sewardj toUChar( sel8x8_3(xx) - sel8x8_3(yy) ), 597d19fc161147086f31126fef0955b426b4f843d02sewardj toUChar( sel8x8_2(xx) - sel8x8_2(yy) ), 598d19fc161147086f31126fef0955b426b4f843d02sewardj toUChar( sel8x8_1(xx) - sel8x8_1(yy) ), 599d19fc161147086f31126fef0955b426b4f843d02sewardj toUChar( sel8x8_0(xx) - sel8x8_0(yy) ) 60038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 60138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 60238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 60338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* ------------ Saturating subtraction ------------ */ 60438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 60538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_QSub16Sx4 ( ULong xx, ULong yy ) 60638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 60738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk16x4( 60838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qsub16S( sel16x4_3(xx), sel16x4_3(yy) ), 60938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qsub16S( sel16x4_2(xx), sel16x4_2(yy) ), 61038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qsub16S( sel16x4_1(xx), sel16x4_1(yy) ), 61138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qsub16S( sel16x4_0(xx), sel16x4_0(yy) ) 61238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 61338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 61438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 61538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_QSub8Sx8 ( ULong xx, ULong yy ) 61638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 61738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk8x8( 61838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qsub8S( sel8x8_7(xx), sel8x8_7(yy) ), 61938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qsub8S( sel8x8_6(xx), sel8x8_6(yy) ), 62038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qsub8S( sel8x8_5(xx), sel8x8_5(yy) ), 62138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qsub8S( sel8x8_4(xx), sel8x8_4(yy) ), 62238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qsub8S( sel8x8_3(xx), sel8x8_3(yy) ), 62338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qsub8S( sel8x8_2(xx), sel8x8_2(yy) ), 62438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qsub8S( sel8x8_1(xx), sel8x8_1(yy) ), 62538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qsub8S( sel8x8_0(xx), sel8x8_0(yy) ) 62638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 62738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 62838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 62938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_QSub16Ux4 ( ULong xx, ULong yy ) 63038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 63138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk16x4( 63238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qsub16U( sel16x4_3(xx), sel16x4_3(yy) ), 63338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qsub16U( sel16x4_2(xx), sel16x4_2(yy) ), 63438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qsub16U( sel16x4_1(xx), sel16x4_1(yy) ), 63538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qsub16U( sel16x4_0(xx), sel16x4_0(yy) ) 63638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 63738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 63838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 63938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_QSub8Ux8 ( ULong xx, ULong yy ) 64038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 64138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk8x8( 64238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qsub8U( sel8x8_7(xx), sel8x8_7(yy) ), 64338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qsub8U( sel8x8_6(xx), sel8x8_6(yy) ), 64438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qsub8U( sel8x8_5(xx), sel8x8_5(yy) ), 64538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qsub8U( sel8x8_4(xx), sel8x8_4(yy) ), 64638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qsub8U( sel8x8_3(xx), sel8x8_3(yy) ), 64738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qsub8U( sel8x8_2(xx), sel8x8_2(yy) ), 64838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qsub8U( sel8x8_1(xx), sel8x8_1(yy) ), 64938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qsub8U( sel8x8_0(xx), sel8x8_0(yy) ) 65038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 65138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 65238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 65338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* ------------ Multiplication ------------ */ 65438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 65538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_Mul16x4 ( ULong xx, ULong yy ) 65638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 65738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk16x4( 65838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj mul16( sel16x4_3(xx), sel16x4_3(yy) ), 65938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj mul16( sel16x4_2(xx), sel16x4_2(yy) ), 66038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj mul16( sel16x4_1(xx), sel16x4_1(yy) ), 66138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj mul16( sel16x4_0(xx), sel16x4_0(yy) ) 66238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 66338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 66438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 665d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardjULong h_generic_calc_Mul32x2 ( ULong xx, ULong yy ) 666d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj{ 667d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj return mk32x2( 668d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj mul32( sel32x2_1(xx), sel32x2_1(yy) ), 669d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj mul32( sel32x2_0(xx), sel32x2_0(yy) ) 670d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj ); 671d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj} 672d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj 67338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_MulHi16Sx4 ( ULong xx, ULong yy ) 67438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 67538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk16x4( 67638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj mulhi16S( sel16x4_3(xx), sel16x4_3(yy) ), 67738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj mulhi16S( sel16x4_2(xx), sel16x4_2(yy) ), 67838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj mulhi16S( sel16x4_1(xx), sel16x4_1(yy) ), 67938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj mulhi16S( sel16x4_0(xx), sel16x4_0(yy) ) 68038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 68138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 68238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 68338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_MulHi16Ux4 ( ULong xx, ULong yy ) 68438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 68538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk16x4( 68638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj mulhi16U( sel16x4_3(xx), sel16x4_3(yy) ), 68738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj mulhi16U( sel16x4_2(xx), sel16x4_2(yy) ), 68838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj mulhi16U( sel16x4_1(xx), sel16x4_1(yy) ), 68938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj mulhi16U( sel16x4_0(xx), sel16x4_0(yy) ) 69038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 69138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 69238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 69338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* ------------ Comparison ------------ */ 69438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 69538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_CmpEQ32x2 ( ULong xx, ULong yy ) 69638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 69738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk32x2( 69838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpeq32( sel32x2_1(xx), sel32x2_1(yy) ), 69938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpeq32( sel32x2_0(xx), sel32x2_0(yy) ) 70038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 70138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 70238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 70338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_CmpEQ16x4 ( ULong xx, ULong yy ) 70438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 70538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk16x4( 70638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpeq16( sel16x4_3(xx), sel16x4_3(yy) ), 70738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpeq16( sel16x4_2(xx), sel16x4_2(yy) ), 70838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpeq16( sel16x4_1(xx), sel16x4_1(yy) ), 70938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpeq16( sel16x4_0(xx), sel16x4_0(yy) ) 71038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 71138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 71238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 71338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_CmpEQ8x8 ( ULong xx, ULong yy ) 71438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 71538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk8x8( 71638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpeq8( sel8x8_7(xx), sel8x8_7(yy) ), 71738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpeq8( sel8x8_6(xx), sel8x8_6(yy) ), 71838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpeq8( sel8x8_5(xx), sel8x8_5(yy) ), 71938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpeq8( sel8x8_4(xx), sel8x8_4(yy) ), 72038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpeq8( sel8x8_3(xx), sel8x8_3(yy) ), 72138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpeq8( sel8x8_2(xx), sel8x8_2(yy) ), 72238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpeq8( sel8x8_1(xx), sel8x8_1(yy) ), 72338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpeq8( sel8x8_0(xx), sel8x8_0(yy) ) 72438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 72538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 72638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 72738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_CmpGT32Sx2 ( ULong xx, ULong yy ) 72838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 72938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk32x2( 73038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpgt32S( sel32x2_1(xx), sel32x2_1(yy) ), 73138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpgt32S( sel32x2_0(xx), sel32x2_0(yy) ) 73238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 73338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 73438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 73538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_CmpGT16Sx4 ( ULong xx, ULong yy ) 73638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 73738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk16x4( 73838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpgt16S( sel16x4_3(xx), sel16x4_3(yy) ), 73938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpgt16S( sel16x4_2(xx), sel16x4_2(yy) ), 74038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpgt16S( sel16x4_1(xx), sel16x4_1(yy) ), 74138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpgt16S( sel16x4_0(xx), sel16x4_0(yy) ) 74238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 74338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 74438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 74538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_CmpGT8Sx8 ( ULong xx, ULong yy ) 74638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 74738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk8x8( 74838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpgt8S( sel8x8_7(xx), sel8x8_7(yy) ), 74938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpgt8S( sel8x8_6(xx), sel8x8_6(yy) ), 75038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpgt8S( sel8x8_5(xx), sel8x8_5(yy) ), 75138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpgt8S( sel8x8_4(xx), sel8x8_4(yy) ), 75238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpgt8S( sel8x8_3(xx), sel8x8_3(yy) ), 75338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpgt8S( sel8x8_2(xx), sel8x8_2(yy) ), 75438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpgt8S( sel8x8_1(xx), sel8x8_1(yy) ), 75538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpgt8S( sel8x8_0(xx), sel8x8_0(yy) ) 75638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 75738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 75838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 7591806918ae2783af5808f00876581e01c7b650a0dsewardjULong h_generic_calc_CmpNEZ32x2 ( ULong xx ) 7601806918ae2783af5808f00876581e01c7b650a0dsewardj{ 7611806918ae2783af5808f00876581e01c7b650a0dsewardj return mk32x2( 7621806918ae2783af5808f00876581e01c7b650a0dsewardj cmpnez32( sel32x2_1(xx) ), 7631806918ae2783af5808f00876581e01c7b650a0dsewardj cmpnez32( sel32x2_0(xx) ) 7641806918ae2783af5808f00876581e01c7b650a0dsewardj ); 7651806918ae2783af5808f00876581e01c7b650a0dsewardj} 7661806918ae2783af5808f00876581e01c7b650a0dsewardj 7671806918ae2783af5808f00876581e01c7b650a0dsewardjULong h_generic_calc_CmpNEZ16x4 ( ULong xx ) 7681806918ae2783af5808f00876581e01c7b650a0dsewardj{ 7691806918ae2783af5808f00876581e01c7b650a0dsewardj return mk16x4( 7701806918ae2783af5808f00876581e01c7b650a0dsewardj cmpnez16( sel16x4_3(xx) ), 7711806918ae2783af5808f00876581e01c7b650a0dsewardj cmpnez16( sel16x4_2(xx) ), 7721806918ae2783af5808f00876581e01c7b650a0dsewardj cmpnez16( sel16x4_1(xx) ), 7731806918ae2783af5808f00876581e01c7b650a0dsewardj cmpnez16( sel16x4_0(xx) ) 7741806918ae2783af5808f00876581e01c7b650a0dsewardj ); 7751806918ae2783af5808f00876581e01c7b650a0dsewardj} 7761806918ae2783af5808f00876581e01c7b650a0dsewardj 7771806918ae2783af5808f00876581e01c7b650a0dsewardjULong h_generic_calc_CmpNEZ8x8 ( ULong xx ) 7781806918ae2783af5808f00876581e01c7b650a0dsewardj{ 7791806918ae2783af5808f00876581e01c7b650a0dsewardj return mk8x8( 7801806918ae2783af5808f00876581e01c7b650a0dsewardj cmpnez8( sel8x8_7(xx) ), 7811806918ae2783af5808f00876581e01c7b650a0dsewardj cmpnez8( sel8x8_6(xx) ), 7821806918ae2783af5808f00876581e01c7b650a0dsewardj cmpnez8( sel8x8_5(xx) ), 7831806918ae2783af5808f00876581e01c7b650a0dsewardj cmpnez8( sel8x8_4(xx) ), 7841806918ae2783af5808f00876581e01c7b650a0dsewardj cmpnez8( sel8x8_3(xx) ), 7851806918ae2783af5808f00876581e01c7b650a0dsewardj cmpnez8( sel8x8_2(xx) ), 7861806918ae2783af5808f00876581e01c7b650a0dsewardj cmpnez8( sel8x8_1(xx) ), 7871806918ae2783af5808f00876581e01c7b650a0dsewardj cmpnez8( sel8x8_0(xx) ) 7881806918ae2783af5808f00876581e01c7b650a0dsewardj ); 7891806918ae2783af5808f00876581e01c7b650a0dsewardj} 7901806918ae2783af5808f00876581e01c7b650a0dsewardj 79138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* ------------ Saturating narrowing ------------ */ 79238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 7935f438dd73072211989c6d496845bdc9b777ecbecsewardjULong h_generic_calc_QNarrowBin32Sto16Sx4 ( ULong aa, ULong bb ) 79438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 79538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UInt d = sel32x2_1(aa); 79638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UInt c = sel32x2_0(aa); 79738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UInt b = sel32x2_1(bb); 79838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UInt a = sel32x2_0(bb); 79938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk16x4( 800c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj qnarrow32Sto16S(d), 801c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj qnarrow32Sto16S(c), 802c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj qnarrow32Sto16S(b), 803c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj qnarrow32Sto16S(a) 80438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 80538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 80638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 8075f438dd73072211989c6d496845bdc9b777ecbecsewardjULong h_generic_calc_QNarrowBin16Sto8Sx8 ( ULong aa, ULong bb ) 80838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 80938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UShort h = sel16x4_3(aa); 81038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UShort g = sel16x4_2(aa); 81138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UShort f = sel16x4_1(aa); 81238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UShort e = sel16x4_0(aa); 81338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UShort d = sel16x4_3(bb); 81438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UShort c = sel16x4_2(bb); 81538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UShort b = sel16x4_1(bb); 81638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UShort a = sel16x4_0(bb); 81738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk8x8( 818c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj qnarrow16Sto8S(h), 819c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj qnarrow16Sto8S(g), 820c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj qnarrow16Sto8S(f), 821c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj qnarrow16Sto8S(e), 822c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj qnarrow16Sto8S(d), 823c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj qnarrow16Sto8S(c), 824c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj qnarrow16Sto8S(b), 825c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj qnarrow16Sto8S(a) 82638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 82738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 82838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 8295f438dd73072211989c6d496845bdc9b777ecbecsewardjULong h_generic_calc_QNarrowBin16Sto8Ux8 ( ULong aa, ULong bb ) 83038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 83138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UShort h = sel16x4_3(aa); 83238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UShort g = sel16x4_2(aa); 83338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UShort f = sel16x4_1(aa); 83438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UShort e = sel16x4_0(aa); 83538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UShort d = sel16x4_3(bb); 83638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UShort c = sel16x4_2(bb); 83738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UShort b = sel16x4_1(bb); 83838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UShort a = sel16x4_0(bb); 83938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk8x8( 840c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj qnarrow16Sto8U(h), 841c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj qnarrow16Sto8U(g), 842c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj qnarrow16Sto8U(f), 843c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj qnarrow16Sto8U(e), 844c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj qnarrow16Sto8U(d), 845c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj qnarrow16Sto8U(c), 846c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj qnarrow16Sto8U(b), 847c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj qnarrow16Sto8U(a) 84838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 84938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 85038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 851ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj/* ------------ Truncating narrowing ------------ */ 852ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj 853ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardjULong h_generic_calc_NarrowBin32to16x4 ( ULong aa, ULong bb ) 854ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj{ 855ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj UInt d = sel32x2_1(aa); 856ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj UInt c = sel32x2_0(aa); 857ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj UInt b = sel32x2_1(bb); 858ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj UInt a = sel32x2_0(bb); 859ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj return mk16x4( 860ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj narrow32to16(d), 861ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj narrow32to16(c), 862ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj narrow32to16(b), 863ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj narrow32to16(a) 864ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj ); 865ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj} 866ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj 867ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardjULong h_generic_calc_NarrowBin16to8x8 ( ULong aa, ULong bb ) 868ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj{ 869ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj UShort h = sel16x4_3(aa); 870ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj UShort g = sel16x4_2(aa); 871ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj UShort f = sel16x4_1(aa); 872ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj UShort e = sel16x4_0(aa); 873ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj UShort d = sel16x4_3(bb); 874ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj UShort c = sel16x4_2(bb); 875ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj UShort b = sel16x4_1(bb); 876ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj UShort a = sel16x4_0(bb); 877ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj return mk8x8( 878ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj narrow16to8(h), 879ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj narrow16to8(g), 880ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj narrow16to8(f), 881ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj narrow16to8(e), 882ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj narrow16to8(d), 883ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj narrow16to8(c), 884ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj narrow16to8(b), 885ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj narrow16to8(a) 886ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj ); 887ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj} 888ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj 88938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* ------------ Interleaving ------------ */ 89038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 89138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_InterleaveHI8x8 ( ULong aa, ULong bb ) 89238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 89338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk8x8( 89438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel8x8_7(aa), 89538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel8x8_7(bb), 89638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel8x8_6(aa), 89738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel8x8_6(bb), 89838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel8x8_5(aa), 89938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel8x8_5(bb), 90038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel8x8_4(aa), 90138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel8x8_4(bb) 90238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 90338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 90438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 90538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_InterleaveLO8x8 ( ULong aa, ULong bb ) 90638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 90738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk8x8( 90838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel8x8_3(aa), 90938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel8x8_3(bb), 91038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel8x8_2(aa), 91138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel8x8_2(bb), 91238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel8x8_1(aa), 91338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel8x8_1(bb), 91438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel8x8_0(aa), 91538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel8x8_0(bb) 91638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 91738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 91838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 91938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_InterleaveHI16x4 ( ULong aa, ULong bb ) 92038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 92138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk16x4( 92238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel16x4_3(aa), 92338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel16x4_3(bb), 92438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel16x4_2(aa), 92538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel16x4_2(bb) 92638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 92738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 92838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 92938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_InterleaveLO16x4 ( ULong aa, ULong bb ) 93038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 93138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk16x4( 93238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel16x4_1(aa), 93338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel16x4_1(bb), 93438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel16x4_0(aa), 93538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel16x4_0(bb) 93638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 93738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 93838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 93938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_InterleaveHI32x2 ( ULong aa, ULong bb ) 94038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 94138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk32x2( 94238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel32x2_1(aa), 94338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel32x2_1(bb) 94438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 94538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 94638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 94738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_InterleaveLO32x2 ( ULong aa, ULong bb ) 94838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 94938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk32x2( 95038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel32x2_0(aa), 95138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel32x2_0(bb) 95238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 95338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 95438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 955d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj/* ------------ Concatenation ------------ */ 956d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj 957d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardjULong h_generic_calc_CatOddLanes16x4 ( ULong aa, ULong bb ) 958d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj{ 959d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj return mk16x4( 960d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj sel16x4_3(aa), 961d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj sel16x4_1(aa), 962d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj sel16x4_3(bb), 963d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj sel16x4_1(bb) 964d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj ); 965d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj} 966d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj 967d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardjULong h_generic_calc_CatEvenLanes16x4 ( ULong aa, ULong bb ) 968d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj{ 969d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj return mk16x4( 970d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj sel16x4_2(aa), 971d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj sel16x4_0(aa), 972d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj sel16x4_2(bb), 973d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj sel16x4_0(bb) 974d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj ); 975d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj} 976d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj 977d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj/* misc hack looking for a proper home */ 978d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardjULong h_generic_calc_Perm8x8 ( ULong aa, ULong bb ) 979d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj{ 980d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj return mk8x8( 981d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj index8x8(aa, sel8x8_7(bb)), 982d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj index8x8(aa, sel8x8_6(bb)), 983d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj index8x8(aa, sel8x8_5(bb)), 984d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj index8x8(aa, sel8x8_4(bb)), 985d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj index8x8(aa, sel8x8_3(bb)), 986d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj index8x8(aa, sel8x8_2(bb)), 987d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj index8x8(aa, sel8x8_1(bb)), 988d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj index8x8(aa, sel8x8_0(bb)) 989d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj ); 990d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj} 99138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 99238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* ------------ Shifting ------------ */ 99338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* Note that because these primops are undefined if the shift amount 99438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj equals or exceeds the lane width, the shift amount is masked so 99538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj that the scalar shifts are always in range. In fact, given the 99638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj semantics of these primops (ShlN16x4, etc) it is an error if in 99738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj fact we are ever given an out-of-range shift amount. 99838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj*/ 99938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_ShlN32x2 ( ULong xx, UInt nn ) 100038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 100138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj /* vassert(nn < 32); */ 100238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj nn &= 31; 100338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk32x2( 100438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj shl32( sel32x2_1(xx), nn ), 100538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj shl32( sel32x2_0(xx), nn ) 100638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 100738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 100838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 100938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_ShlN16x4 ( ULong xx, UInt nn ) 101038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 101138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj /* vassert(nn < 16); */ 101238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj nn &= 15; 101338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk16x4( 101438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj shl16( sel16x4_3(xx), nn ), 101538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj shl16( sel16x4_2(xx), nn ), 101638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj shl16( sel16x4_1(xx), nn ), 101738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj shl16( sel16x4_0(xx), nn ) 101838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 1019d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj} 1020d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj 1021d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardjULong h_generic_calc_ShlN8x8 ( ULong xx, UInt nn ) 1022d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj{ 1023d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj /* vassert(nn < 8); */ 1024d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj nn &= 7; 1025d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj return mk8x8( 1026d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj shl8( sel8x8_7(xx), nn ), 1027d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj shl8( sel8x8_6(xx), nn ), 1028d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj shl8( sel8x8_5(xx), nn ), 1029d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj shl8( sel8x8_4(xx), nn ), 1030d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj shl8( sel8x8_3(xx), nn ), 1031d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj shl8( sel8x8_2(xx), nn ), 1032d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj shl8( sel8x8_1(xx), nn ), 1033d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj shl8( sel8x8_0(xx), nn ) 1034d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj ); 103538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 103638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 103738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_ShrN32x2 ( ULong xx, UInt nn ) 103838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 103938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj /* vassert(nn < 32); */ 104038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj nn &= 31; 104138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk32x2( 104238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj shr32( sel32x2_1(xx), nn ), 104338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj shr32( sel32x2_0(xx), nn ) 104438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 104538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 104638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 104738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_ShrN16x4 ( ULong xx, UInt nn ) 104838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 104938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj /* vassert(nn < 16); */ 105038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj nn &= 15; 105138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk16x4( 105238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj shr16( sel16x4_3(xx), nn ), 105338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj shr16( sel16x4_2(xx), nn ), 105438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj shr16( sel16x4_1(xx), nn ), 105538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj shr16( sel16x4_0(xx), nn ) 105638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 105738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 105838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 105938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_SarN32x2 ( ULong xx, UInt nn ) 106038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 106138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj /* vassert(nn < 32); */ 106238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj nn &= 31; 106338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk32x2( 106438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sar32( sel32x2_1(xx), nn ), 106538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sar32( sel32x2_0(xx), nn ) 106638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 106738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 106838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 106938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_SarN16x4 ( ULong xx, UInt nn ) 107038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 107138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj /* vassert(nn < 16); */ 107238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj nn &= 15; 107338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk16x4( 107438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sar16( sel16x4_3(xx), nn ), 107538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sar16( sel16x4_2(xx), nn ), 107638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sar16( sel16x4_1(xx), nn ), 107738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sar16( sel16x4_0(xx), nn ) 107838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 107938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 108038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 1081d71ba837242cc470f622335b1c650bce8886a533sewardjULong h_generic_calc_SarN8x8 ( ULong xx, UInt nn ) 1082d71ba837242cc470f622335b1c650bce8886a533sewardj{ 1083d71ba837242cc470f622335b1c650bce8886a533sewardj /* vassert(nn < 8); */ 1084d71ba837242cc470f622335b1c650bce8886a533sewardj nn &= 7; 1085d71ba837242cc470f622335b1c650bce8886a533sewardj return mk8x8( 1086d71ba837242cc470f622335b1c650bce8886a533sewardj sar8( sel8x8_7(xx), nn ), 1087d71ba837242cc470f622335b1c650bce8886a533sewardj sar8( sel8x8_6(xx), nn ), 1088d71ba837242cc470f622335b1c650bce8886a533sewardj sar8( sel8x8_5(xx), nn ), 1089d71ba837242cc470f622335b1c650bce8886a533sewardj sar8( sel8x8_4(xx), nn ), 1090d71ba837242cc470f622335b1c650bce8886a533sewardj sar8( sel8x8_3(xx), nn ), 1091d71ba837242cc470f622335b1c650bce8886a533sewardj sar8( sel8x8_2(xx), nn ), 1092d71ba837242cc470f622335b1c650bce8886a533sewardj sar8( sel8x8_1(xx), nn ), 1093d71ba837242cc470f622335b1c650bce8886a533sewardj sar8( sel8x8_0(xx), nn ) 1094d71ba837242cc470f622335b1c650bce8886a533sewardj ); 1095d71ba837242cc470f622335b1c650bce8886a533sewardj} 1096d71ba837242cc470f622335b1c650bce8886a533sewardj 109738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* ------------ Averaging ------------ */ 109838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 109938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_Avg8Ux8 ( ULong xx, ULong yy ) 110038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 110138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk8x8( 110238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj avg8U( sel8x8_7(xx), sel8x8_7(yy) ), 110338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj avg8U( sel8x8_6(xx), sel8x8_6(yy) ), 110438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj avg8U( sel8x8_5(xx), sel8x8_5(yy) ), 110538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj avg8U( sel8x8_4(xx), sel8x8_4(yy) ), 110638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj avg8U( sel8x8_3(xx), sel8x8_3(yy) ), 110738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj avg8U( sel8x8_2(xx), sel8x8_2(yy) ), 110838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj avg8U( sel8x8_1(xx), sel8x8_1(yy) ), 110938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj avg8U( sel8x8_0(xx), sel8x8_0(yy) ) 111038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 111138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 111238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 111338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_Avg16Ux4 ( ULong xx, ULong yy ) 111438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 111538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk16x4( 111638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj avg16U( sel16x4_3(xx), sel16x4_3(yy) ), 111738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj avg16U( sel16x4_2(xx), sel16x4_2(yy) ), 111838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj avg16U( sel16x4_1(xx), sel16x4_1(yy) ), 111938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj avg16U( sel16x4_0(xx), sel16x4_0(yy) ) 112038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 112138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 112238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 112338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* ------------ max/min ------------ */ 112438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 112538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_Max16Sx4 ( ULong xx, ULong yy ) 112638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 112738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk16x4( 112838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj max16S( sel16x4_3(xx), sel16x4_3(yy) ), 112938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj max16S( sel16x4_2(xx), sel16x4_2(yy) ), 113038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj max16S( sel16x4_1(xx), sel16x4_1(yy) ), 113138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj max16S( sel16x4_0(xx), sel16x4_0(yy) ) 113238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 113338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 113438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 113538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_Max8Ux8 ( ULong xx, ULong yy ) 113638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 113738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk8x8( 113838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj max8U( sel8x8_7(xx), sel8x8_7(yy) ), 113938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj max8U( sel8x8_6(xx), sel8x8_6(yy) ), 114038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj max8U( sel8x8_5(xx), sel8x8_5(yy) ), 114138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj max8U( sel8x8_4(xx), sel8x8_4(yy) ), 114238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj max8U( sel8x8_3(xx), sel8x8_3(yy) ), 114338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj max8U( sel8x8_2(xx), sel8x8_2(yy) ), 114438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj max8U( sel8x8_1(xx), sel8x8_1(yy) ), 114538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj max8U( sel8x8_0(xx), sel8x8_0(yy) ) 114638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 114738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 114838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 114938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_Min16Sx4 ( ULong xx, ULong yy ) 115038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 115138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk16x4( 115238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj min16S( sel16x4_3(xx), sel16x4_3(yy) ), 115338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj min16S( sel16x4_2(xx), sel16x4_2(yy) ), 115438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj min16S( sel16x4_1(xx), sel16x4_1(yy) ), 115538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj min16S( sel16x4_0(xx), sel16x4_0(yy) ) 115638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 115738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 115838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 115938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_Min8Ux8 ( ULong xx, ULong yy ) 116038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 116138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk8x8( 116238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj min8U( sel8x8_7(xx), sel8x8_7(yy) ), 116338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj min8U( sel8x8_6(xx), sel8x8_6(yy) ), 116438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj min8U( sel8x8_5(xx), sel8x8_5(yy) ), 116538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj min8U( sel8x8_4(xx), sel8x8_4(yy) ), 116638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj min8U( sel8x8_3(xx), sel8x8_3(yy) ), 116738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj min8U( sel8x8_2(xx), sel8x8_2(yy) ), 116838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj min8U( sel8x8_1(xx), sel8x8_1(yy) ), 116938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj min8U( sel8x8_0(xx), sel8x8_0(yy) ) 117038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 117138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 117238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 1173e13074c2c1321d069fb95806bdce64f9a3512341sewardjUInt h_generic_calc_GetMSBs8x8 ( ULong xx ) 1174e13074c2c1321d069fb95806bdce64f9a3512341sewardj{ 1175e13074c2c1321d069fb95806bdce64f9a3512341sewardj UInt r = 0; 1176e13074c2c1321d069fb95806bdce64f9a3512341sewardj if (xx & (1ULL << (64-1))) r |= (1<<7); 1177e13074c2c1321d069fb95806bdce64f9a3512341sewardj if (xx & (1ULL << (56-1))) r |= (1<<6); 1178e13074c2c1321d069fb95806bdce64f9a3512341sewardj if (xx & (1ULL << (48-1))) r |= (1<<5); 1179e13074c2c1321d069fb95806bdce64f9a3512341sewardj if (xx & (1ULL << (40-1))) r |= (1<<4); 1180e13074c2c1321d069fb95806bdce64f9a3512341sewardj if (xx & (1ULL << (32-1))) r |= (1<<3); 1181e13074c2c1321d069fb95806bdce64f9a3512341sewardj if (xx & (1ULL << (24-1))) r |= (1<<2); 1182e13074c2c1321d069fb95806bdce64f9a3512341sewardj if (xx & (1ULL << (16-1))) r |= (1<<1); 1183e13074c2c1321d069fb95806bdce64f9a3512341sewardj if (xx & (1ULL << ( 8-1))) r |= (1<<0); 1184e13074c2c1321d069fb95806bdce64f9a3512341sewardj return r; 1185e13074c2c1321d069fb95806bdce64f9a3512341sewardj} 1186e13074c2c1321d069fb95806bdce64f9a3512341sewardj 1187e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj/* ------------ SOME 32-bit SIMD HELPERS TOO ------------ */ 1188e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1189e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj/* Tuple/select functions for 16x2 vectors. */ 1190e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline UInt mk16x2 ( UShort w1, UShort w2 ) { 1191e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return (((UInt)w1) << 16) | ((UInt)w2); 1192e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1193e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1194e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline UShort sel16x2_1 ( UInt w32 ) { 1195e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return 0xFFFF & (UShort)(w32 >> 16); 1196e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1197e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline UShort sel16x2_0 ( UInt w32 ) { 1198e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return 0xFFFF & (UShort)(w32); 1199e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1200e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1201e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline UInt mk8x4 ( UChar w3, UChar w2, 1202e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj UChar w1, UChar w0 ) { 1203e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj UInt w32 = (((UInt)w3) << 24) | (((UInt)w2) << 16) 1204e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj | (((UInt)w1) << 8) | (((UInt)w0) << 0); 1205e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return w32; 1206e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1207e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1208e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline UChar sel8x4_3 ( UInt w32 ) { 1209e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return toUChar(0xFF & (w32 >> 24)); 1210e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1211e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline UChar sel8x4_2 ( UInt w32 ) { 1212e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return toUChar(0xFF & (w32 >> 16)); 1213e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1214e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline UChar sel8x4_1 ( UInt w32 ) { 1215e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return toUChar(0xFF & (w32 >> 8)); 1216e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1217e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline UChar sel8x4_0 ( UInt w32 ) { 1218e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return toUChar(0xFF & (w32 >> 0)); 1219e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1220e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1221e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1222e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj/* ----------------------------------------------------- */ 1223e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj/* More externally visible functions. These simply 1224e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj implement the corresponding IR primops. */ 1225e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj/* ----------------------------------------------------- */ 1226e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1227e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj/* ------ 16x2 ------ */ 1228e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1229e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_Add16x2 ( UInt xx, UInt yy ) 1230e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 1231e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return mk16x2( sel16x2_1(xx) + sel16x2_1(yy), 1232e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj sel16x2_0(xx) + sel16x2_0(yy) ); 1233e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1234e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1235e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_Sub16x2 ( UInt xx, UInt yy ) 1236e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 1237e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return mk16x2( sel16x2_1(xx) - sel16x2_1(yy), 1238e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj sel16x2_0(xx) - sel16x2_0(yy) ); 1239e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1240e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1241e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_HAdd16Ux2 ( UInt xx, UInt yy ) 1242e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 1243e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return mk16x2( hadd16U( sel16x2_1(xx), sel16x2_1(yy) ), 1244e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj hadd16U( sel16x2_0(xx), sel16x2_0(yy) ) ); 1245e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1246e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1247e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_HAdd16Sx2 ( UInt xx, UInt yy ) 1248e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 1249e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return mk16x2( hadd16S( sel16x2_1(xx), sel16x2_1(yy) ), 1250e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj hadd16S( sel16x2_0(xx), sel16x2_0(yy) ) ); 1251e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1252e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1253e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_HSub16Ux2 ( UInt xx, UInt yy ) 1254e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 1255e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return mk16x2( hsub16U( sel16x2_1(xx), sel16x2_1(yy) ), 1256e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj hsub16U( sel16x2_0(xx), sel16x2_0(yy) ) ); 1257e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1258e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1259e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_HSub16Sx2 ( UInt xx, UInt yy ) 1260e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 1261e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return mk16x2( hsub16S( sel16x2_1(xx), sel16x2_1(yy) ), 1262e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj hsub16S( sel16x2_0(xx), sel16x2_0(yy) ) ); 1263e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1264e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1265e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_QAdd16Ux2 ( UInt xx, UInt yy ) 1266e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 1267e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return mk16x2( qadd16U( sel16x2_1(xx), sel16x2_1(yy) ), 1268e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj qadd16U( sel16x2_0(xx), sel16x2_0(yy) ) ); 1269e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1270e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1271e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_QAdd16Sx2 ( UInt xx, UInt yy ) 1272e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 1273e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return mk16x2( qadd16S( sel16x2_1(xx), sel16x2_1(yy) ), 1274e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj qadd16S( sel16x2_0(xx), sel16x2_0(yy) ) ); 1275e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1276e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1277e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_QSub16Ux2 ( UInt xx, UInt yy ) 1278e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 1279e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return mk16x2( qsub16U( sel16x2_1(xx), sel16x2_1(yy) ), 1280e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj qsub16U( sel16x2_0(xx), sel16x2_0(yy) ) ); 1281e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1282e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1283e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_QSub16Sx2 ( UInt xx, UInt yy ) 1284e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 1285e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return mk16x2( qsub16S( sel16x2_1(xx), sel16x2_1(yy) ), 1286e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj qsub16S( sel16x2_0(xx), sel16x2_0(yy) ) ); 1287e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1288e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1289e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj/* ------ 8x4 ------ */ 1290e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1291e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_Add8x4 ( UInt xx, UInt yy ) 1292e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 1293e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return mk8x4( 1294e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj sel8x4_3(xx) + sel8x4_3(yy), 1295e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj sel8x4_2(xx) + sel8x4_2(yy), 1296e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj sel8x4_1(xx) + sel8x4_1(yy), 1297e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj sel8x4_0(xx) + sel8x4_0(yy) 1298e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj ); 1299e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1300e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1301e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_Sub8x4 ( UInt xx, UInt yy ) 1302e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 1303e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return mk8x4( 1304e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj sel8x4_3(xx) - sel8x4_3(yy), 1305e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj sel8x4_2(xx) - sel8x4_2(yy), 1306e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj sel8x4_1(xx) - sel8x4_1(yy), 1307e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj sel8x4_0(xx) - sel8x4_0(yy) 1308e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj ); 1309e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1310e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1311e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_HAdd8Ux4 ( UInt xx, UInt yy ) 1312e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 1313e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return mk8x4( 1314e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj hadd8U( sel8x4_3(xx), sel8x4_3(yy) ), 1315e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj hadd8U( sel8x4_2(xx), sel8x4_2(yy) ), 1316e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj hadd8U( sel8x4_1(xx), sel8x4_1(yy) ), 1317e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj hadd8U( sel8x4_0(xx), sel8x4_0(yy) ) 1318e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj ); 1319e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1320e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1321e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_HAdd8Sx4 ( UInt xx, UInt yy ) 1322e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 1323e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return mk8x4( 1324e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj hadd8S( sel8x4_3(xx), sel8x4_3(yy) ), 1325e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj hadd8S( sel8x4_2(xx), sel8x4_2(yy) ), 1326e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj hadd8S( sel8x4_1(xx), sel8x4_1(yy) ), 1327e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj hadd8S( sel8x4_0(xx), sel8x4_0(yy) ) 1328e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj ); 1329e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1330e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1331e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_HSub8Ux4 ( UInt xx, UInt yy ) 1332e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 1333e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return mk8x4( 1334e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj hsub8U( sel8x4_3(xx), sel8x4_3(yy) ), 1335e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj hsub8U( sel8x4_2(xx), sel8x4_2(yy) ), 1336e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj hsub8U( sel8x4_1(xx), sel8x4_1(yy) ), 1337e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj hsub8U( sel8x4_0(xx), sel8x4_0(yy) ) 1338e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj ); 1339e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1340e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1341e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_HSub8Sx4 ( UInt xx, UInt yy ) 1342e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 1343e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return mk8x4( 1344e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj hsub8S( sel8x4_3(xx), sel8x4_3(yy) ), 1345e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj hsub8S( sel8x4_2(xx), sel8x4_2(yy) ), 1346e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj hsub8S( sel8x4_1(xx), sel8x4_1(yy) ), 1347e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj hsub8S( sel8x4_0(xx), sel8x4_0(yy) ) 1348e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj ); 1349e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1350e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1351e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_QAdd8Ux4 ( UInt xx, UInt yy ) 1352e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 1353e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return mk8x4( 1354e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj qadd8U( sel8x4_3(xx), sel8x4_3(yy) ), 1355e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj qadd8U( sel8x4_2(xx), sel8x4_2(yy) ), 1356e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj qadd8U( sel8x4_1(xx), sel8x4_1(yy) ), 1357e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj qadd8U( sel8x4_0(xx), sel8x4_0(yy) ) 1358e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj ); 1359e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1360e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1361e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_QAdd8Sx4 ( UInt xx, UInt yy ) 1362e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 1363e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return mk8x4( 1364e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj qadd8S( sel8x4_3(xx), sel8x4_3(yy) ), 1365e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj qadd8S( sel8x4_2(xx), sel8x4_2(yy) ), 1366e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj qadd8S( sel8x4_1(xx), sel8x4_1(yy) ), 1367e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj qadd8S( sel8x4_0(xx), sel8x4_0(yy) ) 1368e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj ); 1369e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1370e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1371e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_QSub8Ux4 ( UInt xx, UInt yy ) 1372e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 1373e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return mk8x4( 1374e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj qsub8U( sel8x4_3(xx), sel8x4_3(yy) ), 1375e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj qsub8U( sel8x4_2(xx), sel8x4_2(yy) ), 1376e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj qsub8U( sel8x4_1(xx), sel8x4_1(yy) ), 1377e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj qsub8U( sel8x4_0(xx), sel8x4_0(yy) ) 1378e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj ); 1379e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1380e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1381e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_QSub8Sx4 ( UInt xx, UInt yy ) 1382e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 1383e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return mk8x4( 1384e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj qsub8S( sel8x4_3(xx), sel8x4_3(yy) ), 1385e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj qsub8S( sel8x4_2(xx), sel8x4_2(yy) ), 1386e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj qsub8S( sel8x4_1(xx), sel8x4_1(yy) ), 1387e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj qsub8S( sel8x4_0(xx), sel8x4_0(yy) ) 1388e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj ); 1389e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1390e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1391e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_CmpNEZ16x2 ( UInt xx ) 1392e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 1393e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return mk16x2( 1394e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj cmpnez16( sel16x2_1(xx) ), 1395e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj cmpnez16( sel16x2_0(xx) ) 1396e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj ); 1397e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1398e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1399e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_CmpNEZ8x4 ( UInt xx ) 1400e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 1401e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return mk8x4( 1402e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj cmpnez8( sel8x4_3(xx) ), 1403e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj cmpnez8( sel8x4_2(xx) ), 1404e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj cmpnez8( sel8x4_1(xx) ), 1405e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj cmpnez8( sel8x4_0(xx) ) 1406e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj ); 1407e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 140838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 1409310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardjUInt h_generic_calc_Sad8Ux4 ( UInt xx, UInt yy ) 1410310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardj{ 1411310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardj return absdiff8U( sel8x4_3(xx), sel8x4_3(yy) ) 1412310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardj + absdiff8U( sel8x4_2(xx), sel8x4_2(yy) ) 1413310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardj + absdiff8U( sel8x4_1(xx), sel8x4_1(yy) ) 1414310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardj + absdiff8U( sel8x4_0(xx), sel8x4_0(yy) ); 1415310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardj} 1416310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardj 141744ce46d5945ed83d96695d280510cc2a858894dcsewardjUInt h_generic_calc_QAdd32S ( UInt xx, UInt yy ) 141844ce46d5945ed83d96695d280510cc2a858894dcsewardj{ 141944ce46d5945ed83d96695d280510cc2a858894dcsewardj return qadd32S( xx, yy ); 142044ce46d5945ed83d96695d280510cc2a858894dcsewardj} 142144ce46d5945ed83d96695d280510cc2a858894dcsewardj 142244ce46d5945ed83d96695d280510cc2a858894dcsewardjUInt h_generic_calc_QSub32S ( UInt xx, UInt yy ) 142344ce46d5945ed83d96695d280510cc2a858894dcsewardj{ 142444ce46d5945ed83d96695d280510cc2a858894dcsewardj return qsub32S( xx, yy ); 142544ce46d5945ed83d96695d280510cc2a858894dcsewardj} 142644ce46d5945ed83d96695d280510cc2a858894dcsewardj 142744ce46d5945ed83d96695d280510cc2a858894dcsewardj 14284c96e61dd85c172b999d6afc88ce6640aeba9962sewardj/*------------------------------------------------------------------*/ 14294c96e61dd85c172b999d6afc88ce6640aeba9962sewardj/* Decimal Floating Point (DFP) externally visible helper functions */ 14304c96e61dd85c172b999d6afc88ce6640aeba9962sewardj/* that implement Iop_BCDtoDPB and Iop_DPBtoBCD */ 14314c96e61dd85c172b999d6afc88ce6640aeba9962sewardj/*------------------------------------------------------------------*/ 14324c96e61dd85c172b999d6afc88ce6640aeba9962sewardj 14334c96e61dd85c172b999d6afc88ce6640aeba9962sewardj#define NOT( x ) ( ( ( x ) == 0) ? 1 : 0) 14344c96e61dd85c172b999d6afc88ce6640aeba9962sewardj#define GET( x, y ) ( ( ( x ) & ( 0x1UL << ( y ) ) ) >> ( y ) ) 14354c96e61dd85c172b999d6afc88ce6640aeba9962sewardj#define PUT( x, y ) ( ( x )<< ( y ) ) 14364c96e61dd85c172b999d6afc88ce6640aeba9962sewardj 14378bde7f1c67483371551aac0d4019c24c919063f7sewardjstatic ULong dpb_to_bcd( ULong chunk ) 14384c96e61dd85c172b999d6afc88ce6640aeba9962sewardj{ 14394c96e61dd85c172b999d6afc88ce6640aeba9962sewardj Short a, b, c, d, e, f, g, h, i, j, k, m; 14404c96e61dd85c172b999d6afc88ce6640aeba9962sewardj Short p, q, r, s, t, u, v, w, x, y; 14414c96e61dd85c172b999d6afc88ce6640aeba9962sewardj ULong value; 14424c96e61dd85c172b999d6afc88ce6640aeba9962sewardj 14434c96e61dd85c172b999d6afc88ce6640aeba9962sewardj /* convert 10 bit densely packed BCD to BCD */ 14444c96e61dd85c172b999d6afc88ce6640aeba9962sewardj p = GET( chunk, 9 ); 14454c96e61dd85c172b999d6afc88ce6640aeba9962sewardj q = GET( chunk, 8 ); 14464c96e61dd85c172b999d6afc88ce6640aeba9962sewardj r = GET( chunk, 7 ); 14474c96e61dd85c172b999d6afc88ce6640aeba9962sewardj s = GET( chunk, 6 ); 14484c96e61dd85c172b999d6afc88ce6640aeba9962sewardj t = GET( chunk, 5 ); 14494c96e61dd85c172b999d6afc88ce6640aeba9962sewardj u = GET( chunk, 4 ); 14504c96e61dd85c172b999d6afc88ce6640aeba9962sewardj v = GET( chunk, 3 ); 14514c96e61dd85c172b999d6afc88ce6640aeba9962sewardj w = GET( chunk, 2 ); 14524c96e61dd85c172b999d6afc88ce6640aeba9962sewardj x = GET( chunk, 1 ); 14534c96e61dd85c172b999d6afc88ce6640aeba9962sewardj y = GET( chunk, 0 ); 14544c96e61dd85c172b999d6afc88ce6640aeba9962sewardj 14554c96e61dd85c172b999d6afc88ce6640aeba9962sewardj /* The BCD bit values are given by the following boolean equations.*/ 14564c96e61dd85c172b999d6afc88ce6640aeba9962sewardj a = ( NOT(s) & v & w ) | ( t & v & w & s ) | ( v & w & NOT(x) ); 14574c96e61dd85c172b999d6afc88ce6640aeba9962sewardj b = ( p & s & x & NOT(t) ) | ( p & NOT(w) ) | ( p & NOT(v) ); 14584c96e61dd85c172b999d6afc88ce6640aeba9962sewardj c = ( q & s & x & NOT(t) ) | ( q & NOT(w) ) | ( q & NOT(v) ); 14594c96e61dd85c172b999d6afc88ce6640aeba9962sewardj d = r; 14604c96e61dd85c172b999d6afc88ce6640aeba9962sewardj e = ( v & NOT(w) & x ) | ( s & v & w & x ) | ( NOT(t) & v & x & w ); 14614c96e61dd85c172b999d6afc88ce6640aeba9962sewardj f = ( p & t & v & w & x & NOT(s) ) | ( s & NOT(x) & v ) | ( s & NOT(v) ); 14624c96e61dd85c172b999d6afc88ce6640aeba9962sewardj g = ( q & t & w & v & x & NOT(s) ) | ( t & NOT(x) & v ) | ( t & NOT(v) ); 14634c96e61dd85c172b999d6afc88ce6640aeba9962sewardj h = u; 14644c96e61dd85c172b999d6afc88ce6640aeba9962sewardj i = ( t & v & w & x ) | ( s & v & w & x ) | ( v & NOT(w) & NOT(x) ); 14654c96e61dd85c172b999d6afc88ce6640aeba9962sewardj j = ( p & NOT(s) & NOT(t) & w & v ) | ( s & v & NOT(w) & x ) 14664c96e61dd85c172b999d6afc88ce6640aeba9962sewardj | ( p & w & NOT(x) & v ) | ( w & NOT(v) ); 14674c96e61dd85c172b999d6afc88ce6640aeba9962sewardj k = ( q & NOT(s) & NOT(t) & v & w ) | ( t & v & NOT(w) & x ) 14684c96e61dd85c172b999d6afc88ce6640aeba9962sewardj | ( q & v & w & NOT(x) ) | ( x & NOT(v) ); 14694c96e61dd85c172b999d6afc88ce6640aeba9962sewardj m = y; 14704c96e61dd85c172b999d6afc88ce6640aeba9962sewardj 14714c96e61dd85c172b999d6afc88ce6640aeba9962sewardj value = PUT(a, 11) | PUT(b, 10) | PUT(c, 9) | PUT(d, 8) | PUT(e, 7) 14724c96e61dd85c172b999d6afc88ce6640aeba9962sewardj | PUT(f, 6) | PUT(g, 5) | PUT(h, 4) | PUT(i, 3) | PUT(j, 2) 14734c96e61dd85c172b999d6afc88ce6640aeba9962sewardj | PUT(k, 1) | PUT(m, 0); 14744c96e61dd85c172b999d6afc88ce6640aeba9962sewardj return value; 14754c96e61dd85c172b999d6afc88ce6640aeba9962sewardj} 14764c96e61dd85c172b999d6afc88ce6640aeba9962sewardj 14778bde7f1c67483371551aac0d4019c24c919063f7sewardjstatic ULong bcd_to_dpb( ULong chunk ) 14784c96e61dd85c172b999d6afc88ce6640aeba9962sewardj{ 14794c96e61dd85c172b999d6afc88ce6640aeba9962sewardj Short a, b, c, d, e, f, g, h, i, j, k, m; 14804c96e61dd85c172b999d6afc88ce6640aeba9962sewardj Short p, q, r, s, t, u, v, w, x, y; 14814c96e61dd85c172b999d6afc88ce6640aeba9962sewardj ULong value; 14824c96e61dd85c172b999d6afc88ce6640aeba9962sewardj /* Convert a 3 digit BCD value to a 10 bit Densely Packed Binary (DPD) value 14834c96e61dd85c172b999d6afc88ce6640aeba9962sewardj The boolean equations to calculate the value of each of the DPD bit 14844c96e61dd85c172b999d6afc88ce6640aeba9962sewardj is given in Appendix B of Book 1: Power ISA User Instruction set. The 14854c96e61dd85c172b999d6afc88ce6640aeba9962sewardj bits for the DPD number are [abcdefghijkm]. The bits for the BCD value 14864c96e61dd85c172b999d6afc88ce6640aeba9962sewardj are [pqrstuvwxy]. The boolean logic equations in psuedo C code are: 14874c96e61dd85c172b999d6afc88ce6640aeba9962sewardj */ 14884c96e61dd85c172b999d6afc88ce6640aeba9962sewardj a = GET( chunk, 11 ); 14894c96e61dd85c172b999d6afc88ce6640aeba9962sewardj b = GET( chunk, 10 ); 14904c96e61dd85c172b999d6afc88ce6640aeba9962sewardj c = GET( chunk, 9 ); 14914c96e61dd85c172b999d6afc88ce6640aeba9962sewardj d = GET( chunk, 8 ); 14924c96e61dd85c172b999d6afc88ce6640aeba9962sewardj e = GET( chunk, 7 ); 14934c96e61dd85c172b999d6afc88ce6640aeba9962sewardj f = GET( chunk, 6 ); 14944c96e61dd85c172b999d6afc88ce6640aeba9962sewardj g = GET( chunk, 5 ); 14954c96e61dd85c172b999d6afc88ce6640aeba9962sewardj h = GET( chunk, 4 ); 14964c96e61dd85c172b999d6afc88ce6640aeba9962sewardj i = GET( chunk, 3 ); 14974c96e61dd85c172b999d6afc88ce6640aeba9962sewardj j = GET( chunk, 2 ); 14984c96e61dd85c172b999d6afc88ce6640aeba9962sewardj k = GET( chunk, 1 ); 14994c96e61dd85c172b999d6afc88ce6640aeba9962sewardj m = GET( chunk, 0 ); 15004c96e61dd85c172b999d6afc88ce6640aeba9962sewardj 15014c96e61dd85c172b999d6afc88ce6640aeba9962sewardj p = ( f & a & i & NOT(e) ) | ( j & a & NOT(i) ) | ( b & NOT(a) ); 15024c96e61dd85c172b999d6afc88ce6640aeba9962sewardj q = ( g & a & i & NOT(e) ) | ( k & a & NOT(i) ) | ( c & NOT(a) ); 15034c96e61dd85c172b999d6afc88ce6640aeba9962sewardj r = d; 15044c96e61dd85c172b999d6afc88ce6640aeba9962sewardj s = ( j & NOT(a) & e & NOT(i) ) | ( f & NOT(i) & NOT(e) ) 15054c96e61dd85c172b999d6afc88ce6640aeba9962sewardj | ( f & NOT(a) & NOT(e) ) | ( e & i ); 15064c96e61dd85c172b999d6afc88ce6640aeba9962sewardj t = ( k & NOT(a) & e & NOT(i) ) | ( g & NOT(i) & NOT(e) ) 15074c96e61dd85c172b999d6afc88ce6640aeba9962sewardj | ( g & NOT(a) & NOT(e) ) | ( a & i ); 15084c96e61dd85c172b999d6afc88ce6640aeba9962sewardj u = h; 15094c96e61dd85c172b999d6afc88ce6640aeba9962sewardj v = a | e | i; 15104c96e61dd85c172b999d6afc88ce6640aeba9962sewardj w = ( NOT(e) & j & NOT(i) ) | ( e & i ) | a; 15114c96e61dd85c172b999d6afc88ce6640aeba9962sewardj x = ( NOT(a) & k & NOT(i) ) | ( a & i ) | e; 15124c96e61dd85c172b999d6afc88ce6640aeba9962sewardj y = m; 15134c96e61dd85c172b999d6afc88ce6640aeba9962sewardj 15144c96e61dd85c172b999d6afc88ce6640aeba9962sewardj value = PUT(p, 9) | PUT(q, 8) | PUT(r, 7) | PUT(s, 6) | PUT(t, 5) 15154c96e61dd85c172b999d6afc88ce6640aeba9962sewardj | PUT(u, 4) | PUT(v, 3) | PUT(w, 2) | PUT(x, 1) | y; 15164c96e61dd85c172b999d6afc88ce6640aeba9962sewardj 15174c96e61dd85c172b999d6afc88ce6640aeba9962sewardj return value; 15184c96e61dd85c172b999d6afc88ce6640aeba9962sewardj} 15194c96e61dd85c172b999d6afc88ce6640aeba9962sewardj 15208bde7f1c67483371551aac0d4019c24c919063f7sewardjULong h_calc_DPBtoBCD( ULong dpb ) 15214c96e61dd85c172b999d6afc88ce6640aeba9962sewardj{ 15224c96e61dd85c172b999d6afc88ce6640aeba9962sewardj ULong result, chunk; 15234c96e61dd85c172b999d6afc88ce6640aeba9962sewardj Int i; 15244c96e61dd85c172b999d6afc88ce6640aeba9962sewardj 15254c96e61dd85c172b999d6afc88ce6640aeba9962sewardj result = 0; 15264c96e61dd85c172b999d6afc88ce6640aeba9962sewardj 15274c96e61dd85c172b999d6afc88ce6640aeba9962sewardj for (i = 0; i < 5; i++) { 15284c96e61dd85c172b999d6afc88ce6640aeba9962sewardj chunk = dpb >> ( 4 - i ) * 10; 15294c96e61dd85c172b999d6afc88ce6640aeba9962sewardj result = result << 12; 15304c96e61dd85c172b999d6afc88ce6640aeba9962sewardj result |= dpb_to_bcd( chunk & 0x3FF ); 15314c96e61dd85c172b999d6afc88ce6640aeba9962sewardj } 15324c96e61dd85c172b999d6afc88ce6640aeba9962sewardj return result; 15334c96e61dd85c172b999d6afc88ce6640aeba9962sewardj} 15344c96e61dd85c172b999d6afc88ce6640aeba9962sewardj 15358bde7f1c67483371551aac0d4019c24c919063f7sewardjULong h_calc_BCDtoDPB( ULong bcd ) 15364c96e61dd85c172b999d6afc88ce6640aeba9962sewardj{ 15374c96e61dd85c172b999d6afc88ce6640aeba9962sewardj ULong result, chunk; 15384c96e61dd85c172b999d6afc88ce6640aeba9962sewardj Int i; 15394c96e61dd85c172b999d6afc88ce6640aeba9962sewardj 15404c96e61dd85c172b999d6afc88ce6640aeba9962sewardj result = 0; 15414c96e61dd85c172b999d6afc88ce6640aeba9962sewardj 15424c96e61dd85c172b999d6afc88ce6640aeba9962sewardj for (i = 0; i < 5; i++) { 15434c96e61dd85c172b999d6afc88ce6640aeba9962sewardj chunk = bcd >> ( 4 - i ) * 12; 15444c96e61dd85c172b999d6afc88ce6640aeba9962sewardj result = result << 10; 15454c96e61dd85c172b999d6afc88ce6640aeba9962sewardj result |= bcd_to_dpb( chunk & 0xFFF ); 15464c96e61dd85c172b999d6afc88ce6640aeba9962sewardj } 15474c96e61dd85c172b999d6afc88ce6640aeba9962sewardj return result; 15484c96e61dd85c172b999d6afc88ce6640aeba9962sewardj} 15494c96e61dd85c172b999d6afc88ce6640aeba9962sewardj#undef NOT 15504c96e61dd85c172b999d6afc88ce6640aeba9962sewardj#undef GET 15514c96e61dd85c172b999d6afc88ce6640aeba9962sewardj#undef PUT 1552310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardj 15538bde7f1c67483371551aac0d4019c24c919063f7sewardj 15548bde7f1c67483371551aac0d4019c24c919063f7sewardj/* ----------------------------------------------------- */ 15558bde7f1c67483371551aac0d4019c24c919063f7sewardj/* Signed and unsigned integer division, that behave like 1556bbcf188f6ae64a44fb31414eb9e1a738b4befcc0sewardj the ARMv7 UDIV ansd SDIV instructions. 1557bbcf188f6ae64a44fb31414eb9e1a738b4befcc0sewardj 1558bbcf188f6ae64a44fb31414eb9e1a738b4befcc0sewardj sdiv32 also behaves like 64-bit v8 SDIV on w-regs. 1559bbcf188f6ae64a44fb31414eb9e1a738b4befcc0sewardj udiv32 also behaves like 64-bit v8 UDIV on w-regs. 1560bbcf188f6ae64a44fb31414eb9e1a738b4befcc0sewardj*/ 15618bde7f1c67483371551aac0d4019c24c919063f7sewardj/* ----------------------------------------------------- */ 15628bde7f1c67483371551aac0d4019c24c919063f7sewardj 15638bde7f1c67483371551aac0d4019c24c919063f7sewardjUInt h_calc_udiv32_w_arm_semantics ( UInt x, UInt y ) 15648bde7f1c67483371551aac0d4019c24c919063f7sewardj{ 15658bde7f1c67483371551aac0d4019c24c919063f7sewardj // Division by zero --> zero 15668bde7f1c67483371551aac0d4019c24c919063f7sewardj if (UNLIKELY(y == 0)) return 0; 15678bde7f1c67483371551aac0d4019c24c919063f7sewardj // C requires rounding towards zero, which is also what we need. 15688bde7f1c67483371551aac0d4019c24c919063f7sewardj return x / y; 15698bde7f1c67483371551aac0d4019c24c919063f7sewardj} 15708bde7f1c67483371551aac0d4019c24c919063f7sewardj 1571bbcf188f6ae64a44fb31414eb9e1a738b4befcc0sewardjULong h_calc_udiv64_w_arm_semantics ( ULong x, ULong y ) 1572bbcf188f6ae64a44fb31414eb9e1a738b4befcc0sewardj{ 1573bbcf188f6ae64a44fb31414eb9e1a738b4befcc0sewardj // Division by zero --> zero 1574bbcf188f6ae64a44fb31414eb9e1a738b4befcc0sewardj if (UNLIKELY(y == 0)) return 0; 1575bbcf188f6ae64a44fb31414eb9e1a738b4befcc0sewardj // C requires rounding towards zero, which is also what we need. 1576bbcf188f6ae64a44fb31414eb9e1a738b4befcc0sewardj return x / y; 1577bbcf188f6ae64a44fb31414eb9e1a738b4befcc0sewardj} 1578bbcf188f6ae64a44fb31414eb9e1a738b4befcc0sewardj 15798bde7f1c67483371551aac0d4019c24c919063f7sewardjInt h_calc_sdiv32_w_arm_semantics ( Int x, Int y ) 15808bde7f1c67483371551aac0d4019c24c919063f7sewardj{ 15818bde7f1c67483371551aac0d4019c24c919063f7sewardj // Division by zero --> zero 15828bde7f1c67483371551aac0d4019c24c919063f7sewardj if (UNLIKELY(y == 0)) return 0; 1583bbcf188f6ae64a44fb31414eb9e1a738b4befcc0sewardj // The single case that produces an unrepresentable result 15848bde7f1c67483371551aac0d4019c24c919063f7sewardj if (UNLIKELY( ((UInt)x) == ((UInt)0x80000000) 15858bde7f1c67483371551aac0d4019c24c919063f7sewardj && ((UInt)y) == ((UInt)0xFFFFFFFF) )) 15868bde7f1c67483371551aac0d4019c24c919063f7sewardj return (Int)(UInt)0x80000000; 15878bde7f1c67483371551aac0d4019c24c919063f7sewardj // Else return the result rounded towards zero. C89 says 15888bde7f1c67483371551aac0d4019c24c919063f7sewardj // this is implementation defined (in the signed case), but gcc 15898bde7f1c67483371551aac0d4019c24c919063f7sewardj // promises to round towards zero. Nevertheless, at startup, 15908bde7f1c67483371551aac0d4019c24c919063f7sewardj // in main_main.c, do a check for that. 15918bde7f1c67483371551aac0d4019c24c919063f7sewardj return x / y; 15928bde7f1c67483371551aac0d4019c24c919063f7sewardj} 15938bde7f1c67483371551aac0d4019c24c919063f7sewardj 1594bbcf188f6ae64a44fb31414eb9e1a738b4befcc0sewardjLong h_calc_sdiv64_w_arm_semantics ( Long x, Long y ) 1595bbcf188f6ae64a44fb31414eb9e1a738b4befcc0sewardj{ 1596bbcf188f6ae64a44fb31414eb9e1a738b4befcc0sewardj // Division by zero --> zero 1597bbcf188f6ae64a44fb31414eb9e1a738b4befcc0sewardj if (UNLIKELY(y == 0)) return 0; 1598bbcf188f6ae64a44fb31414eb9e1a738b4befcc0sewardj // The single case that produces an unrepresentable result 1599bbcf188f6ae64a44fb31414eb9e1a738b4befcc0sewardj if (UNLIKELY( ((ULong)x) == ((ULong)0x8000000000000000ULL ) 1600bbcf188f6ae64a44fb31414eb9e1a738b4befcc0sewardj && ((ULong)y) == ((ULong)0xFFFFFFFFFFFFFFFFULL ) )) 1601bbcf188f6ae64a44fb31414eb9e1a738b4befcc0sewardj return (Long)(ULong)0x8000000000000000ULL; 1602bbcf188f6ae64a44fb31414eb9e1a738b4befcc0sewardj // Else return the result rounded towards zero. C89 says 1603bbcf188f6ae64a44fb31414eb9e1a738b4befcc0sewardj // this is implementation defined (in the signed case), but gcc 1604bbcf188f6ae64a44fb31414eb9e1a738b4befcc0sewardj // promises to round towards zero. Nevertheless, at startup, 1605bbcf188f6ae64a44fb31414eb9e1a738b4befcc0sewardj // in main_main.c, do a check for that. 1606bbcf188f6ae64a44fb31414eb9e1a738b4befcc0sewardj return x / y; 1607bbcf188f6ae64a44fb31414eb9e1a738b4befcc0sewardj} 1608bbcf188f6ae64a44fb31414eb9e1a738b4befcc0sewardj 16098bde7f1c67483371551aac0d4019c24c919063f7sewardj 161038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/*---------------------------------------------------------------*/ 1611cef7d3e3df4796e35b4521158d9dc058f034aa87sewardj/*--- end host_generic_simd64.c ---*/ 161238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/*---------------------------------------------------------------*/ 1613