host_generic_simd64.c revision 5f438dd73072211989c6d496845bdc9b777ecbec
138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/*---------------------------------------------------------------*/ 3752f90673ebbb6b2f55fc5e46606dea371313713sewardj/*--- begin host_generic_simd64.c ---*/ 438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/*---------------------------------------------------------------*/ 538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* 7752f90673ebbb6b2f55fc5e46606dea371313713sewardj This file is part of Valgrind, a dynamic binary instrumentation 8752f90673ebbb6b2f55fc5e46606dea371313713sewardj framework. 938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 10752f90673ebbb6b2f55fc5e46606dea371313713sewardj Copyright (C) 2004-2010 OpenWorks LLP 11752f90673ebbb6b2f55fc5e46606dea371313713sewardj info@open-works.net 127bd6ffe203f3aa9e7b25f7eae40a9b9cf48710cfsewardj 13752f90673ebbb6b2f55fc5e46606dea371313713sewardj This program is free software; you can redistribute it and/or 14752f90673ebbb6b2f55fc5e46606dea371313713sewardj modify it under the terms of the GNU General Public License as 15752f90673ebbb6b2f55fc5e46606dea371313713sewardj published by the Free Software Foundation; either version 2 of the 16752f90673ebbb6b2f55fc5e46606dea371313713sewardj License, or (at your option) any later version. 177bd6ffe203f3aa9e7b25f7eae40a9b9cf48710cfsewardj 18752f90673ebbb6b2f55fc5e46606dea371313713sewardj This program is distributed in the hope that it will be useful, but 19752f90673ebbb6b2f55fc5e46606dea371313713sewardj WITHOUT ANY WARRANTY; without even the implied warranty of 20752f90673ebbb6b2f55fc5e46606dea371313713sewardj MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21752f90673ebbb6b2f55fc5e46606dea371313713sewardj General Public License for more details. 22752f90673ebbb6b2f55fc5e46606dea371313713sewardj 23752f90673ebbb6b2f55fc5e46606dea371313713sewardj You should have received a copy of the GNU General Public License 24752f90673ebbb6b2f55fc5e46606dea371313713sewardj along with this program; if not, write to the Free Software 25752f90673ebbb6b2f55fc5e46606dea371313713sewardj Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 267bd6ffe203f3aa9e7b25f7eae40a9b9cf48710cfsewardj 02110-1301, USA. 277bd6ffe203f3aa9e7b25f7eae40a9b9cf48710cfsewardj 28752f90673ebbb6b2f55fc5e46606dea371313713sewardj The GNU General Public License is contained in the file COPYING. 2938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 3038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj Neither the names of the U.S. Department of Energy nor the 3138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj University of California nor the names of its contributors may be 3238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj used to endorse or promote products derived from this software 3338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj without prior written permission. 3438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj*/ 3538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 3638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* Generic helper functions for doing 64-bit SIMD arithmetic in cases 3738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj where the instruction selectors cannot generate code in-line. 3838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj These are purely back-end entities and cannot be seen/referenced 3938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj from IR. */ 4038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 4138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj#include "libvex_basictypes.h" 42cef7d3e3df4796e35b4521158d9dc058f034aa87sewardj#include "host_generic_simd64.h" 4338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 4438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 4538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 4638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* Tuple/select functions for 32x2 vectors. */ 4738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 4838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline ULong mk32x2 ( UInt w1, UInt w0 ) { 4938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return (((ULong)w1) << 32) | ((ULong)w0); 5038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 5138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 5238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UInt sel32x2_1 ( ULong w64 ) { 53d19fc161147086f31126fef0955b426b4f843d02sewardj return 0xFFFFFFFF & toUInt(w64 >> 32); 5438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 5538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UInt sel32x2_0 ( ULong w64 ) { 56d19fc161147086f31126fef0955b426b4f843d02sewardj return 0xFFFFFFFF & toUInt(w64); 5738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 5838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 5938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 6038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* Tuple/select functions for 16x4 vectors. gcc is pretty hopeless 6138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj with 64-bit shifts so we give it a hand. */ 6238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 6338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline ULong mk16x4 ( UShort w3, UShort w2, 6438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UShort w1, UShort w0 ) { 6538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UInt hi32 = (((UInt)w3) << 16) | ((UInt)w2); 6638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UInt lo32 = (((UInt)w1) << 16) | ((UInt)w0); 6738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk32x2(hi32, lo32); 6838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 6938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 7038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UShort sel16x4_3 ( ULong w64 ) { 71d19fc161147086f31126fef0955b426b4f843d02sewardj UInt hi32 = toUInt(w64 >> 32); 72d19fc161147086f31126fef0955b426b4f843d02sewardj return toUShort(0xFFFF & (hi32 >> 16)); 7338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 7438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UShort sel16x4_2 ( ULong w64 ) { 75d19fc161147086f31126fef0955b426b4f843d02sewardj UInt hi32 = toUInt(w64 >> 32); 76d19fc161147086f31126fef0955b426b4f843d02sewardj return toUShort(0xFFFF & hi32); 7738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 7838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UShort sel16x4_1 ( ULong w64 ) { 7938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UInt lo32 = (UInt)w64; 80d19fc161147086f31126fef0955b426b4f843d02sewardj return toUShort(0xFFFF & (lo32 >> 16)); 8138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 8238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UShort sel16x4_0 ( ULong w64 ) { 8338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UInt lo32 = (UInt)w64; 84d19fc161147086f31126fef0955b426b4f843d02sewardj return toUShort(0xFFFF & lo32); 8538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 8638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 8738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 8838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* Tuple/select functions for 8x8 vectors. */ 8938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 9038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline ULong mk8x8 ( UChar w7, UChar w6, 9138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UChar w5, UChar w4, 9238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UChar w3, UChar w2, 93e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj UChar w1, UChar w0 ) { 9438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UInt hi32 = (((UInt)w7) << 24) | (((UInt)w6) << 16) 9538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj | (((UInt)w5) << 8) | (((UInt)w4) << 0); 9638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UInt lo32 = (((UInt)w3) << 24) | (((UInt)w2) << 16) 9738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj | (((UInt)w1) << 8) | (((UInt)w0) << 0); 9838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk32x2(hi32, lo32); 9938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 10038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 10138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar sel8x8_7 ( ULong w64 ) { 102d19fc161147086f31126fef0955b426b4f843d02sewardj UInt hi32 = toUInt(w64 >> 32); 103d19fc161147086f31126fef0955b426b4f843d02sewardj return toUChar(0xFF & (hi32 >> 24)); 10438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 10538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar sel8x8_6 ( ULong w64 ) { 106d19fc161147086f31126fef0955b426b4f843d02sewardj UInt hi32 = toUInt(w64 >> 32); 107d19fc161147086f31126fef0955b426b4f843d02sewardj return toUChar(0xFF & (hi32 >> 16)); 10838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 10938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar sel8x8_5 ( ULong w64 ) { 110d19fc161147086f31126fef0955b426b4f843d02sewardj UInt hi32 = toUInt(w64 >> 32); 111d19fc161147086f31126fef0955b426b4f843d02sewardj return toUChar(0xFF & (hi32 >> 8)); 11238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 11338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar sel8x8_4 ( ULong w64 ) { 114d19fc161147086f31126fef0955b426b4f843d02sewardj UInt hi32 = toUInt(w64 >> 32); 115d19fc161147086f31126fef0955b426b4f843d02sewardj return toUChar(0xFF & (hi32 >> 0)); 11638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 11738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar sel8x8_3 ( ULong w64 ) { 11838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UInt lo32 = (UInt)w64; 119d19fc161147086f31126fef0955b426b4f843d02sewardj return toUChar(0xFF & (lo32 >> 24)); 12038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 12138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar sel8x8_2 ( ULong w64 ) { 12238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UInt lo32 = (UInt)w64; 123d19fc161147086f31126fef0955b426b4f843d02sewardj return toUChar(0xFF & (lo32 >> 16)); 12438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 12538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar sel8x8_1 ( ULong w64 ) { 12638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UInt lo32 = (UInt)w64; 127d19fc161147086f31126fef0955b426b4f843d02sewardj return toUChar(0xFF & (lo32 >> 8)); 12838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 12938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar sel8x8_0 ( ULong w64 ) { 13038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UInt lo32 = (UInt)w64; 131d19fc161147086f31126fef0955b426b4f843d02sewardj return toUChar(0xFF & (lo32 >> 0)); 13238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 13338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 134d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardjstatic inline UChar index8x8 ( ULong w64, UChar ix ) { 135d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj ix &= 7; 136d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj return toUChar((w64 >> (8*ix)) & 0xFF); 137d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj} 138d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj 13938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 14038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* Scalar helpers. */ 14138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 14238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline Short qadd16S ( Short xx, Short yy ) 14338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 14438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj Int t = ((Int)xx) + ((Int)yy); 14538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj if (t < -32768) t = -32768; 14638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj if (t > 32767) t = 32767; 14738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return (Short)t; 14838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 14938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 15038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline Char qadd8S ( Char xx, Char yy ) 15138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 15238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj Int t = ((Int)xx) + ((Int)yy); 15338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj if (t < -128) t = -128; 15438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj if (t > 127) t = 127; 15538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return (Char)t; 15638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 15738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 15838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UShort qadd16U ( UShort xx, UShort yy ) 15938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 16038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UInt t = ((UInt)xx) + ((UInt)yy); 16138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj if (t > 0xFFFF) t = 0xFFFF; 16238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return (UShort)t; 16338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 16438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 16538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar qadd8U ( UChar xx, UChar yy ) 16638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 16738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UInt t = ((UInt)xx) + ((UInt)yy); 16838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj if (t > 0xFF) t = 0xFF; 16938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return (UChar)t; 17038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 17138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 17238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline Short qsub16S ( Short xx, Short yy ) 17338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 17438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj Int t = ((Int)xx) - ((Int)yy); 17538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj if (t < -32768) t = -32768; 17638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj if (t > 32767) t = 32767; 17738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return (Short)t; 17838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 17938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 18038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline Char qsub8S ( Char xx, Char yy ) 18138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 18238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj Int t = ((Int)xx) - ((Int)yy); 18338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj if (t < -128) t = -128; 18438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj if (t > 127) t = 127; 18538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return (Char)t; 18638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 18738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 18838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UShort qsub16U ( UShort xx, UShort yy ) 18938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 19038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj Int t = ((Int)xx) - ((Int)yy); 19138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj if (t < 0) t = 0; 19238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj if (t > 0xFFFF) t = 0xFFFF; 19338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return (UShort)t; 19438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 19538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 19638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar qsub8U ( UChar xx, UChar yy ) 19738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 19838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj Int t = ((Int)xx) - ((Int)yy); 19938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj if (t < 0) t = 0; 20038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj if (t > 0xFF) t = 0xFF; 20138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return (UChar)t; 20238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 20338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 20438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline Short mul16 ( Short xx, Short yy ) 20538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 20638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj Int t = ((Int)xx) * ((Int)yy); 20738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return (Short)t; 20838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 20938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 210d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardjstatic inline Int mul32 ( Int xx, Int yy ) 211d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj{ 212d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj Int t = ((Int)xx) * ((Int)yy); 213d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj return (Int)t; 214d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj} 215d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj 21638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline Short mulhi16S ( Short xx, Short yy ) 21738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 21838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj Int t = ((Int)xx) * ((Int)yy); 21938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj t >>=/*s*/ 16; 22038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return (Short)t; 22138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 22238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 22338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UShort mulhi16U ( UShort xx, UShort yy ) 22438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 22538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UInt t = ((UInt)xx) * ((UInt)yy); 22638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj t >>=/*u*/ 16; 22738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return (UShort)t; 22838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 22938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 23038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UInt cmpeq32 ( UInt xx, UInt yy ) 23138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 23238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return xx==yy ? 0xFFFFFFFF : 0; 23338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 23438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 23538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UShort cmpeq16 ( UShort xx, UShort yy ) 23638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 237d19fc161147086f31126fef0955b426b4f843d02sewardj return toUShort(xx==yy ? 0xFFFF : 0); 23838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 23938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 24038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar cmpeq8 ( UChar xx, UChar yy ) 24138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 242d19fc161147086f31126fef0955b426b4f843d02sewardj return toUChar(xx==yy ? 0xFF : 0); 24338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 24438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 24538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UInt cmpgt32S ( Int xx, Int yy ) 24638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 24738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return xx>yy ? 0xFFFFFFFF : 0; 24838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 24938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 25038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UShort cmpgt16S ( Short xx, Short yy ) 25138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 252d19fc161147086f31126fef0955b426b4f843d02sewardj return toUShort(xx>yy ? 0xFFFF : 0); 25338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 25438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 25538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar cmpgt8S ( Char xx, Char yy ) 25638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 257d19fc161147086f31126fef0955b426b4f843d02sewardj return toUChar(xx>yy ? 0xFF : 0); 25838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 25938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 2601806918ae2783af5808f00876581e01c7b650a0dsewardjstatic inline UInt cmpnez32 ( UInt xx ) 2611806918ae2783af5808f00876581e01c7b650a0dsewardj{ 2621806918ae2783af5808f00876581e01c7b650a0dsewardj return xx==0 ? 0 : 0xFFFFFFFF; 2631806918ae2783af5808f00876581e01c7b650a0dsewardj} 2641806918ae2783af5808f00876581e01c7b650a0dsewardj 2651806918ae2783af5808f00876581e01c7b650a0dsewardjstatic inline UShort cmpnez16 ( UShort xx ) 2661806918ae2783af5808f00876581e01c7b650a0dsewardj{ 267d19fc161147086f31126fef0955b426b4f843d02sewardj return toUShort(xx==0 ? 0 : 0xFFFF); 2681806918ae2783af5808f00876581e01c7b650a0dsewardj} 2691806918ae2783af5808f00876581e01c7b650a0dsewardj 2701806918ae2783af5808f00876581e01c7b650a0dsewardjstatic inline UChar cmpnez8 ( UChar xx ) 2711806918ae2783af5808f00876581e01c7b650a0dsewardj{ 272d19fc161147086f31126fef0955b426b4f843d02sewardj return toUChar(xx==0 ? 0 : 0xFF); 2731806918ae2783af5808f00876581e01c7b650a0dsewardj} 2741806918ae2783af5808f00876581e01c7b650a0dsewardj 275c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardjstatic inline Short qnarrow32Sto16S ( UInt xx0 ) 27638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 27738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj Int xx = (Int)xx0; 27838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj if (xx < -32768) xx = -32768; 27938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj if (xx > 32767) xx = 32767; 28038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return (Short)xx; 28138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 28238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 283c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardjstatic inline Char qnarrow16Sto8S ( UShort xx0 ) 28438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 28538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj Short xx = (Short)xx0; 28638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj if (xx < -128) xx = -128; 28738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj if (xx > 127) xx = 127; 28838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return (Char)xx; 28938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 29038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 291c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardjstatic inline UChar qnarrow16Sto8U ( UShort xx0 ) 29238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 29338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj Short xx = (Short)xx0; 29438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj if (xx < 0) xx = 0; 29538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj if (xx > 255) xx = 255; 29638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return (UChar)xx; 29738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 29838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 29938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* shifts: we don't care about out-of-range ones, since 30038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj that is dealt with at a higher level. */ 30138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 302d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardjstatic inline UChar shl8 ( UChar v, UInt n ) 303d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj{ 304d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj return toUChar(v << n); 305d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj} 306d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj 307d71ba837242cc470f622335b1c650bce8886a533sewardjstatic inline UChar sar8 ( UChar v, UInt n ) 308d71ba837242cc470f622335b1c650bce8886a533sewardj{ 309d71ba837242cc470f622335b1c650bce8886a533sewardj return toUChar(((Char)v) >> n); 310d71ba837242cc470f622335b1c650bce8886a533sewardj} 311d71ba837242cc470f622335b1c650bce8886a533sewardj 31238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UShort shl16 ( UShort v, UInt n ) 31338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 314d19fc161147086f31126fef0955b426b4f843d02sewardj return toUShort(v << n); 31538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 31638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 31738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UShort shr16 ( UShort v, UInt n ) 31838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 319d19fc161147086f31126fef0955b426b4f843d02sewardj return toUShort((((UShort)v) >> n)); 32038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 32138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 32238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UShort sar16 ( UShort v, UInt n ) 32338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 324d19fc161147086f31126fef0955b426b4f843d02sewardj return toUShort(((Short)v) >> n); 32538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 32638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 32738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UInt shl32 ( UInt v, UInt n ) 32838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 32938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return v << n; 33038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 33138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 33238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UInt shr32 ( UInt v, UInt n ) 33338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 33438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return (((UInt)v) >> n); 33538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 33638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 33738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UInt sar32 ( UInt v, UInt n ) 33838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 33938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return ((Int)v) >> n; 34038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 34138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 34238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar avg8U ( UChar xx, UChar yy ) 34338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 34438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UInt xxi = (UInt)xx; 34538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UInt yyi = (UInt)yy; 34638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UInt r = (xxi + yyi + 1) >> 1; 34738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return (UChar)r; 34838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 34938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 35038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UShort avg16U ( UShort xx, UShort yy ) 35138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 35238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UInt xxi = (UInt)xx; 35338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UInt yyi = (UInt)yy; 35438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UInt r = (xxi + yyi + 1) >> 1; 35538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return (UShort)r; 35638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 35738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 35838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline Short max16S ( Short xx, Short yy ) 35938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 360d19fc161147086f31126fef0955b426b4f843d02sewardj return toUShort((xx > yy) ? xx : yy); 36138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 36238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 36338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar max8U ( UChar xx, UChar yy ) 36438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 365d19fc161147086f31126fef0955b426b4f843d02sewardj return toUChar((xx > yy) ? xx : yy); 36638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 36738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 36838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline Short min16S ( Short xx, Short yy ) 36938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 370d19fc161147086f31126fef0955b426b4f843d02sewardj return toUShort((xx < yy) ? xx : yy); 37138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 37238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 37338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar min8U ( UChar xx, UChar yy ) 37438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 375d19fc161147086f31126fef0955b426b4f843d02sewardj return toUChar((xx < yy) ? xx : yy); 37638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 37738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 378e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline UShort hadd16U ( UShort xx, UShort yy ) 379e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 380e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj UInt xxi = (UInt)xx; 381e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj UInt yyi = (UInt)yy; 382e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj UInt r = (xxi + yyi) >> 1; 383e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return (UShort)r; 384e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 385e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 386e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline Short hadd16S ( Short xx, Short yy ) 387e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 388e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj Int xxi = (Int)xx; 389e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj Int yyi = (Int)yy; 390e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj Int r = (xxi + yyi) >> 1; 391e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return (Short)r; 392e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 393e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 394e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline UShort hsub16U ( UShort xx, UShort yy ) 395e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 396e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj UInt xxi = (UInt)xx; 397e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj UInt yyi = (UInt)yy; 398e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj UInt r = (xxi - yyi) >> 1; 399e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return (UShort)r; 400e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 401e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 402e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline Short hsub16S ( Short xx, Short yy ) 403e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 404e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj Int xxi = (Int)xx; 405e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj Int yyi = (Int)yy; 406e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj Int r = (xxi - yyi) >> 1; 407e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return (Short)r; 408e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 409e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 410e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline UChar hadd8U ( UChar xx, UChar yy ) 411e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 412e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj UInt xxi = (UInt)xx; 413e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj UInt yyi = (UInt)yy; 414e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj UInt r = (xxi + yyi) >> 1; 415e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return (UChar)r; 416e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 417e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 418e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline Char hadd8S ( Char xx, Char yy ) 419e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 420e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj Int xxi = (Int)xx; 421e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj Int yyi = (Int)yy; 422e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj Int r = (xxi + yyi) >> 1; 423e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return (Char)r; 424e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 425e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 426e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline UChar hsub8U ( UChar xx, UChar yy ) 427e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 428e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj UInt xxi = (UInt)xx; 429e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj UInt yyi = (UInt)yy; 430e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj UInt r = (xxi - yyi) >> 1; 431e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return (UChar)r; 432e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 433e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 434e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline Char hsub8S ( Char xx, Char yy ) 435e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 436e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj Int xxi = (Int)xx; 437e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj Int yyi = (Int)yy; 438e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj Int r = (xxi - yyi) >> 1; 439e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return (Char)r; 440e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 441e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 442310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardjstatic inline UInt absdiff8U ( UChar xx, UChar yy ) 443310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardj{ 444310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardj UInt xxu = (UChar)xx; 445310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardj UInt yyu = (UChar)yy; 446310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardj return xxu >= yyu ? xxu - yyu : yyu - xxu; 447310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardj} 448e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 44938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* ----------------------------------------------------- */ 45038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* Start of the externally visible functions. These simply 45138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj implement the corresponding IR primops. */ 45238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* ----------------------------------------------------- */ 45338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 45438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* ------------ Normal addition ------------ */ 45538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 45638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_Add32x2 ( ULong xx, ULong yy ) 45738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 45838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk32x2( 45938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel32x2_1(xx) + sel32x2_1(yy), 46038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel32x2_0(xx) + sel32x2_0(yy) 46138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 46238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 46338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 46438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_Add16x4 ( ULong xx, ULong yy ) 46538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 46638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk16x4( 467d19fc161147086f31126fef0955b426b4f843d02sewardj toUShort( sel16x4_3(xx) + sel16x4_3(yy) ), 468d19fc161147086f31126fef0955b426b4f843d02sewardj toUShort( sel16x4_2(xx) + sel16x4_2(yy) ), 469d19fc161147086f31126fef0955b426b4f843d02sewardj toUShort( sel16x4_1(xx) + sel16x4_1(yy) ), 470d19fc161147086f31126fef0955b426b4f843d02sewardj toUShort( sel16x4_0(xx) + sel16x4_0(yy) ) 47138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 47238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 47338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 47438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_Add8x8 ( ULong xx, ULong yy ) 47538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 47638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk8x8( 477d19fc161147086f31126fef0955b426b4f843d02sewardj toUChar( sel8x8_7(xx) + sel8x8_7(yy) ), 478d19fc161147086f31126fef0955b426b4f843d02sewardj toUChar( sel8x8_6(xx) + sel8x8_6(yy) ), 479d19fc161147086f31126fef0955b426b4f843d02sewardj toUChar( sel8x8_5(xx) + sel8x8_5(yy) ), 480d19fc161147086f31126fef0955b426b4f843d02sewardj toUChar( sel8x8_4(xx) + sel8x8_4(yy) ), 481d19fc161147086f31126fef0955b426b4f843d02sewardj toUChar( sel8x8_3(xx) + sel8x8_3(yy) ), 482d19fc161147086f31126fef0955b426b4f843d02sewardj toUChar( sel8x8_2(xx) + sel8x8_2(yy) ), 483d19fc161147086f31126fef0955b426b4f843d02sewardj toUChar( sel8x8_1(xx) + sel8x8_1(yy) ), 484d19fc161147086f31126fef0955b426b4f843d02sewardj toUChar( sel8x8_0(xx) + sel8x8_0(yy) ) 48538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 48638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 48738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 48838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* ------------ Saturating addition ------------ */ 48938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 49038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_QAdd16Sx4 ( ULong xx, ULong yy ) 49138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 49238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk16x4( 49338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qadd16S( sel16x4_3(xx), sel16x4_3(yy) ), 49438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qadd16S( sel16x4_2(xx), sel16x4_2(yy) ), 49538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qadd16S( sel16x4_1(xx), sel16x4_1(yy) ), 49638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qadd16S( sel16x4_0(xx), sel16x4_0(yy) ) 49738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 49838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 49938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 50038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_QAdd8Sx8 ( ULong xx, ULong yy ) 50138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 50238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk8x8( 50338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qadd8S( sel8x8_7(xx), sel8x8_7(yy) ), 50438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qadd8S( sel8x8_6(xx), sel8x8_6(yy) ), 50538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qadd8S( sel8x8_5(xx), sel8x8_5(yy) ), 50638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qadd8S( sel8x8_4(xx), sel8x8_4(yy) ), 50738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qadd8S( sel8x8_3(xx), sel8x8_3(yy) ), 50838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qadd8S( sel8x8_2(xx), sel8x8_2(yy) ), 50938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qadd8S( sel8x8_1(xx), sel8x8_1(yy) ), 51038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qadd8S( sel8x8_0(xx), sel8x8_0(yy) ) 51138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 51238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 51338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 51438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_QAdd16Ux4 ( ULong xx, ULong yy ) 51538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 51638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk16x4( 51738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qadd16U( sel16x4_3(xx), sel16x4_3(yy) ), 51838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qadd16U( sel16x4_2(xx), sel16x4_2(yy) ), 51938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qadd16U( sel16x4_1(xx), sel16x4_1(yy) ), 52038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qadd16U( sel16x4_0(xx), sel16x4_0(yy) ) 52138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 52238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 52338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 52438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_QAdd8Ux8 ( ULong xx, ULong yy ) 52538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 52638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk8x8( 52738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qadd8U( sel8x8_7(xx), sel8x8_7(yy) ), 52838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qadd8U( sel8x8_6(xx), sel8x8_6(yy) ), 52938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qadd8U( sel8x8_5(xx), sel8x8_5(yy) ), 53038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qadd8U( sel8x8_4(xx), sel8x8_4(yy) ), 53138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qadd8U( sel8x8_3(xx), sel8x8_3(yy) ), 53238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qadd8U( sel8x8_2(xx), sel8x8_2(yy) ), 53338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qadd8U( sel8x8_1(xx), sel8x8_1(yy) ), 53438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qadd8U( sel8x8_0(xx), sel8x8_0(yy) ) 53538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 53638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 53738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 53838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* ------------ Normal subtraction ------------ */ 53938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 54038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_Sub32x2 ( ULong xx, ULong yy ) 54138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 54238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk32x2( 54338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel32x2_1(xx) - sel32x2_1(yy), 54438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel32x2_0(xx) - sel32x2_0(yy) 54538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 54638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 54738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 54838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_Sub16x4 ( ULong xx, ULong yy ) 54938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 55038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk16x4( 551d19fc161147086f31126fef0955b426b4f843d02sewardj toUShort( sel16x4_3(xx) - sel16x4_3(yy) ), 552d19fc161147086f31126fef0955b426b4f843d02sewardj toUShort( sel16x4_2(xx) - sel16x4_2(yy) ), 553d19fc161147086f31126fef0955b426b4f843d02sewardj toUShort( sel16x4_1(xx) - sel16x4_1(yy) ), 554d19fc161147086f31126fef0955b426b4f843d02sewardj toUShort( sel16x4_0(xx) - sel16x4_0(yy) ) 55538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 55638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 55738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 55838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_Sub8x8 ( ULong xx, ULong yy ) 55938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 56038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk8x8( 561d19fc161147086f31126fef0955b426b4f843d02sewardj toUChar( sel8x8_7(xx) - sel8x8_7(yy) ), 562d19fc161147086f31126fef0955b426b4f843d02sewardj toUChar( sel8x8_6(xx) - sel8x8_6(yy) ), 563d19fc161147086f31126fef0955b426b4f843d02sewardj toUChar( sel8x8_5(xx) - sel8x8_5(yy) ), 564d19fc161147086f31126fef0955b426b4f843d02sewardj toUChar( sel8x8_4(xx) - sel8x8_4(yy) ), 565d19fc161147086f31126fef0955b426b4f843d02sewardj toUChar( sel8x8_3(xx) - sel8x8_3(yy) ), 566d19fc161147086f31126fef0955b426b4f843d02sewardj toUChar( sel8x8_2(xx) - sel8x8_2(yy) ), 567d19fc161147086f31126fef0955b426b4f843d02sewardj toUChar( sel8x8_1(xx) - sel8x8_1(yy) ), 568d19fc161147086f31126fef0955b426b4f843d02sewardj toUChar( sel8x8_0(xx) - sel8x8_0(yy) ) 56938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 57038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 57138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 57238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* ------------ Saturating subtraction ------------ */ 57338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 57438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_QSub16Sx4 ( ULong xx, ULong yy ) 57538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 57638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk16x4( 57738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qsub16S( sel16x4_3(xx), sel16x4_3(yy) ), 57838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qsub16S( sel16x4_2(xx), sel16x4_2(yy) ), 57938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qsub16S( sel16x4_1(xx), sel16x4_1(yy) ), 58038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qsub16S( sel16x4_0(xx), sel16x4_0(yy) ) 58138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 58238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 58338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 58438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_QSub8Sx8 ( ULong xx, ULong yy ) 58538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 58638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk8x8( 58738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qsub8S( sel8x8_7(xx), sel8x8_7(yy) ), 58838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qsub8S( sel8x8_6(xx), sel8x8_6(yy) ), 58938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qsub8S( sel8x8_5(xx), sel8x8_5(yy) ), 59038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qsub8S( sel8x8_4(xx), sel8x8_4(yy) ), 59138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qsub8S( sel8x8_3(xx), sel8x8_3(yy) ), 59238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qsub8S( sel8x8_2(xx), sel8x8_2(yy) ), 59338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qsub8S( sel8x8_1(xx), sel8x8_1(yy) ), 59438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qsub8S( sel8x8_0(xx), sel8x8_0(yy) ) 59538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 59638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 59738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 59838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_QSub16Ux4 ( ULong xx, ULong yy ) 59938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 60038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk16x4( 60138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qsub16U( sel16x4_3(xx), sel16x4_3(yy) ), 60238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qsub16U( sel16x4_2(xx), sel16x4_2(yy) ), 60338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qsub16U( sel16x4_1(xx), sel16x4_1(yy) ), 60438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qsub16U( sel16x4_0(xx), sel16x4_0(yy) ) 60538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 60638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 60738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 60838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_QSub8Ux8 ( ULong xx, ULong yy ) 60938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 61038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk8x8( 61138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qsub8U( sel8x8_7(xx), sel8x8_7(yy) ), 61238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qsub8U( sel8x8_6(xx), sel8x8_6(yy) ), 61338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qsub8U( sel8x8_5(xx), sel8x8_5(yy) ), 61438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qsub8U( sel8x8_4(xx), sel8x8_4(yy) ), 61538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qsub8U( sel8x8_3(xx), sel8x8_3(yy) ), 61638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qsub8U( sel8x8_2(xx), sel8x8_2(yy) ), 61738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qsub8U( sel8x8_1(xx), sel8x8_1(yy) ), 61838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj qsub8U( sel8x8_0(xx), sel8x8_0(yy) ) 61938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 62038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 62138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 62238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* ------------ Multiplication ------------ */ 62338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 62438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_Mul16x4 ( ULong xx, ULong yy ) 62538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 62638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk16x4( 62738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj mul16( sel16x4_3(xx), sel16x4_3(yy) ), 62838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj mul16( sel16x4_2(xx), sel16x4_2(yy) ), 62938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj mul16( sel16x4_1(xx), sel16x4_1(yy) ), 63038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj mul16( sel16x4_0(xx), sel16x4_0(yy) ) 63138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 63238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 63338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 634d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardjULong h_generic_calc_Mul32x2 ( ULong xx, ULong yy ) 635d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj{ 636d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj return mk32x2( 637d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj mul32( sel32x2_1(xx), sel32x2_1(yy) ), 638d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj mul32( sel32x2_0(xx), sel32x2_0(yy) ) 639d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj ); 640d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj} 641d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj 64238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_MulHi16Sx4 ( ULong xx, ULong yy ) 64338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 64438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk16x4( 64538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj mulhi16S( sel16x4_3(xx), sel16x4_3(yy) ), 64638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj mulhi16S( sel16x4_2(xx), sel16x4_2(yy) ), 64738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj mulhi16S( sel16x4_1(xx), sel16x4_1(yy) ), 64838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj mulhi16S( sel16x4_0(xx), sel16x4_0(yy) ) 64938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 65038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 65138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 65238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_MulHi16Ux4 ( ULong xx, ULong yy ) 65338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 65438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk16x4( 65538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj mulhi16U( sel16x4_3(xx), sel16x4_3(yy) ), 65638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj mulhi16U( sel16x4_2(xx), sel16x4_2(yy) ), 65738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj mulhi16U( sel16x4_1(xx), sel16x4_1(yy) ), 65838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj mulhi16U( sel16x4_0(xx), sel16x4_0(yy) ) 65938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 66038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 66138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 66238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* ------------ Comparison ------------ */ 66338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 66438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_CmpEQ32x2 ( ULong xx, ULong yy ) 66538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 66638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk32x2( 66738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpeq32( sel32x2_1(xx), sel32x2_1(yy) ), 66838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpeq32( sel32x2_0(xx), sel32x2_0(yy) ) 66938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 67038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 67138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 67238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_CmpEQ16x4 ( ULong xx, ULong yy ) 67338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 67438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk16x4( 67538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpeq16( sel16x4_3(xx), sel16x4_3(yy) ), 67638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpeq16( sel16x4_2(xx), sel16x4_2(yy) ), 67738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpeq16( sel16x4_1(xx), sel16x4_1(yy) ), 67838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpeq16( sel16x4_0(xx), sel16x4_0(yy) ) 67938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 68038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 68138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 68238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_CmpEQ8x8 ( ULong xx, ULong yy ) 68338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 68438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk8x8( 68538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpeq8( sel8x8_7(xx), sel8x8_7(yy) ), 68638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpeq8( sel8x8_6(xx), sel8x8_6(yy) ), 68738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpeq8( sel8x8_5(xx), sel8x8_5(yy) ), 68838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpeq8( sel8x8_4(xx), sel8x8_4(yy) ), 68938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpeq8( sel8x8_3(xx), sel8x8_3(yy) ), 69038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpeq8( sel8x8_2(xx), sel8x8_2(yy) ), 69138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpeq8( sel8x8_1(xx), sel8x8_1(yy) ), 69238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpeq8( sel8x8_0(xx), sel8x8_0(yy) ) 69338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 69438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 69538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 69638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_CmpGT32Sx2 ( ULong xx, ULong yy ) 69738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 69838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk32x2( 69938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpgt32S( sel32x2_1(xx), sel32x2_1(yy) ), 70038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpgt32S( sel32x2_0(xx), sel32x2_0(yy) ) 70138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 70238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 70338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 70438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_CmpGT16Sx4 ( ULong xx, ULong yy ) 70538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 70638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk16x4( 70738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpgt16S( sel16x4_3(xx), sel16x4_3(yy) ), 70838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpgt16S( sel16x4_2(xx), sel16x4_2(yy) ), 70938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpgt16S( sel16x4_1(xx), sel16x4_1(yy) ), 71038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpgt16S( sel16x4_0(xx), sel16x4_0(yy) ) 71138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 71238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 71338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 71438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_CmpGT8Sx8 ( ULong xx, ULong yy ) 71538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 71638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk8x8( 71738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpgt8S( sel8x8_7(xx), sel8x8_7(yy) ), 71838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpgt8S( sel8x8_6(xx), sel8x8_6(yy) ), 71938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpgt8S( sel8x8_5(xx), sel8x8_5(yy) ), 72038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpgt8S( sel8x8_4(xx), sel8x8_4(yy) ), 72138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpgt8S( sel8x8_3(xx), sel8x8_3(yy) ), 72238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpgt8S( sel8x8_2(xx), sel8x8_2(yy) ), 72338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpgt8S( sel8x8_1(xx), sel8x8_1(yy) ), 72438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj cmpgt8S( sel8x8_0(xx), sel8x8_0(yy) ) 72538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 72638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 72738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 7281806918ae2783af5808f00876581e01c7b650a0dsewardjULong h_generic_calc_CmpNEZ32x2 ( ULong xx ) 7291806918ae2783af5808f00876581e01c7b650a0dsewardj{ 7301806918ae2783af5808f00876581e01c7b650a0dsewardj return mk32x2( 7311806918ae2783af5808f00876581e01c7b650a0dsewardj cmpnez32( sel32x2_1(xx) ), 7321806918ae2783af5808f00876581e01c7b650a0dsewardj cmpnez32( sel32x2_0(xx) ) 7331806918ae2783af5808f00876581e01c7b650a0dsewardj ); 7341806918ae2783af5808f00876581e01c7b650a0dsewardj} 7351806918ae2783af5808f00876581e01c7b650a0dsewardj 7361806918ae2783af5808f00876581e01c7b650a0dsewardjULong h_generic_calc_CmpNEZ16x4 ( ULong xx ) 7371806918ae2783af5808f00876581e01c7b650a0dsewardj{ 7381806918ae2783af5808f00876581e01c7b650a0dsewardj return mk16x4( 7391806918ae2783af5808f00876581e01c7b650a0dsewardj cmpnez16( sel16x4_3(xx) ), 7401806918ae2783af5808f00876581e01c7b650a0dsewardj cmpnez16( sel16x4_2(xx) ), 7411806918ae2783af5808f00876581e01c7b650a0dsewardj cmpnez16( sel16x4_1(xx) ), 7421806918ae2783af5808f00876581e01c7b650a0dsewardj cmpnez16( sel16x4_0(xx) ) 7431806918ae2783af5808f00876581e01c7b650a0dsewardj ); 7441806918ae2783af5808f00876581e01c7b650a0dsewardj} 7451806918ae2783af5808f00876581e01c7b650a0dsewardj 7461806918ae2783af5808f00876581e01c7b650a0dsewardjULong h_generic_calc_CmpNEZ8x8 ( ULong xx ) 7471806918ae2783af5808f00876581e01c7b650a0dsewardj{ 7481806918ae2783af5808f00876581e01c7b650a0dsewardj return mk8x8( 7491806918ae2783af5808f00876581e01c7b650a0dsewardj cmpnez8( sel8x8_7(xx) ), 7501806918ae2783af5808f00876581e01c7b650a0dsewardj cmpnez8( sel8x8_6(xx) ), 7511806918ae2783af5808f00876581e01c7b650a0dsewardj cmpnez8( sel8x8_5(xx) ), 7521806918ae2783af5808f00876581e01c7b650a0dsewardj cmpnez8( sel8x8_4(xx) ), 7531806918ae2783af5808f00876581e01c7b650a0dsewardj cmpnez8( sel8x8_3(xx) ), 7541806918ae2783af5808f00876581e01c7b650a0dsewardj cmpnez8( sel8x8_2(xx) ), 7551806918ae2783af5808f00876581e01c7b650a0dsewardj cmpnez8( sel8x8_1(xx) ), 7561806918ae2783af5808f00876581e01c7b650a0dsewardj cmpnez8( sel8x8_0(xx) ) 7571806918ae2783af5808f00876581e01c7b650a0dsewardj ); 7581806918ae2783af5808f00876581e01c7b650a0dsewardj} 7591806918ae2783af5808f00876581e01c7b650a0dsewardj 76038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* ------------ Saturating narrowing ------------ */ 76138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 7625f438dd73072211989c6d496845bdc9b777ecbecsewardjULong h_generic_calc_QNarrowBin32Sto16Sx4 ( ULong aa, ULong bb ) 76338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 76438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UInt d = sel32x2_1(aa); 76538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UInt c = sel32x2_0(aa); 76638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UInt b = sel32x2_1(bb); 76738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UInt a = sel32x2_0(bb); 76838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk16x4( 769c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj qnarrow32Sto16S(d), 770c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj qnarrow32Sto16S(c), 771c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj qnarrow32Sto16S(b), 772c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj qnarrow32Sto16S(a) 77338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 77438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 77538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 7765f438dd73072211989c6d496845bdc9b777ecbecsewardjULong h_generic_calc_QNarrowBin16Sto8Sx8 ( ULong aa, ULong bb ) 77738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 77838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UShort h = sel16x4_3(aa); 77938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UShort g = sel16x4_2(aa); 78038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UShort f = sel16x4_1(aa); 78138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UShort e = sel16x4_0(aa); 78238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UShort d = sel16x4_3(bb); 78338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UShort c = sel16x4_2(bb); 78438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UShort b = sel16x4_1(bb); 78538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UShort a = sel16x4_0(bb); 78638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk8x8( 787c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj qnarrow16Sto8S(h), 788c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj qnarrow16Sto8S(g), 789c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj qnarrow16Sto8S(f), 790c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj qnarrow16Sto8S(e), 791c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj qnarrow16Sto8S(d), 792c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj qnarrow16Sto8S(c), 793c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj qnarrow16Sto8S(b), 794c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj qnarrow16Sto8S(a) 79538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 79638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 79738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 7985f438dd73072211989c6d496845bdc9b777ecbecsewardjULong h_generic_calc_QNarrowBin16Sto8Ux8 ( ULong aa, ULong bb ) 79938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 80038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UShort h = sel16x4_3(aa); 80138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UShort g = sel16x4_2(aa); 80238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UShort f = sel16x4_1(aa); 80338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UShort e = sel16x4_0(aa); 80438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UShort d = sel16x4_3(bb); 80538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UShort c = sel16x4_2(bb); 80638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UShort b = sel16x4_1(bb); 80738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj UShort a = sel16x4_0(bb); 80838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk8x8( 809c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj qnarrow16Sto8U(h), 810c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj qnarrow16Sto8U(g), 811c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj qnarrow16Sto8U(f), 812c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj qnarrow16Sto8U(e), 813c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj qnarrow16Sto8U(d), 814c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj qnarrow16Sto8U(c), 815c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj qnarrow16Sto8U(b), 816c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj qnarrow16Sto8U(a) 81738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 81838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 81938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 82038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* ------------ Interleaving ------------ */ 82138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 82238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_InterleaveHI8x8 ( ULong aa, ULong bb ) 82338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 82438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk8x8( 82538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel8x8_7(aa), 82638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel8x8_7(bb), 82738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel8x8_6(aa), 82838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel8x8_6(bb), 82938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel8x8_5(aa), 83038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel8x8_5(bb), 83138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel8x8_4(aa), 83238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel8x8_4(bb) 83338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 83438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 83538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 83638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_InterleaveLO8x8 ( ULong aa, ULong bb ) 83738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 83838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk8x8( 83938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel8x8_3(aa), 84038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel8x8_3(bb), 84138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel8x8_2(aa), 84238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel8x8_2(bb), 84338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel8x8_1(aa), 84438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel8x8_1(bb), 84538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel8x8_0(aa), 84638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel8x8_0(bb) 84738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 84838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 84938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 85038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_InterleaveHI16x4 ( ULong aa, ULong bb ) 85138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 85238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk16x4( 85338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel16x4_3(aa), 85438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel16x4_3(bb), 85538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel16x4_2(aa), 85638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel16x4_2(bb) 85738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 85838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 85938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 86038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_InterleaveLO16x4 ( ULong aa, ULong bb ) 86138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 86238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk16x4( 86338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel16x4_1(aa), 86438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel16x4_1(bb), 86538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel16x4_0(aa), 86638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel16x4_0(bb) 86738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 86838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 86938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 87038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_InterleaveHI32x2 ( ULong aa, ULong bb ) 87138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 87238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk32x2( 87338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel32x2_1(aa), 87438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel32x2_1(bb) 87538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 87638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 87738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 87838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_InterleaveLO32x2 ( ULong aa, ULong bb ) 87938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 88038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk32x2( 88138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel32x2_0(aa), 88238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sel32x2_0(bb) 88338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 88438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 88538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 886d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj/* ------------ Concatenation ------------ */ 887d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj 888d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardjULong h_generic_calc_CatOddLanes16x4 ( ULong aa, ULong bb ) 889d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj{ 890d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj return mk16x4( 891d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj sel16x4_3(aa), 892d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj sel16x4_1(aa), 893d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj sel16x4_3(bb), 894d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj sel16x4_1(bb) 895d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj ); 896d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj} 897d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj 898d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardjULong h_generic_calc_CatEvenLanes16x4 ( ULong aa, ULong bb ) 899d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj{ 900d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj return mk16x4( 901d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj sel16x4_2(aa), 902d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj sel16x4_0(aa), 903d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj sel16x4_2(bb), 904d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj sel16x4_0(bb) 905d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj ); 906d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj} 907d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj 908d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj/* misc hack looking for a proper home */ 909d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardjULong h_generic_calc_Perm8x8 ( ULong aa, ULong bb ) 910d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj{ 911d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj return mk8x8( 912d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj index8x8(aa, sel8x8_7(bb)), 913d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj index8x8(aa, sel8x8_6(bb)), 914d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj index8x8(aa, sel8x8_5(bb)), 915d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj index8x8(aa, sel8x8_4(bb)), 916d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj index8x8(aa, sel8x8_3(bb)), 917d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj index8x8(aa, sel8x8_2(bb)), 918d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj index8x8(aa, sel8x8_1(bb)), 919d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj index8x8(aa, sel8x8_0(bb)) 920d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj ); 921d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj} 92238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 92338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* ------------ Shifting ------------ */ 92438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* Note that because these primops are undefined if the shift amount 92538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj equals or exceeds the lane width, the shift amount is masked so 92638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj that the scalar shifts are always in range. In fact, given the 92738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj semantics of these primops (ShlN16x4, etc) it is an error if in 92838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj fact we are ever given an out-of-range shift amount. 92938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj*/ 93038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_ShlN32x2 ( ULong xx, UInt nn ) 93138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 93238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj /* vassert(nn < 32); */ 93338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj nn &= 31; 93438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk32x2( 93538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj shl32( sel32x2_1(xx), nn ), 93638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj shl32( sel32x2_0(xx), nn ) 93738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 93838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 93938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 94038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_ShlN16x4 ( ULong xx, UInt nn ) 94138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 94238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj /* vassert(nn < 16); */ 94338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj nn &= 15; 94438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk16x4( 94538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj shl16( sel16x4_3(xx), nn ), 94638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj shl16( sel16x4_2(xx), nn ), 94738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj shl16( sel16x4_1(xx), nn ), 94838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj shl16( sel16x4_0(xx), nn ) 94938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 950d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj} 951d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj 952d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardjULong h_generic_calc_ShlN8x8 ( ULong xx, UInt nn ) 953d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj{ 954d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj /* vassert(nn < 8); */ 955d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj nn &= 7; 956d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj return mk8x8( 957d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj shl8( sel8x8_7(xx), nn ), 958d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj shl8( sel8x8_6(xx), nn ), 959d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj shl8( sel8x8_5(xx), nn ), 960d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj shl8( sel8x8_4(xx), nn ), 961d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj shl8( sel8x8_3(xx), nn ), 962d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj shl8( sel8x8_2(xx), nn ), 963d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj shl8( sel8x8_1(xx), nn ), 964d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj shl8( sel8x8_0(xx), nn ) 965d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj ); 96638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 96738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 96838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_ShrN32x2 ( ULong xx, UInt nn ) 96938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 97038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj /* vassert(nn < 32); */ 97138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj nn &= 31; 97238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk32x2( 97338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj shr32( sel32x2_1(xx), nn ), 97438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj shr32( sel32x2_0(xx), nn ) 97538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 97638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 97738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 97838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_ShrN16x4 ( ULong xx, UInt nn ) 97938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 98038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj /* vassert(nn < 16); */ 98138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj nn &= 15; 98238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk16x4( 98338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj shr16( sel16x4_3(xx), nn ), 98438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj shr16( sel16x4_2(xx), nn ), 98538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj shr16( sel16x4_1(xx), nn ), 98638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj shr16( sel16x4_0(xx), nn ) 98738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 98838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 98938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 99038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_SarN32x2 ( ULong xx, UInt nn ) 99138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 99238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj /* vassert(nn < 32); */ 99338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj nn &= 31; 99438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk32x2( 99538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sar32( sel32x2_1(xx), nn ), 99638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sar32( sel32x2_0(xx), nn ) 99738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 99838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 99938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 100038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_SarN16x4 ( ULong xx, UInt nn ) 100138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 100238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj /* vassert(nn < 16); */ 100338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj nn &= 15; 100438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk16x4( 100538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sar16( sel16x4_3(xx), nn ), 100638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sar16( sel16x4_2(xx), nn ), 100738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sar16( sel16x4_1(xx), nn ), 100838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj sar16( sel16x4_0(xx), nn ) 100938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 101038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 101138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 1012d71ba837242cc470f622335b1c650bce8886a533sewardjULong h_generic_calc_SarN8x8 ( ULong xx, UInt nn ) 1013d71ba837242cc470f622335b1c650bce8886a533sewardj{ 1014d71ba837242cc470f622335b1c650bce8886a533sewardj /* vassert(nn < 8); */ 1015d71ba837242cc470f622335b1c650bce8886a533sewardj nn &= 7; 1016d71ba837242cc470f622335b1c650bce8886a533sewardj return mk8x8( 1017d71ba837242cc470f622335b1c650bce8886a533sewardj sar8( sel8x8_7(xx), nn ), 1018d71ba837242cc470f622335b1c650bce8886a533sewardj sar8( sel8x8_6(xx), nn ), 1019d71ba837242cc470f622335b1c650bce8886a533sewardj sar8( sel8x8_5(xx), nn ), 1020d71ba837242cc470f622335b1c650bce8886a533sewardj sar8( sel8x8_4(xx), nn ), 1021d71ba837242cc470f622335b1c650bce8886a533sewardj sar8( sel8x8_3(xx), nn ), 1022d71ba837242cc470f622335b1c650bce8886a533sewardj sar8( sel8x8_2(xx), nn ), 1023d71ba837242cc470f622335b1c650bce8886a533sewardj sar8( sel8x8_1(xx), nn ), 1024d71ba837242cc470f622335b1c650bce8886a533sewardj sar8( sel8x8_0(xx), nn ) 1025d71ba837242cc470f622335b1c650bce8886a533sewardj ); 1026d71ba837242cc470f622335b1c650bce8886a533sewardj} 1027d71ba837242cc470f622335b1c650bce8886a533sewardj 102838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* ------------ Averaging ------------ */ 102938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 103038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_Avg8Ux8 ( ULong xx, ULong yy ) 103138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 103238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk8x8( 103338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj avg8U( sel8x8_7(xx), sel8x8_7(yy) ), 103438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj avg8U( sel8x8_6(xx), sel8x8_6(yy) ), 103538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj avg8U( sel8x8_5(xx), sel8x8_5(yy) ), 103638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj avg8U( sel8x8_4(xx), sel8x8_4(yy) ), 103738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj avg8U( sel8x8_3(xx), sel8x8_3(yy) ), 103838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj avg8U( sel8x8_2(xx), sel8x8_2(yy) ), 103938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj avg8U( sel8x8_1(xx), sel8x8_1(yy) ), 104038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj avg8U( sel8x8_0(xx), sel8x8_0(yy) ) 104138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 104238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 104338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 104438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_Avg16Ux4 ( ULong xx, ULong yy ) 104538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 104638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk16x4( 104738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj avg16U( sel16x4_3(xx), sel16x4_3(yy) ), 104838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj avg16U( sel16x4_2(xx), sel16x4_2(yy) ), 104938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj avg16U( sel16x4_1(xx), sel16x4_1(yy) ), 105038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj avg16U( sel16x4_0(xx), sel16x4_0(yy) ) 105138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 105238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 105338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 105438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* ------------ max/min ------------ */ 105538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 105638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_Max16Sx4 ( ULong xx, ULong yy ) 105738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 105838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk16x4( 105938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj max16S( sel16x4_3(xx), sel16x4_3(yy) ), 106038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj max16S( sel16x4_2(xx), sel16x4_2(yy) ), 106138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj max16S( sel16x4_1(xx), sel16x4_1(yy) ), 106238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj max16S( sel16x4_0(xx), sel16x4_0(yy) ) 106338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 106438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 106538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 106638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_Max8Ux8 ( ULong xx, ULong yy ) 106738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 106838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk8x8( 106938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj max8U( sel8x8_7(xx), sel8x8_7(yy) ), 107038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj max8U( sel8x8_6(xx), sel8x8_6(yy) ), 107138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj max8U( sel8x8_5(xx), sel8x8_5(yy) ), 107238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj max8U( sel8x8_4(xx), sel8x8_4(yy) ), 107338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj max8U( sel8x8_3(xx), sel8x8_3(yy) ), 107438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj max8U( sel8x8_2(xx), sel8x8_2(yy) ), 107538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj max8U( sel8x8_1(xx), sel8x8_1(yy) ), 107638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj max8U( sel8x8_0(xx), sel8x8_0(yy) ) 107738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 107838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 107938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 108038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_Min16Sx4 ( ULong xx, ULong yy ) 108138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 108238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk16x4( 108338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj min16S( sel16x4_3(xx), sel16x4_3(yy) ), 108438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj min16S( sel16x4_2(xx), sel16x4_2(yy) ), 108538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj min16S( sel16x4_1(xx), sel16x4_1(yy) ), 108638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj min16S( sel16x4_0(xx), sel16x4_0(yy) ) 108738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 108838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 108938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 109038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_Min8Ux8 ( ULong xx, ULong yy ) 109138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{ 109238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj return mk8x8( 109338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj min8U( sel8x8_7(xx), sel8x8_7(yy) ), 109438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj min8U( sel8x8_6(xx), sel8x8_6(yy) ), 109538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj min8U( sel8x8_5(xx), sel8x8_5(yy) ), 109638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj min8U( sel8x8_4(xx), sel8x8_4(yy) ), 109738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj min8U( sel8x8_3(xx), sel8x8_3(yy) ), 109838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj min8U( sel8x8_2(xx), sel8x8_2(yy) ), 109938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj min8U( sel8x8_1(xx), sel8x8_1(yy) ), 110038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj min8U( sel8x8_0(xx), sel8x8_0(yy) ) 110138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj ); 110238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj} 110338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 1104e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj/* ------------ SOME 32-bit SIMD HELPERS TOO ------------ */ 1105e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1106e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj/* Tuple/select functions for 16x2 vectors. */ 1107e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline UInt mk16x2 ( UShort w1, UShort w2 ) { 1108e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return (((UInt)w1) << 16) | ((UInt)w2); 1109e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1110e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1111e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline UShort sel16x2_1 ( UInt w32 ) { 1112e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return 0xFFFF & (UShort)(w32 >> 16); 1113e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1114e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline UShort sel16x2_0 ( UInt w32 ) { 1115e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return 0xFFFF & (UShort)(w32); 1116e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1117e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1118e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline UInt mk8x4 ( UChar w3, UChar w2, 1119e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj UChar w1, UChar w0 ) { 1120e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj UInt w32 = (((UInt)w3) << 24) | (((UInt)w2) << 16) 1121e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj | (((UInt)w1) << 8) | (((UInt)w0) << 0); 1122e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return w32; 1123e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1124e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1125e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline UChar sel8x4_3 ( UInt w32 ) { 1126e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return toUChar(0xFF & (w32 >> 24)); 1127e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1128e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline UChar sel8x4_2 ( UInt w32 ) { 1129e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return toUChar(0xFF & (w32 >> 16)); 1130e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1131e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline UChar sel8x4_1 ( UInt w32 ) { 1132e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return toUChar(0xFF & (w32 >> 8)); 1133e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1134e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline UChar sel8x4_0 ( UInt w32 ) { 1135e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return toUChar(0xFF & (w32 >> 0)); 1136e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1137e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1138e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1139e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj/* ----------------------------------------------------- */ 1140e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj/* More externally visible functions. These simply 1141e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj implement the corresponding IR primops. */ 1142e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj/* ----------------------------------------------------- */ 1143e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1144e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj/* ------ 16x2 ------ */ 1145e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1146e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_Add16x2 ( UInt xx, UInt yy ) 1147e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 1148e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return mk16x2( sel16x2_1(xx) + sel16x2_1(yy), 1149e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj sel16x2_0(xx) + sel16x2_0(yy) ); 1150e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1151e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1152e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_Sub16x2 ( UInt xx, UInt yy ) 1153e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 1154e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return mk16x2( sel16x2_1(xx) - sel16x2_1(yy), 1155e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj sel16x2_0(xx) - sel16x2_0(yy) ); 1156e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1157e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1158e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_HAdd16Ux2 ( UInt xx, UInt yy ) 1159e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 1160e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return mk16x2( hadd16U( sel16x2_1(xx), sel16x2_1(yy) ), 1161e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj hadd16U( sel16x2_0(xx), sel16x2_0(yy) ) ); 1162e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1163e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1164e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_HAdd16Sx2 ( UInt xx, UInt yy ) 1165e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 1166e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return mk16x2( hadd16S( sel16x2_1(xx), sel16x2_1(yy) ), 1167e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj hadd16S( sel16x2_0(xx), sel16x2_0(yy) ) ); 1168e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1169e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1170e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_HSub16Ux2 ( UInt xx, UInt yy ) 1171e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 1172e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return mk16x2( hsub16U( sel16x2_1(xx), sel16x2_1(yy) ), 1173e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj hsub16U( sel16x2_0(xx), sel16x2_0(yy) ) ); 1174e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1175e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1176e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_HSub16Sx2 ( UInt xx, UInt yy ) 1177e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 1178e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return mk16x2( hsub16S( sel16x2_1(xx), sel16x2_1(yy) ), 1179e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj hsub16S( sel16x2_0(xx), sel16x2_0(yy) ) ); 1180e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1181e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1182e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_QAdd16Ux2 ( UInt xx, UInt yy ) 1183e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 1184e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return mk16x2( qadd16U( sel16x2_1(xx), sel16x2_1(yy) ), 1185e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj qadd16U( sel16x2_0(xx), sel16x2_0(yy) ) ); 1186e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1187e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1188e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_QAdd16Sx2 ( UInt xx, UInt yy ) 1189e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 1190e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return mk16x2( qadd16S( sel16x2_1(xx), sel16x2_1(yy) ), 1191e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj qadd16S( sel16x2_0(xx), sel16x2_0(yy) ) ); 1192e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1193e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1194e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_QSub16Ux2 ( UInt xx, UInt yy ) 1195e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 1196e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return mk16x2( qsub16U( sel16x2_1(xx), sel16x2_1(yy) ), 1197e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj qsub16U( sel16x2_0(xx), sel16x2_0(yy) ) ); 1198e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1199e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1200e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_QSub16Sx2 ( UInt xx, UInt yy ) 1201e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 1202e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return mk16x2( qsub16S( sel16x2_1(xx), sel16x2_1(yy) ), 1203e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj qsub16S( sel16x2_0(xx), sel16x2_0(yy) ) ); 1204e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1205e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1206e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj/* ------ 8x4 ------ */ 1207e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1208e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_Add8x4 ( UInt xx, UInt yy ) 1209e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 1210e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return mk8x4( 1211e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj sel8x4_3(xx) + sel8x4_3(yy), 1212e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj sel8x4_2(xx) + sel8x4_2(yy), 1213e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj sel8x4_1(xx) + sel8x4_1(yy), 1214e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj sel8x4_0(xx) + sel8x4_0(yy) 1215e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj ); 1216e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1217e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1218e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_Sub8x4 ( UInt xx, UInt yy ) 1219e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 1220e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return mk8x4( 1221e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj sel8x4_3(xx) - sel8x4_3(yy), 1222e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj sel8x4_2(xx) - sel8x4_2(yy), 1223e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj sel8x4_1(xx) - sel8x4_1(yy), 1224e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj sel8x4_0(xx) - sel8x4_0(yy) 1225e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj ); 1226e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1227e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1228e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_HAdd8Ux4 ( UInt xx, UInt yy ) 1229e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 1230e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return mk8x4( 1231e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj hadd8U( sel8x4_3(xx), sel8x4_3(yy) ), 1232e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj hadd8U( sel8x4_2(xx), sel8x4_2(yy) ), 1233e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj hadd8U( sel8x4_1(xx), sel8x4_1(yy) ), 1234e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj hadd8U( sel8x4_0(xx), sel8x4_0(yy) ) 1235e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj ); 1236e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1237e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1238e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_HAdd8Sx4 ( UInt xx, UInt yy ) 1239e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 1240e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return mk8x4( 1241e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj hadd8S( sel8x4_3(xx), sel8x4_3(yy) ), 1242e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj hadd8S( sel8x4_2(xx), sel8x4_2(yy) ), 1243e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj hadd8S( sel8x4_1(xx), sel8x4_1(yy) ), 1244e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj hadd8S( sel8x4_0(xx), sel8x4_0(yy) ) 1245e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj ); 1246e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1247e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1248e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_HSub8Ux4 ( UInt xx, UInt yy ) 1249e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 1250e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return mk8x4( 1251e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj hsub8U( sel8x4_3(xx), sel8x4_3(yy) ), 1252e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj hsub8U( sel8x4_2(xx), sel8x4_2(yy) ), 1253e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj hsub8U( sel8x4_1(xx), sel8x4_1(yy) ), 1254e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj hsub8U( sel8x4_0(xx), sel8x4_0(yy) ) 1255e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj ); 1256e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1257e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1258e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_HSub8Sx4 ( UInt xx, UInt yy ) 1259e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 1260e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return mk8x4( 1261e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj hsub8S( sel8x4_3(xx), sel8x4_3(yy) ), 1262e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj hsub8S( sel8x4_2(xx), sel8x4_2(yy) ), 1263e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj hsub8S( sel8x4_1(xx), sel8x4_1(yy) ), 1264e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj hsub8S( sel8x4_0(xx), sel8x4_0(yy) ) 1265e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj ); 1266e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1267e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1268e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_QAdd8Ux4 ( UInt xx, UInt yy ) 1269e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 1270e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return mk8x4( 1271e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj qadd8U( sel8x4_3(xx), sel8x4_3(yy) ), 1272e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj qadd8U( sel8x4_2(xx), sel8x4_2(yy) ), 1273e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj qadd8U( sel8x4_1(xx), sel8x4_1(yy) ), 1274e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj qadd8U( sel8x4_0(xx), sel8x4_0(yy) ) 1275e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj ); 1276e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1277e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1278e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_QAdd8Sx4 ( UInt xx, UInt yy ) 1279e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 1280e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return mk8x4( 1281e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj qadd8S( sel8x4_3(xx), sel8x4_3(yy) ), 1282e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj qadd8S( sel8x4_2(xx), sel8x4_2(yy) ), 1283e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj qadd8S( sel8x4_1(xx), sel8x4_1(yy) ), 1284e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj qadd8S( sel8x4_0(xx), sel8x4_0(yy) ) 1285e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj ); 1286e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1287e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1288e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_QSub8Ux4 ( UInt xx, UInt yy ) 1289e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 1290e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return mk8x4( 1291e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj qsub8U( sel8x4_3(xx), sel8x4_3(yy) ), 1292e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj qsub8U( sel8x4_2(xx), sel8x4_2(yy) ), 1293e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj qsub8U( sel8x4_1(xx), sel8x4_1(yy) ), 1294e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj qsub8U( sel8x4_0(xx), sel8x4_0(yy) ) 1295e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj ); 1296e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1297e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1298e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_QSub8Sx4 ( UInt xx, UInt yy ) 1299e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 1300e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return mk8x4( 1301e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj qsub8S( sel8x4_3(xx), sel8x4_3(yy) ), 1302e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj qsub8S( sel8x4_2(xx), sel8x4_2(yy) ), 1303e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj qsub8S( sel8x4_1(xx), sel8x4_1(yy) ), 1304e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj qsub8S( sel8x4_0(xx), sel8x4_0(yy) ) 1305e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj ); 1306e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1307e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1308e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_CmpNEZ16x2 ( UInt xx ) 1309e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 1310e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return mk16x2( 1311e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj cmpnez16( sel16x2_1(xx) ), 1312e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj cmpnez16( sel16x2_0(xx) ) 1313e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj ); 1314e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 1315e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj 1316e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_CmpNEZ8x4 ( UInt xx ) 1317e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{ 1318e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj return mk8x4( 1319e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj cmpnez8( sel8x4_3(xx) ), 1320e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj cmpnez8( sel8x4_2(xx) ), 1321e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj cmpnez8( sel8x4_1(xx) ), 1322e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj cmpnez8( sel8x4_0(xx) ) 1323e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj ); 1324e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj} 132538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj 1326310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardjUInt h_generic_calc_Sad8Ux4 ( UInt xx, UInt yy ) 1327310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardj{ 1328310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardj return absdiff8U( sel8x4_3(xx), sel8x4_3(yy) ) 1329310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardj + absdiff8U( sel8x4_2(xx), sel8x4_2(yy) ) 1330310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardj + absdiff8U( sel8x4_1(xx), sel8x4_1(yy) ) 1331310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardj + absdiff8U( sel8x4_0(xx), sel8x4_0(yy) ); 1332310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardj} 1333310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardj 1334310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardj 133538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/*---------------------------------------------------------------*/ 1336cef7d3e3df4796e35b4521158d9dc058f034aa87sewardj/*--- end host_generic_simd64.c ---*/ 133738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/*---------------------------------------------------------------*/ 1338