169d98e3853a63e578e039894e2ef00ca6f9878c8sewardj 269d98e3853a63e578e039894e2ef00ca6f9878c8sewardj/*---------------------------------------------------------------*/ 369d98e3853a63e578e039894e2ef00ca6f9878c8sewardj/*--- begin host_generic_simd128.c ---*/ 469d98e3853a63e578e039894e2ef00ca6f9878c8sewardj/*---------------------------------------------------------------*/ 569d98e3853a63e578e039894e2ef00ca6f9878c8sewardj 669d98e3853a63e578e039894e2ef00ca6f9878c8sewardj/* 769d98e3853a63e578e039894e2ef00ca6f9878c8sewardj This file is part of Valgrind, a dynamic binary instrumentation 869d98e3853a63e578e039894e2ef00ca6f9878c8sewardj framework. 969d98e3853a63e578e039894e2ef00ca6f9878c8sewardj 10ed39800a83baf5bffbe391f3974eb2af0f415f80Elliott Hughes Copyright (C) 2010-2017 OpenWorks GbR 1169d98e3853a63e578e039894e2ef00ca6f9878c8sewardj info@open-works.net 1269d98e3853a63e578e039894e2ef00ca6f9878c8sewardj 1369d98e3853a63e578e039894e2ef00ca6f9878c8sewardj This program is free software; you can redistribute it and/or 1469d98e3853a63e578e039894e2ef00ca6f9878c8sewardj modify it under the terms of the GNU General Public License as 1569d98e3853a63e578e039894e2ef00ca6f9878c8sewardj published by the Free Software Foundation; either version 2 of the 1669d98e3853a63e578e039894e2ef00ca6f9878c8sewardj License, or (at your option) any later version. 1769d98e3853a63e578e039894e2ef00ca6f9878c8sewardj 1869d98e3853a63e578e039894e2ef00ca6f9878c8sewardj This program is distributed in the hope that it will be useful, but 1969d98e3853a63e578e039894e2ef00ca6f9878c8sewardj WITHOUT ANY WARRANTY; without even the implied warranty of 2069d98e3853a63e578e039894e2ef00ca6f9878c8sewardj MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 2169d98e3853a63e578e039894e2ef00ca6f9878c8sewardj General Public License for more details. 2269d98e3853a63e578e039894e2ef00ca6f9878c8sewardj 2369d98e3853a63e578e039894e2ef00ca6f9878c8sewardj You should have received a copy of the GNU General Public License 2469d98e3853a63e578e039894e2ef00ca6f9878c8sewardj along with this program; if not, write to the Free Software 2569d98e3853a63e578e039894e2ef00ca6f9878c8sewardj Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 2669d98e3853a63e578e039894e2ef00ca6f9878c8sewardj 02110-1301, USA. 2769d98e3853a63e578e039894e2ef00ca6f9878c8sewardj 2869d98e3853a63e578e039894e2ef00ca6f9878c8sewardj The GNU General Public License is contained in the file COPYING. 2969d98e3853a63e578e039894e2ef00ca6f9878c8sewardj*/ 3069d98e3853a63e578e039894e2ef00ca6f9878c8sewardj 3169d98e3853a63e578e039894e2ef00ca6f9878c8sewardj/* Generic helper functions for doing 128-bit SIMD arithmetic in cases 3269d98e3853a63e578e039894e2ef00ca6f9878c8sewardj where the instruction selectors cannot generate code in-line. 3369d98e3853a63e578e039894e2ef00ca6f9878c8sewardj These are purely back-end entities and cannot be seen/referenced 3469d98e3853a63e578e039894e2ef00ca6f9878c8sewardj from IR. */ 3569d98e3853a63e578e039894e2ef00ca6f9878c8sewardj 3669d98e3853a63e578e039894e2ef00ca6f9878c8sewardj#include "libvex_basictypes.h" 3769d98e3853a63e578e039894e2ef00ca6f9878c8sewardj#include "host_generic_simd128.h" 3869d98e3853a63e578e039894e2ef00ca6f9878c8sewardj 3969d98e3853a63e578e039894e2ef00ca6f9878c8sewardj 4069d98e3853a63e578e039894e2ef00ca6f9878c8sewardj/* Primitive helpers always take args of the real type (signed vs 4169d98e3853a63e578e039894e2ef00ca6f9878c8sewardj unsigned) but return an unsigned result, so there's no conversion 4269d98e3853a63e578e039894e2ef00ca6f9878c8sewardj weirdness when stuffing results back in the V128 union fields, 4369d98e3853a63e578e039894e2ef00ca6f9878c8sewardj which are all unsigned. */ 4469d98e3853a63e578e039894e2ef00ca6f9878c8sewardj 4569d98e3853a63e578e039894e2ef00ca6f9878c8sewardjstatic inline UInt mul32 ( Int xx, Int yy ) 4669d98e3853a63e578e039894e2ef00ca6f9878c8sewardj{ 47108e03fcb0a4ef42164235b1988aa540aa1e5298florian Long t = ((Long)xx) * ((Long)yy); 4869d98e3853a63e578e039894e2ef00ca6f9878c8sewardj return toUInt(t); 4969d98e3853a63e578e039894e2ef00ca6f9878c8sewardj} 5069d98e3853a63e578e039894e2ef00ca6f9878c8sewardj 5169d98e3853a63e578e039894e2ef00ca6f9878c8sewardjstatic inline UInt max32S ( Int xx, Int yy ) 5269d98e3853a63e578e039894e2ef00ca6f9878c8sewardj{ 5369d98e3853a63e578e039894e2ef00ca6f9878c8sewardj return toUInt((xx > yy) ? xx : yy); 5469d98e3853a63e578e039894e2ef00ca6f9878c8sewardj} 5569d98e3853a63e578e039894e2ef00ca6f9878c8sewardj 5669d98e3853a63e578e039894e2ef00ca6f9878c8sewardjstatic inline UInt min32S ( Int xx, Int yy ) 5769d98e3853a63e578e039894e2ef00ca6f9878c8sewardj{ 5869d98e3853a63e578e039894e2ef00ca6f9878c8sewardj return toUInt((xx < yy) ? xx : yy); 5969d98e3853a63e578e039894e2ef00ca6f9878c8sewardj} 6069d98e3853a63e578e039894e2ef00ca6f9878c8sewardj 6169d98e3853a63e578e039894e2ef00ca6f9878c8sewardjstatic inline UInt max32U ( UInt xx, UInt yy ) 6269d98e3853a63e578e039894e2ef00ca6f9878c8sewardj{ 6369d98e3853a63e578e039894e2ef00ca6f9878c8sewardj return toUInt((xx > yy) ? xx : yy); 6469d98e3853a63e578e039894e2ef00ca6f9878c8sewardj} 6569d98e3853a63e578e039894e2ef00ca6f9878c8sewardj 6669d98e3853a63e578e039894e2ef00ca6f9878c8sewardjstatic inline UInt min32U ( UInt xx, UInt yy ) 6769d98e3853a63e578e039894e2ef00ca6f9878c8sewardj{ 6869d98e3853a63e578e039894e2ef00ca6f9878c8sewardj return toUInt((xx < yy) ? xx : yy); 6969d98e3853a63e578e039894e2ef00ca6f9878c8sewardj} 7069d98e3853a63e578e039894e2ef00ca6f9878c8sewardj 7169d98e3853a63e578e039894e2ef00ca6f9878c8sewardjstatic inline UShort max16U ( UShort xx, UShort yy ) 7269d98e3853a63e578e039894e2ef00ca6f9878c8sewardj{ 7369d98e3853a63e578e039894e2ef00ca6f9878c8sewardj return toUShort((xx > yy) ? xx : yy); 7469d98e3853a63e578e039894e2ef00ca6f9878c8sewardj} 7569d98e3853a63e578e039894e2ef00ca6f9878c8sewardj 7669d98e3853a63e578e039894e2ef00ca6f9878c8sewardjstatic inline UShort min16U ( UShort xx, UShort yy ) 7769d98e3853a63e578e039894e2ef00ca6f9878c8sewardj{ 7869d98e3853a63e578e039894e2ef00ca6f9878c8sewardj return toUShort((xx < yy) ? xx : yy); 7969d98e3853a63e578e039894e2ef00ca6f9878c8sewardj} 8069d98e3853a63e578e039894e2ef00ca6f9878c8sewardj 8169d98e3853a63e578e039894e2ef00ca6f9878c8sewardjstatic inline UChar max8S ( Char xx, Char yy ) 8269d98e3853a63e578e039894e2ef00ca6f9878c8sewardj{ 8369d98e3853a63e578e039894e2ef00ca6f9878c8sewardj return toUChar((xx > yy) ? xx : yy); 8469d98e3853a63e578e039894e2ef00ca6f9878c8sewardj} 8569d98e3853a63e578e039894e2ef00ca6f9878c8sewardj 8669d98e3853a63e578e039894e2ef00ca6f9878c8sewardjstatic inline UChar min8S ( Char xx, Char yy ) 8769d98e3853a63e578e039894e2ef00ca6f9878c8sewardj{ 8869d98e3853a63e578e039894e2ef00ca6f9878c8sewardj return toUChar((xx < yy) ? xx : yy); 8969d98e3853a63e578e039894e2ef00ca6f9878c8sewardj} 9069d98e3853a63e578e039894e2ef00ca6f9878c8sewardj 91d881562f1d18ab33038dab4a1b24823dba3422c0sewardjstatic inline ULong cmpEQ64 ( Long xx, Long yy ) 92d881562f1d18ab33038dab4a1b24823dba3422c0sewardj{ 93d881562f1d18ab33038dab4a1b24823dba3422c0sewardj return (((Long)xx) == ((Long)yy)) 94d881562f1d18ab33038dab4a1b24823dba3422c0sewardj ? 0xFFFFFFFFFFFFFFFFULL : 0ULL; 95d881562f1d18ab33038dab4a1b24823dba3422c0sewardj} 96d881562f1d18ab33038dab4a1b24823dba3422c0sewardj 9769d98e3853a63e578e039894e2ef00ca6f9878c8sewardjstatic inline ULong cmpGT64S ( Long xx, Long yy ) 9869d98e3853a63e578e039894e2ef00ca6f9878c8sewardj{ 9969d98e3853a63e578e039894e2ef00ca6f9878c8sewardj return (((Long)xx) > ((Long)yy)) 10069d98e3853a63e578e039894e2ef00ca6f9878c8sewardj ? 0xFFFFFFFFFFFFFFFFULL : 0ULL; 10169d98e3853a63e578e039894e2ef00ca6f9878c8sewardj} 10269d98e3853a63e578e039894e2ef00ca6f9878c8sewardj 1030874beee91898d987c327212c7fb35fdb5db0735sewardjstatic inline ULong sar64 ( ULong v, UInt n ) 1040874beee91898d987c327212c7fb35fdb5db0735sewardj{ 1050874beee91898d987c327212c7fb35fdb5db0735sewardj return ((Long)v) >> n; 1060874beee91898d987c327212c7fb35fdb5db0735sewardj} 1070874beee91898d987c327212c7fb35fdb5db0735sewardj 1080874beee91898d987c327212c7fb35fdb5db0735sewardjstatic inline UChar sar8 ( UChar v, UInt n ) 1090874beee91898d987c327212c7fb35fdb5db0735sewardj{ 1100874beee91898d987c327212c7fb35fdb5db0735sewardj return toUChar(((Char)v) >> n); 1110874beee91898d987c327212c7fb35fdb5db0735sewardj} 1120874beee91898d987c327212c7fb35fdb5db0735sewardj 1132260b993536a3d41d5833504371981718b43154esewardjstatic inline UShort qnarrow32Sto16U ( UInt xx0 ) 1142260b993536a3d41d5833504371981718b43154esewardj{ 1152260b993536a3d41d5833504371981718b43154esewardj Int xx = (Int)xx0; 1162260b993536a3d41d5833504371981718b43154esewardj if (xx < 0) xx = 0; 1172260b993536a3d41d5833504371981718b43154esewardj if (xx > 65535) xx = 65535; 1182260b993536a3d41d5833504371981718b43154esewardj return (UShort)xx; 1192260b993536a3d41d5833504371981718b43154esewardj} 1202260b993536a3d41d5833504371981718b43154esewardj 121ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardjstatic inline UShort narrow32to16 ( UInt xx ) 122ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj{ 123ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj return (UShort)xx; 124ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj} 125ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj 126ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardjstatic inline UChar narrow16to8 ( UShort xx ) 127ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj{ 128ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj return (UChar)xx; 129ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj} 130ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj 131ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj 132ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardjvoid VEX_REGPARM(3) 133ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj h_generic_calc_Mul32x4 ( /*OUT*/V128* res, 13469d98e3853a63e578e039894e2ef00ca6f9878c8sewardj V128* argL, V128* argR ) 13569d98e3853a63e578e039894e2ef00ca6f9878c8sewardj{ 13669d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w32[0] = mul32(argL->w32[0], argR->w32[0]); 13769d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w32[1] = mul32(argL->w32[1], argR->w32[1]); 13869d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w32[2] = mul32(argL->w32[2], argR->w32[2]); 13969d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w32[3] = mul32(argL->w32[3], argR->w32[3]); 14069d98e3853a63e578e039894e2ef00ca6f9878c8sewardj} 14169d98e3853a63e578e039894e2ef00ca6f9878c8sewardj 142ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardjvoid VEX_REGPARM(3) 143ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj h_generic_calc_Max32Sx4 ( /*OUT*/V128* res, 14469d98e3853a63e578e039894e2ef00ca6f9878c8sewardj V128* argL, V128* argR ) 14569d98e3853a63e578e039894e2ef00ca6f9878c8sewardj{ 14669d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w32[0] = max32S(argL->w32[0], argR->w32[0]); 14769d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w32[1] = max32S(argL->w32[1], argR->w32[1]); 14869d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w32[2] = max32S(argL->w32[2], argR->w32[2]); 14969d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w32[3] = max32S(argL->w32[3], argR->w32[3]); 15069d98e3853a63e578e039894e2ef00ca6f9878c8sewardj} 15169d98e3853a63e578e039894e2ef00ca6f9878c8sewardj 152ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardjvoid VEX_REGPARM(3) 153ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj h_generic_calc_Min32Sx4 ( /*OUT*/V128* res, 15469d98e3853a63e578e039894e2ef00ca6f9878c8sewardj V128* argL, V128* argR ) 15569d98e3853a63e578e039894e2ef00ca6f9878c8sewardj{ 15669d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w32[0] = min32S(argL->w32[0], argR->w32[0]); 15769d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w32[1] = min32S(argL->w32[1], argR->w32[1]); 15869d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w32[2] = min32S(argL->w32[2], argR->w32[2]); 15969d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w32[3] = min32S(argL->w32[3], argR->w32[3]); 16069d98e3853a63e578e039894e2ef00ca6f9878c8sewardj} 16169d98e3853a63e578e039894e2ef00ca6f9878c8sewardj 162ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardjvoid VEX_REGPARM(3) 163ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj h_generic_calc_Max32Ux4 ( /*OUT*/V128* res, 16469d98e3853a63e578e039894e2ef00ca6f9878c8sewardj V128* argL, V128* argR ) 16569d98e3853a63e578e039894e2ef00ca6f9878c8sewardj{ 16669d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w32[0] = max32U(argL->w32[0], argR->w32[0]); 16769d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w32[1] = max32U(argL->w32[1], argR->w32[1]); 16869d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w32[2] = max32U(argL->w32[2], argR->w32[2]); 16969d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w32[3] = max32U(argL->w32[3], argR->w32[3]); 17069d98e3853a63e578e039894e2ef00ca6f9878c8sewardj} 17169d98e3853a63e578e039894e2ef00ca6f9878c8sewardj 172ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardjvoid VEX_REGPARM(3) 173ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj h_generic_calc_Min32Ux4 ( /*OUT*/V128* res, 17469d98e3853a63e578e039894e2ef00ca6f9878c8sewardj V128* argL, V128* argR ) 17569d98e3853a63e578e039894e2ef00ca6f9878c8sewardj{ 17669d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w32[0] = min32U(argL->w32[0], argR->w32[0]); 17769d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w32[1] = min32U(argL->w32[1], argR->w32[1]); 17869d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w32[2] = min32U(argL->w32[2], argR->w32[2]); 17969d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w32[3] = min32U(argL->w32[3], argR->w32[3]); 18069d98e3853a63e578e039894e2ef00ca6f9878c8sewardj} 18169d98e3853a63e578e039894e2ef00ca6f9878c8sewardj 182ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardjvoid VEX_REGPARM(3) 183ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj h_generic_calc_Max16Ux8 ( /*OUT*/V128* res, 18469d98e3853a63e578e039894e2ef00ca6f9878c8sewardj V128* argL, V128* argR ) 18569d98e3853a63e578e039894e2ef00ca6f9878c8sewardj{ 18669d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w16[0] = max16U(argL->w16[0], argR->w16[0]); 18769d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w16[1] = max16U(argL->w16[1], argR->w16[1]); 18869d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w16[2] = max16U(argL->w16[2], argR->w16[2]); 18969d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w16[3] = max16U(argL->w16[3], argR->w16[3]); 19069d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w16[4] = max16U(argL->w16[4], argR->w16[4]); 19169d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w16[5] = max16U(argL->w16[5], argR->w16[5]); 19269d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w16[6] = max16U(argL->w16[6], argR->w16[6]); 19369d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w16[7] = max16U(argL->w16[7], argR->w16[7]); 19469d98e3853a63e578e039894e2ef00ca6f9878c8sewardj} 19569d98e3853a63e578e039894e2ef00ca6f9878c8sewardj 196ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardjvoid VEX_REGPARM(3) 197ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj h_generic_calc_Min16Ux8 ( /*OUT*/V128* res, 19869d98e3853a63e578e039894e2ef00ca6f9878c8sewardj V128* argL, V128* argR ) 19969d98e3853a63e578e039894e2ef00ca6f9878c8sewardj{ 20069d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w16[0] = min16U(argL->w16[0], argR->w16[0]); 20169d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w16[1] = min16U(argL->w16[1], argR->w16[1]); 20269d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w16[2] = min16U(argL->w16[2], argR->w16[2]); 20369d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w16[3] = min16U(argL->w16[3], argR->w16[3]); 20469d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w16[4] = min16U(argL->w16[4], argR->w16[4]); 20569d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w16[5] = min16U(argL->w16[5], argR->w16[5]); 20669d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w16[6] = min16U(argL->w16[6], argR->w16[6]); 20769d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w16[7] = min16U(argL->w16[7], argR->w16[7]); 20869d98e3853a63e578e039894e2ef00ca6f9878c8sewardj} 20969d98e3853a63e578e039894e2ef00ca6f9878c8sewardj 210ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardjvoid VEX_REGPARM(3) 211ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj h_generic_calc_Max8Sx16 ( /*OUT*/V128* res, 21269d98e3853a63e578e039894e2ef00ca6f9878c8sewardj V128* argL, V128* argR ) 21369d98e3853a63e578e039894e2ef00ca6f9878c8sewardj{ 21469d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w8[ 0] = max8S(argL->w8[ 0], argR->w8[ 0]); 21569d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w8[ 1] = max8S(argL->w8[ 1], argR->w8[ 1]); 21669d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w8[ 2] = max8S(argL->w8[ 2], argR->w8[ 2]); 21769d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w8[ 3] = max8S(argL->w8[ 3], argR->w8[ 3]); 21869d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w8[ 4] = max8S(argL->w8[ 4], argR->w8[ 4]); 21969d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w8[ 5] = max8S(argL->w8[ 5], argR->w8[ 5]); 22069d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w8[ 6] = max8S(argL->w8[ 6], argR->w8[ 6]); 22169d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w8[ 7] = max8S(argL->w8[ 7], argR->w8[ 7]); 22269d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w8[ 8] = max8S(argL->w8[ 8], argR->w8[ 8]); 22369d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w8[ 9] = max8S(argL->w8[ 9], argR->w8[ 9]); 22469d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w8[10] = max8S(argL->w8[10], argR->w8[10]); 22569d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w8[11] = max8S(argL->w8[11], argR->w8[11]); 22669d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w8[12] = max8S(argL->w8[12], argR->w8[12]); 22769d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w8[13] = max8S(argL->w8[13], argR->w8[13]); 22869d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w8[14] = max8S(argL->w8[14], argR->w8[14]); 22969d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w8[15] = max8S(argL->w8[15], argR->w8[15]); 23069d98e3853a63e578e039894e2ef00ca6f9878c8sewardj} 23169d98e3853a63e578e039894e2ef00ca6f9878c8sewardj 232ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardjvoid VEX_REGPARM(3) 233ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj h_generic_calc_Min8Sx16 ( /*OUT*/V128* res, 23469d98e3853a63e578e039894e2ef00ca6f9878c8sewardj V128* argL, V128* argR ) 23569d98e3853a63e578e039894e2ef00ca6f9878c8sewardj{ 23669d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w8[ 0] = min8S(argL->w8[ 0], argR->w8[ 0]); 23769d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w8[ 1] = min8S(argL->w8[ 1], argR->w8[ 1]); 23869d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w8[ 2] = min8S(argL->w8[ 2], argR->w8[ 2]); 23969d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w8[ 3] = min8S(argL->w8[ 3], argR->w8[ 3]); 24069d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w8[ 4] = min8S(argL->w8[ 4], argR->w8[ 4]); 24169d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w8[ 5] = min8S(argL->w8[ 5], argR->w8[ 5]); 24269d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w8[ 6] = min8S(argL->w8[ 6], argR->w8[ 6]); 24369d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w8[ 7] = min8S(argL->w8[ 7], argR->w8[ 7]); 24469d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w8[ 8] = min8S(argL->w8[ 8], argR->w8[ 8]); 24569d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w8[ 9] = min8S(argL->w8[ 9], argR->w8[ 9]); 24669d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w8[10] = min8S(argL->w8[10], argR->w8[10]); 24769d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w8[11] = min8S(argL->w8[11], argR->w8[11]); 24869d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w8[12] = min8S(argL->w8[12], argR->w8[12]); 24969d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w8[13] = min8S(argL->w8[13], argR->w8[13]); 25069d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w8[14] = min8S(argL->w8[14], argR->w8[14]); 25169d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w8[15] = min8S(argL->w8[15], argR->w8[15]); 25269d98e3853a63e578e039894e2ef00ca6f9878c8sewardj} 25369d98e3853a63e578e039894e2ef00ca6f9878c8sewardj 254ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardjvoid VEX_REGPARM(3) 255ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj h_generic_calc_CmpEQ64x2 ( /*OUT*/V128* res, 256d881562f1d18ab33038dab4a1b24823dba3422c0sewardj V128* argL, V128* argR ) 257d881562f1d18ab33038dab4a1b24823dba3422c0sewardj{ 258d881562f1d18ab33038dab4a1b24823dba3422c0sewardj res->w64[0] = cmpEQ64(argL->w64[0], argR->w64[0]); 259d881562f1d18ab33038dab4a1b24823dba3422c0sewardj res->w64[1] = cmpEQ64(argL->w64[1], argR->w64[1]); 260d881562f1d18ab33038dab4a1b24823dba3422c0sewardj} 261d881562f1d18ab33038dab4a1b24823dba3422c0sewardj 262ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardjvoid VEX_REGPARM(3) 263ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj h_generic_calc_CmpGT64Sx2 ( /*OUT*/V128* res, 26469d98e3853a63e578e039894e2ef00ca6f9878c8sewardj V128* argL, V128* argR ) 26569d98e3853a63e578e039894e2ef00ca6f9878c8sewardj{ 26669d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w64[0] = cmpGT64S(argL->w64[0], argR->w64[0]); 26769d98e3853a63e578e039894e2ef00ca6f9878c8sewardj res->w64[1] = cmpGT64S(argL->w64[1], argR->w64[1]); 26869d98e3853a63e578e039894e2ef00ca6f9878c8sewardj} 26969d98e3853a63e578e039894e2ef00ca6f9878c8sewardj 2700874beee91898d987c327212c7fb35fdb5db0735sewardj/* ------------ Shifting ------------ */ 2710874beee91898d987c327212c7fb35fdb5db0735sewardj/* Note that because these primops are undefined if the shift amount 2720874beee91898d987c327212c7fb35fdb5db0735sewardj equals or exceeds the lane width, the shift amount is masked so 2730874beee91898d987c327212c7fb35fdb5db0735sewardj that the scalar shifts are always in range. In fact, given the 2740874beee91898d987c327212c7fb35fdb5db0735sewardj semantics of these primops (Sar64x2, etc) it is an error if in 2750874beee91898d987c327212c7fb35fdb5db0735sewardj fact we are ever given an out-of-range shift amount. 2760874beee91898d987c327212c7fb35fdb5db0735sewardj*/ 277ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardjvoid /*not-regparm*/ 278ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj h_generic_calc_SarN64x2 ( /*OUT*/V128* res, 2790874beee91898d987c327212c7fb35fdb5db0735sewardj V128* argL, UInt nn) 2800874beee91898d987c327212c7fb35fdb5db0735sewardj{ 2810874beee91898d987c327212c7fb35fdb5db0735sewardj /* vassert(nn < 64); */ 2820874beee91898d987c327212c7fb35fdb5db0735sewardj nn &= 63; 2830874beee91898d987c327212c7fb35fdb5db0735sewardj res->w64[0] = sar64(argL->w64[0], nn); 2840874beee91898d987c327212c7fb35fdb5db0735sewardj res->w64[1] = sar64(argL->w64[1], nn); 2850874beee91898d987c327212c7fb35fdb5db0735sewardj} 2860874beee91898d987c327212c7fb35fdb5db0735sewardj 287ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardjvoid /*not-regparm*/ 288ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj h_generic_calc_SarN8x16 ( /*OUT*/V128* res, 2890874beee91898d987c327212c7fb35fdb5db0735sewardj V128* argL, UInt nn) 2900874beee91898d987c327212c7fb35fdb5db0735sewardj{ 2910874beee91898d987c327212c7fb35fdb5db0735sewardj /* vassert(nn < 8); */ 2920874beee91898d987c327212c7fb35fdb5db0735sewardj nn &= 7; 2930874beee91898d987c327212c7fb35fdb5db0735sewardj res->w8[ 0] = sar8(argL->w8[ 0], nn); 2940874beee91898d987c327212c7fb35fdb5db0735sewardj res->w8[ 1] = sar8(argL->w8[ 1], nn); 2950874beee91898d987c327212c7fb35fdb5db0735sewardj res->w8[ 2] = sar8(argL->w8[ 2], nn); 2960874beee91898d987c327212c7fb35fdb5db0735sewardj res->w8[ 3] = sar8(argL->w8[ 3], nn); 2970874beee91898d987c327212c7fb35fdb5db0735sewardj res->w8[ 4] = sar8(argL->w8[ 4], nn); 2980874beee91898d987c327212c7fb35fdb5db0735sewardj res->w8[ 5] = sar8(argL->w8[ 5], nn); 2990874beee91898d987c327212c7fb35fdb5db0735sewardj res->w8[ 6] = sar8(argL->w8[ 6], nn); 3000874beee91898d987c327212c7fb35fdb5db0735sewardj res->w8[ 7] = sar8(argL->w8[ 7], nn); 3010874beee91898d987c327212c7fb35fdb5db0735sewardj res->w8[ 8] = sar8(argL->w8[ 8], nn); 3020874beee91898d987c327212c7fb35fdb5db0735sewardj res->w8[ 9] = sar8(argL->w8[ 9], nn); 3030874beee91898d987c327212c7fb35fdb5db0735sewardj res->w8[10] = sar8(argL->w8[10], nn); 3040874beee91898d987c327212c7fb35fdb5db0735sewardj res->w8[11] = sar8(argL->w8[11], nn); 3050874beee91898d987c327212c7fb35fdb5db0735sewardj res->w8[12] = sar8(argL->w8[12], nn); 3060874beee91898d987c327212c7fb35fdb5db0735sewardj res->w8[13] = sar8(argL->w8[13], nn); 3070874beee91898d987c327212c7fb35fdb5db0735sewardj res->w8[14] = sar8(argL->w8[14], nn); 3080874beee91898d987c327212c7fb35fdb5db0735sewardj res->w8[15] = sar8(argL->w8[15], nn); 3090874beee91898d987c327212c7fb35fdb5db0735sewardj} 31069d98e3853a63e578e039894e2ef00ca6f9878c8sewardj 311ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardjvoid VEX_REGPARM(3) 312ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj h_generic_calc_QNarrowBin32Sto16Ux8 ( /*OUT*/V128* res, 3135f438dd73072211989c6d496845bdc9b777ecbecsewardj V128* argL, V128* argR ) 3142260b993536a3d41d5833504371981718b43154esewardj{ 3152260b993536a3d41d5833504371981718b43154esewardj res->w16[0] = qnarrow32Sto16U(argR->w32[0]); 3162260b993536a3d41d5833504371981718b43154esewardj res->w16[1] = qnarrow32Sto16U(argR->w32[1]); 3172260b993536a3d41d5833504371981718b43154esewardj res->w16[2] = qnarrow32Sto16U(argR->w32[2]); 3182260b993536a3d41d5833504371981718b43154esewardj res->w16[3] = qnarrow32Sto16U(argR->w32[3]); 3192260b993536a3d41d5833504371981718b43154esewardj res->w16[4] = qnarrow32Sto16U(argL->w32[0]); 3202260b993536a3d41d5833504371981718b43154esewardj res->w16[5] = qnarrow32Sto16U(argL->w32[1]); 3212260b993536a3d41d5833504371981718b43154esewardj res->w16[6] = qnarrow32Sto16U(argL->w32[2]); 3222260b993536a3d41d5833504371981718b43154esewardj res->w16[7] = qnarrow32Sto16U(argL->w32[3]); 3232260b993536a3d41d5833504371981718b43154esewardj} 3242260b993536a3d41d5833504371981718b43154esewardj 325ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardjvoid VEX_REGPARM(3) 326ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj h_generic_calc_NarrowBin16to8x16 ( /*OUT*/V128* res, 327ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj V128* argL, V128* argR ) 328ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj{ 329ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj res->w8[ 0] = narrow16to8(argR->w16[0]); 330ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj res->w8[ 1] = narrow16to8(argR->w16[1]); 331ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj res->w8[ 2] = narrow16to8(argR->w16[2]); 332ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj res->w8[ 3] = narrow16to8(argR->w16[3]); 333ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj res->w8[ 4] = narrow16to8(argR->w16[4]); 334ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj res->w8[ 5] = narrow16to8(argR->w16[5]); 335ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj res->w8[ 6] = narrow16to8(argR->w16[6]); 336ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj res->w8[ 7] = narrow16to8(argR->w16[7]); 337ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj res->w8[ 8] = narrow16to8(argL->w16[0]); 338ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj res->w8[ 9] = narrow16to8(argL->w16[1]); 339ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj res->w8[10] = narrow16to8(argL->w16[2]); 340ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj res->w8[11] = narrow16to8(argL->w16[3]); 341ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj res->w8[12] = narrow16to8(argL->w16[4]); 342ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj res->w8[13] = narrow16to8(argL->w16[5]); 343ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj res->w8[14] = narrow16to8(argL->w16[6]); 344ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj res->w8[15] = narrow16to8(argL->w16[7]); 345ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj} 346ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj 347ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardjvoid VEX_REGPARM(3) 348ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj h_generic_calc_NarrowBin32to16x8 ( /*OUT*/V128* res, 349ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj V128* argL, V128* argR ) 350ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj{ 351ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj res->w16[0] = narrow32to16(argR->w32[0]); 352ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj res->w16[1] = narrow32to16(argR->w32[1]); 353ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj res->w16[2] = narrow32to16(argR->w32[2]); 354ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj res->w16[3] = narrow32to16(argR->w32[3]); 355ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj res->w16[4] = narrow32to16(argL->w32[0]); 356ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj res->w16[5] = narrow32to16(argL->w32[1]); 357ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj res->w16[6] = narrow32to16(argL->w32[2]); 358ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj res->w16[7] = narrow32to16(argL->w32[3]); 359ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj} 360ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj 361d8bca7e48c8b3ad028878ab501197ba701df43e5sewardjvoid VEX_REGPARM(3) 362d8bca7e48c8b3ad028878ab501197ba701df43e5sewardj h_generic_calc_Perm32x4 ( /*OUT*/V128* res, 363d8bca7e48c8b3ad028878ab501197ba701df43e5sewardj V128* argL, V128* argR ) 364d8bca7e48c8b3ad028878ab501197ba701df43e5sewardj{ 365d8bca7e48c8b3ad028878ab501197ba701df43e5sewardj res->w32[0] = argL->w32[ argR->w32[0] & 3 ]; 366d8bca7e48c8b3ad028878ab501197ba701df43e5sewardj res->w32[1] = argL->w32[ argR->w32[1] & 3 ]; 367d8bca7e48c8b3ad028878ab501197ba701df43e5sewardj res->w32[2] = argL->w32[ argR->w32[2] & 3 ]; 368d8bca7e48c8b3ad028878ab501197ba701df43e5sewardj res->w32[3] = argL->w32[ argR->w32[3] & 3 ]; 369d8bca7e48c8b3ad028878ab501197ba701df43e5sewardj} 370d8bca7e48c8b3ad028878ab501197ba701df43e5sewardj 37178a20592024c096430f630006412fec063568bfdsewardjUInt /*not-regparm*/ 37278a20592024c096430f630006412fec063568bfdsewardj h_generic_calc_GetMSBs8x16 ( ULong w64hi, ULong w64lo ) 37378a20592024c096430f630006412fec063568bfdsewardj{ 37478a20592024c096430f630006412fec063568bfdsewardj UInt r = 0; 37578a20592024c096430f630006412fec063568bfdsewardj if (w64hi & (1ULL << (64-1))) r |= (1<<15); 37678a20592024c096430f630006412fec063568bfdsewardj if (w64hi & (1ULL << (56-1))) r |= (1<<14); 37778a20592024c096430f630006412fec063568bfdsewardj if (w64hi & (1ULL << (48-1))) r |= (1<<13); 37878a20592024c096430f630006412fec063568bfdsewardj if (w64hi & (1ULL << (40-1))) r |= (1<<12); 37978a20592024c096430f630006412fec063568bfdsewardj if (w64hi & (1ULL << (32-1))) r |= (1<<11); 38078a20592024c096430f630006412fec063568bfdsewardj if (w64hi & (1ULL << (24-1))) r |= (1<<10); 38178a20592024c096430f630006412fec063568bfdsewardj if (w64hi & (1ULL << (16-1))) r |= (1<<9); 38278a20592024c096430f630006412fec063568bfdsewardj if (w64hi & (1ULL << ( 8-1))) r |= (1<<8); 38378a20592024c096430f630006412fec063568bfdsewardj if (w64lo & (1ULL << (64-1))) r |= (1<<7); 38478a20592024c096430f630006412fec063568bfdsewardj if (w64lo & (1ULL << (56-1))) r |= (1<<6); 38578a20592024c096430f630006412fec063568bfdsewardj if (w64lo & (1ULL << (48-1))) r |= (1<<5); 38678a20592024c096430f630006412fec063568bfdsewardj if (w64lo & (1ULL << (40-1))) r |= (1<<4); 38778a20592024c096430f630006412fec063568bfdsewardj if (w64lo & (1ULL << (32-1))) r |= (1<<3); 38878a20592024c096430f630006412fec063568bfdsewardj if (w64lo & (1ULL << (24-1))) r |= (1<<2); 38978a20592024c096430f630006412fec063568bfdsewardj if (w64lo & (1ULL << (16-1))) r |= (1<<1); 39078a20592024c096430f630006412fec063568bfdsewardj if (w64lo & (1ULL << ( 8-1))) r |= (1<<0); 39178a20592024c096430f630006412fec063568bfdsewardj return r; 39278a20592024c096430f630006412fec063568bfdsewardj} 3932260b993536a3d41d5833504371981718b43154esewardj 39469d98e3853a63e578e039894e2ef00ca6f9878c8sewardj/*---------------------------------------------------------------*/ 39569d98e3853a63e578e039894e2ef00ca6f9878c8sewardj/*--- end host_generic_simd128.c ---*/ 39669d98e3853a63e578e039894e2ef00ca6f9878c8sewardj/*---------------------------------------------------------------*/ 397