1ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 2ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*---------------------------------------------------------------*/ 3ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*--- begin host_generic_simd128.c ---*/ 4ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*---------------------------------------------------------------*/ 5ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 6ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* 7ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown This file is part of Valgrind, a dynamic binary instrumentation 8ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown framework. 9ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 10b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov Copyright (C) 2010-2011 OpenWorks GbR 11ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown info@open-works.net 12ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 13ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown This program is free software; you can redistribute it and/or 14ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown modify it under the terms of the GNU General Public License as 15ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown published by the Free Software Foundation; either version 2 of the 16ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown License, or (at your option) any later version. 17ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 18ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown This program is distributed in the hope that it will be useful, but 19ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown WITHOUT ANY WARRANTY; without even the implied warranty of 20ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown General Public License for more details. 22ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 23ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown You should have received a copy of the GNU General Public License 24ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown along with this program; if not, write to the Free Software 25ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 26ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 02110-1301, USA. 27ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 28ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown The GNU General Public License is contained in the file COPYING. 29ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown*/ 30ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 31ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* Generic helper functions for doing 128-bit SIMD arithmetic in cases 32ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown where the instruction selectors cannot generate code in-line. 33ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown These are purely back-end entities and cannot be seen/referenced 34ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown from IR. */ 35ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 36ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#include "libvex_basictypes.h" 37ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#include "host_generic_simd128.h" 38ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 39ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 40ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* Primitive helpers always take args of the real type (signed vs 41ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown unsigned) but return an unsigned result, so there's no conversion 42ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown weirdness when stuffing results back in the V128 union fields, 43ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown which are all unsigned. */ 44ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 45ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic inline UInt mul32 ( Int xx, Int yy ) 46ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{ 47ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown Int t = ((Int)xx) * ((Int)yy); 48ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return toUInt(t); 49ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown} 50ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 51ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic inline UInt max32S ( Int xx, Int yy ) 52ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{ 53ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return toUInt((xx > yy) ? xx : yy); 54ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown} 55ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 56ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic inline UInt min32S ( Int xx, Int yy ) 57ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{ 58ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return toUInt((xx < yy) ? xx : yy); 59ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown} 60ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 61ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic inline UInt max32U ( UInt xx, UInt yy ) 62ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{ 63ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return toUInt((xx > yy) ? xx : yy); 64ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown} 65ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 66ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic inline UInt min32U ( UInt xx, UInt yy ) 67ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{ 68ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return toUInt((xx < yy) ? xx : yy); 69ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown} 70ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 71ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic inline UShort max16U ( UShort xx, UShort yy ) 72ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{ 73ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return toUShort((xx > yy) ? xx : yy); 74ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown} 75ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 76ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic inline UShort min16U ( UShort xx, UShort yy ) 77ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{ 78ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return toUShort((xx < yy) ? xx : yy); 79ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown} 80ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 81ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic inline UChar max8S ( Char xx, Char yy ) 82ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{ 83ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return toUChar((xx > yy) ? xx : yy); 84ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown} 85ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 86ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic inline UChar min8S ( Char xx, Char yy ) 87ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{ 88ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return toUChar((xx < yy) ? xx : yy); 89ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown} 90ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 91b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanovstatic inline ULong cmpEQ64 ( Long xx, Long yy ) 92b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov{ 93b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov return (((Long)xx) == ((Long)yy)) 94b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov ? 0xFFFFFFFFFFFFFFFFULL : 0ULL; 95b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov} 96b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov 97ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic inline ULong cmpGT64S ( Long xx, Long yy ) 98ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{ 99ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return (((Long)xx) > ((Long)yy)) 100ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown ? 0xFFFFFFFFFFFFFFFFULL : 0ULL; 101ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown} 102ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 103f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Rootstatic inline ULong sar64 ( ULong v, UInt n ) 104f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root{ 105f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root return ((Long)v) >> n; 106f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root} 107f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root 108f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Rootstatic inline UChar sar8 ( UChar v, UInt n ) 109f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root{ 110f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root return toUChar(((Char)v) >> n); 111f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root} 112f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root 113b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanovstatic inline UShort qnarrow32Sto16U ( UInt xx0 ) 114b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov{ 115b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov Int xx = (Int)xx0; 116b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov if (xx < 0) xx = 0; 117b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov if (xx > 65535) xx = 65535; 118b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov return (UShort)xx; 119b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov} 120b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov 121b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanovstatic inline UShort narrow32to16 ( UInt xx ) 122b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov{ 123b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov return (UShort)xx; 124b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov} 125b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov 126b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanovstatic inline UChar narrow16to8 ( UShort xx ) 127b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov{ 128b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov return (UChar)xx; 129b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov} 130b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov 131b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov 132b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanovvoid VEX_REGPARM(3) 133b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov h_generic_calc_Mul32x4 ( /*OUT*/V128* res, 134ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown V128* argL, V128* argR ) 135ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{ 136ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w32[0] = mul32(argL->w32[0], argR->w32[0]); 137ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w32[1] = mul32(argL->w32[1], argR->w32[1]); 138ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w32[2] = mul32(argL->w32[2], argR->w32[2]); 139ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w32[3] = mul32(argL->w32[3], argR->w32[3]); 140ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown} 141ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 142b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanovvoid VEX_REGPARM(3) 143b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov h_generic_calc_Max32Sx4 ( /*OUT*/V128* res, 144ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown V128* argL, V128* argR ) 145ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{ 146ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w32[0] = max32S(argL->w32[0], argR->w32[0]); 147ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w32[1] = max32S(argL->w32[1], argR->w32[1]); 148ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w32[2] = max32S(argL->w32[2], argR->w32[2]); 149ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w32[3] = max32S(argL->w32[3], argR->w32[3]); 150ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown} 151ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 152b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanovvoid VEX_REGPARM(3) 153b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov h_generic_calc_Min32Sx4 ( /*OUT*/V128* res, 154ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown V128* argL, V128* argR ) 155ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{ 156ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w32[0] = min32S(argL->w32[0], argR->w32[0]); 157ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w32[1] = min32S(argL->w32[1], argR->w32[1]); 158ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w32[2] = min32S(argL->w32[2], argR->w32[2]); 159ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w32[3] = min32S(argL->w32[3], argR->w32[3]); 160ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown} 161ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 162b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanovvoid VEX_REGPARM(3) 163b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov h_generic_calc_Max32Ux4 ( /*OUT*/V128* res, 164ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown V128* argL, V128* argR ) 165ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{ 166ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w32[0] = max32U(argL->w32[0], argR->w32[0]); 167ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w32[1] = max32U(argL->w32[1], argR->w32[1]); 168ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w32[2] = max32U(argL->w32[2], argR->w32[2]); 169ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w32[3] = max32U(argL->w32[3], argR->w32[3]); 170ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown} 171ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 172b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanovvoid VEX_REGPARM(3) 173b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov h_generic_calc_Min32Ux4 ( /*OUT*/V128* res, 174ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown V128* argL, V128* argR ) 175ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{ 176ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w32[0] = min32U(argL->w32[0], argR->w32[0]); 177ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w32[1] = min32U(argL->w32[1], argR->w32[1]); 178ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w32[2] = min32U(argL->w32[2], argR->w32[2]); 179ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w32[3] = min32U(argL->w32[3], argR->w32[3]); 180ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown} 181ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 182b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanovvoid VEX_REGPARM(3) 183b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov h_generic_calc_Max16Ux8 ( /*OUT*/V128* res, 184ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown V128* argL, V128* argR ) 185ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{ 186ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w16[0] = max16U(argL->w16[0], argR->w16[0]); 187ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w16[1] = max16U(argL->w16[1], argR->w16[1]); 188ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w16[2] = max16U(argL->w16[2], argR->w16[2]); 189ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w16[3] = max16U(argL->w16[3], argR->w16[3]); 190ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w16[4] = max16U(argL->w16[4], argR->w16[4]); 191ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w16[5] = max16U(argL->w16[5], argR->w16[5]); 192ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w16[6] = max16U(argL->w16[6], argR->w16[6]); 193ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w16[7] = max16U(argL->w16[7], argR->w16[7]); 194ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown} 195ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 196b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanovvoid VEX_REGPARM(3) 197b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov h_generic_calc_Min16Ux8 ( /*OUT*/V128* res, 198ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown V128* argL, V128* argR ) 199ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{ 200ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w16[0] = min16U(argL->w16[0], argR->w16[0]); 201ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w16[1] = min16U(argL->w16[1], argR->w16[1]); 202ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w16[2] = min16U(argL->w16[2], argR->w16[2]); 203ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w16[3] = min16U(argL->w16[3], argR->w16[3]); 204ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w16[4] = min16U(argL->w16[4], argR->w16[4]); 205ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w16[5] = min16U(argL->w16[5], argR->w16[5]); 206ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w16[6] = min16U(argL->w16[6], argR->w16[6]); 207ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w16[7] = min16U(argL->w16[7], argR->w16[7]); 208ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown} 209ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 210b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanovvoid VEX_REGPARM(3) 211b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov h_generic_calc_Max8Sx16 ( /*OUT*/V128* res, 212ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown V128* argL, V128* argR ) 213ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{ 214ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w8[ 0] = max8S(argL->w8[ 0], argR->w8[ 0]); 215ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w8[ 1] = max8S(argL->w8[ 1], argR->w8[ 1]); 216ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w8[ 2] = max8S(argL->w8[ 2], argR->w8[ 2]); 217ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w8[ 3] = max8S(argL->w8[ 3], argR->w8[ 3]); 218ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w8[ 4] = max8S(argL->w8[ 4], argR->w8[ 4]); 219ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w8[ 5] = max8S(argL->w8[ 5], argR->w8[ 5]); 220ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w8[ 6] = max8S(argL->w8[ 6], argR->w8[ 6]); 221ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w8[ 7] = max8S(argL->w8[ 7], argR->w8[ 7]); 222ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w8[ 8] = max8S(argL->w8[ 8], argR->w8[ 8]); 223ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w8[ 9] = max8S(argL->w8[ 9], argR->w8[ 9]); 224ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w8[10] = max8S(argL->w8[10], argR->w8[10]); 225ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w8[11] = max8S(argL->w8[11], argR->w8[11]); 226ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w8[12] = max8S(argL->w8[12], argR->w8[12]); 227ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w8[13] = max8S(argL->w8[13], argR->w8[13]); 228ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w8[14] = max8S(argL->w8[14], argR->w8[14]); 229ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w8[15] = max8S(argL->w8[15], argR->w8[15]); 230ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown} 231ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 232b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanovvoid VEX_REGPARM(3) 233b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov h_generic_calc_Min8Sx16 ( /*OUT*/V128* res, 234ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown V128* argL, V128* argR ) 235ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{ 236ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w8[ 0] = min8S(argL->w8[ 0], argR->w8[ 0]); 237ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w8[ 1] = min8S(argL->w8[ 1], argR->w8[ 1]); 238ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w8[ 2] = min8S(argL->w8[ 2], argR->w8[ 2]); 239ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w8[ 3] = min8S(argL->w8[ 3], argR->w8[ 3]); 240ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w8[ 4] = min8S(argL->w8[ 4], argR->w8[ 4]); 241ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w8[ 5] = min8S(argL->w8[ 5], argR->w8[ 5]); 242ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w8[ 6] = min8S(argL->w8[ 6], argR->w8[ 6]); 243ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w8[ 7] = min8S(argL->w8[ 7], argR->w8[ 7]); 244ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w8[ 8] = min8S(argL->w8[ 8], argR->w8[ 8]); 245ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w8[ 9] = min8S(argL->w8[ 9], argR->w8[ 9]); 246ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w8[10] = min8S(argL->w8[10], argR->w8[10]); 247ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w8[11] = min8S(argL->w8[11], argR->w8[11]); 248ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w8[12] = min8S(argL->w8[12], argR->w8[12]); 249ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w8[13] = min8S(argL->w8[13], argR->w8[13]); 250ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w8[14] = min8S(argL->w8[14], argR->w8[14]); 251ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w8[15] = min8S(argL->w8[15], argR->w8[15]); 252ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown} 253ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 254b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanovvoid VEX_REGPARM(3) 255b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov h_generic_calc_CmpEQ64x2 ( /*OUT*/V128* res, 256b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov V128* argL, V128* argR ) 257b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov{ 258b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov res->w64[0] = cmpEQ64(argL->w64[0], argR->w64[0]); 259b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov res->w64[1] = cmpEQ64(argL->w64[1], argR->w64[1]); 260b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov} 261b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov 262b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanovvoid VEX_REGPARM(3) 263b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov h_generic_calc_CmpGT64Sx2 ( /*OUT*/V128* res, 264ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown V128* argL, V128* argR ) 265ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{ 266ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w64[0] = cmpGT64S(argL->w64[0], argR->w64[0]); 267ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown res->w64[1] = cmpGT64S(argL->w64[1], argR->w64[1]); 268ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown} 269ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 270f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root/* ------------ Shifting ------------ */ 271f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root/* Note that because these primops are undefined if the shift amount 272f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root equals or exceeds the lane width, the shift amount is masked so 273f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root that the scalar shifts are always in range. In fact, given the 274f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root semantics of these primops (Sar64x2, etc) it is an error if in 275f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root fact we are ever given an out-of-range shift amount. 276f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root*/ 277b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanovvoid /*not-regparm*/ 278b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov h_generic_calc_SarN64x2 ( /*OUT*/V128* res, 279f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root V128* argL, UInt nn) 280f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root{ 281f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root /* vassert(nn < 64); */ 282f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root nn &= 63; 283f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root res->w64[0] = sar64(argL->w64[0], nn); 284f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root res->w64[1] = sar64(argL->w64[1], nn); 285f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root} 286f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root 287b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanovvoid /*not-regparm*/ 288b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov h_generic_calc_SarN8x16 ( /*OUT*/V128* res, 289f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root V128* argL, UInt nn) 290f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root{ 291f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root /* vassert(nn < 8); */ 292f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root nn &= 7; 293f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root res->w8[ 0] = sar8(argL->w8[ 0], nn); 294f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root res->w8[ 1] = sar8(argL->w8[ 1], nn); 295f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root res->w8[ 2] = sar8(argL->w8[ 2], nn); 296f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root res->w8[ 3] = sar8(argL->w8[ 3], nn); 297f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root res->w8[ 4] = sar8(argL->w8[ 4], nn); 298f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root res->w8[ 5] = sar8(argL->w8[ 5], nn); 299f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root res->w8[ 6] = sar8(argL->w8[ 6], nn); 300f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root res->w8[ 7] = sar8(argL->w8[ 7], nn); 301f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root res->w8[ 8] = sar8(argL->w8[ 8], nn); 302f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root res->w8[ 9] = sar8(argL->w8[ 9], nn); 303f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root res->w8[10] = sar8(argL->w8[10], nn); 304f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root res->w8[11] = sar8(argL->w8[11], nn); 305f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root res->w8[12] = sar8(argL->w8[12], nn); 306f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root res->w8[13] = sar8(argL->w8[13], nn); 307f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root res->w8[14] = sar8(argL->w8[14], nn); 308f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root res->w8[15] = sar8(argL->w8[15], nn); 309f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root} 310ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 311b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanovvoid VEX_REGPARM(3) 312b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov h_generic_calc_QNarrowBin32Sto16Ux8 ( /*OUT*/V128* res, 313b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov V128* argL, V128* argR ) 314b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov{ 315b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov res->w16[0] = qnarrow32Sto16U(argR->w32[0]); 316b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov res->w16[1] = qnarrow32Sto16U(argR->w32[1]); 317b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov res->w16[2] = qnarrow32Sto16U(argR->w32[2]); 318b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov res->w16[3] = qnarrow32Sto16U(argR->w32[3]); 319b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov res->w16[4] = qnarrow32Sto16U(argL->w32[0]); 320b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov res->w16[5] = qnarrow32Sto16U(argL->w32[1]); 321b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov res->w16[6] = qnarrow32Sto16U(argL->w32[2]); 322b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov res->w16[7] = qnarrow32Sto16U(argL->w32[3]); 323b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov} 324b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov 325b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanovvoid VEX_REGPARM(3) 326b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov h_generic_calc_NarrowBin16to8x16 ( /*OUT*/V128* res, 327b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov V128* argL, V128* argR ) 328b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov{ 329b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov res->w8[ 0] = narrow16to8(argR->w16[0]); 330b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov res->w8[ 1] = narrow16to8(argR->w16[1]); 331b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov res->w8[ 2] = narrow16to8(argR->w16[2]); 332b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov res->w8[ 3] = narrow16to8(argR->w16[3]); 333b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov res->w8[ 4] = narrow16to8(argR->w16[4]); 334b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov res->w8[ 5] = narrow16to8(argR->w16[5]); 335b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov res->w8[ 6] = narrow16to8(argR->w16[6]); 336b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov res->w8[ 7] = narrow16to8(argR->w16[7]); 337b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov res->w8[ 8] = narrow16to8(argL->w16[0]); 338b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov res->w8[ 9] = narrow16to8(argL->w16[1]); 339b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov res->w8[10] = narrow16to8(argL->w16[2]); 340b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov res->w8[11] = narrow16to8(argL->w16[3]); 341b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov res->w8[12] = narrow16to8(argL->w16[4]); 342b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov res->w8[13] = narrow16to8(argL->w16[5]); 343b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov res->w8[14] = narrow16to8(argL->w16[6]); 344b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov res->w8[15] = narrow16to8(argL->w16[7]); 345b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov} 346b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov 347b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanovvoid VEX_REGPARM(3) 348b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov h_generic_calc_NarrowBin32to16x8 ( /*OUT*/V128* res, 349b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov V128* argL, V128* argR ) 350b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov{ 351b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov res->w16[0] = narrow32to16(argR->w32[0]); 352b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov res->w16[1] = narrow32to16(argR->w32[1]); 353b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov res->w16[2] = narrow32to16(argR->w32[2]); 354b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov res->w16[3] = narrow32to16(argR->w32[3]); 355b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov res->w16[4] = narrow32to16(argL->w32[0]); 356b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov res->w16[5] = narrow32to16(argL->w32[1]); 357b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov res->w16[6] = narrow32to16(argL->w32[2]); 358b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov res->w16[7] = narrow32to16(argL->w32[3]); 359b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov} 360b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov 361b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov 362ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*---------------------------------------------------------------*/ 363ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*--- end host_generic_simd128.c ---*/ 364ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*---------------------------------------------------------------*/ 365