1ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
/*---------------------------------------------------------------*/
/*--- begin                            host_generic_simd128.c ---*/
/*---------------------------------------------------------------*/
5ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2010-2011 OpenWorks GbR
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.
*/
30ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
/* Generic helper functions for doing 128-bit SIMD arithmetic in cases
   where the instruction selectors cannot generate code in-line.
   These are purely back-end entities and cannot be seen/referenced
   from IR. */
35ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
36ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#include "libvex_basictypes.h"
37ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#include "host_generic_simd128.h"
38ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
39ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
/* Primitive helpers always take args of the real type (signed vs
   unsigned) but return an unsigned result, so there's no conversion
   weirdness when stuffing results back in the V128 union fields,
   which are all unsigned. */
44ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
45ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic inline UInt mul32 ( Int xx, Int yy )
46ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{
47ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   Int t = ((Int)xx) * ((Int)yy);
48ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   return toUInt(t);
49ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown}
50ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
51ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic inline UInt max32S ( Int xx, Int yy )
52ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{
53ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   return toUInt((xx > yy) ? xx : yy);
54ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown}
55ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
56ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic inline UInt min32S ( Int xx, Int yy )
57ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{
58ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   return toUInt((xx < yy) ? xx : yy);
59ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown}
60ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
61ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic inline UInt max32U ( UInt xx, UInt yy )
62ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{
63ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   return toUInt((xx > yy) ? xx : yy);
64ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown}
65ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
66ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic inline UInt min32U ( UInt xx, UInt yy )
67ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{
68ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   return toUInt((xx < yy) ? xx : yy);
69ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown}
70ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
71ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic inline UShort max16U ( UShort xx, UShort yy )
72ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{
73ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   return toUShort((xx > yy) ? xx : yy);
74ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown}
75ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
76ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic inline UShort min16U ( UShort xx, UShort yy )
77ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{
78ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   return toUShort((xx < yy) ? xx : yy);
79ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown}
80ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
81ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic inline UChar max8S ( Char xx, Char yy )
82ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{
83ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   return toUChar((xx > yy) ? xx : yy);
84ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown}
85ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
86ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic inline UChar min8S ( Char xx, Char yy )
87ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{
88ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   return toUChar((xx < yy) ? xx : yy);
89ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown}
90ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
91b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanovstatic inline ULong cmpEQ64 ( Long xx, Long yy )
92b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov{
93b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   return (((Long)xx) == ((Long)yy))
94b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov             ? 0xFFFFFFFFFFFFFFFFULL : 0ULL;
95b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov}
96b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov
97ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic inline ULong cmpGT64S ( Long xx, Long yy )
98ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{
99ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   return (((Long)xx) > ((Long)yy))
100ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown             ? 0xFFFFFFFFFFFFFFFFULL : 0ULL;
101ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown}
102ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
103f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Rootstatic inline ULong sar64 ( ULong v, UInt n )
104f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root{
105f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root   return ((Long)v) >> n;
106f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root}
107f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root
108f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Rootstatic inline UChar sar8 ( UChar v, UInt n )
109f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root{
110f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root   return toUChar(((Char)v) >> n);
111f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root}
112f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root
113b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanovstatic inline UShort qnarrow32Sto16U ( UInt xx0 )
114b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov{
115b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   Int xx = (Int)xx0;
116b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   if (xx < 0)     xx = 0;
117b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   if (xx > 65535) xx = 65535;
118b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   return (UShort)xx;
119b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov}
120b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov
121b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanovstatic inline UShort narrow32to16 ( UInt xx )
122b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov{
123b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   return (UShort)xx;
124b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov}
125b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov
126b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanovstatic inline UChar narrow16to8 ( UShort xx )
127b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov{
128b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   return (UChar)xx;
129b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov}
130b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov
131b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov
132b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanovvoid VEX_REGPARM(3)
133b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov     h_generic_calc_Mul32x4 ( /*OUT*/V128* res,
134ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                              V128* argL, V128* argR )
135ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{
136ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w32[0] = mul32(argL->w32[0], argR->w32[0]);
137ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w32[1] = mul32(argL->w32[1], argR->w32[1]);
138ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w32[2] = mul32(argL->w32[2], argR->w32[2]);
139ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w32[3] = mul32(argL->w32[3], argR->w32[3]);
140ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown}
141ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
142b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanovvoid VEX_REGPARM(3)
143b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov     h_generic_calc_Max32Sx4 ( /*OUT*/V128* res,
144ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                               V128* argL, V128* argR )
145ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{
146ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w32[0] = max32S(argL->w32[0], argR->w32[0]);
147ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w32[1] = max32S(argL->w32[1], argR->w32[1]);
148ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w32[2] = max32S(argL->w32[2], argR->w32[2]);
149ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w32[3] = max32S(argL->w32[3], argR->w32[3]);
150ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown}
151ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
152b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanovvoid VEX_REGPARM(3)
153b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov     h_generic_calc_Min32Sx4 ( /*OUT*/V128* res,
154ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                               V128* argL, V128* argR )
155ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{
156ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w32[0] = min32S(argL->w32[0], argR->w32[0]);
157ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w32[1] = min32S(argL->w32[1], argR->w32[1]);
158ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w32[2] = min32S(argL->w32[2], argR->w32[2]);
159ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w32[3] = min32S(argL->w32[3], argR->w32[3]);
160ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown}
161ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
162b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanovvoid VEX_REGPARM(3)
163b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov     h_generic_calc_Max32Ux4 ( /*OUT*/V128* res,
164ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                               V128* argL, V128* argR )
165ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{
166ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w32[0] = max32U(argL->w32[0], argR->w32[0]);
167ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w32[1] = max32U(argL->w32[1], argR->w32[1]);
168ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w32[2] = max32U(argL->w32[2], argR->w32[2]);
169ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w32[3] = max32U(argL->w32[3], argR->w32[3]);
170ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown}
171ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
172b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanovvoid VEX_REGPARM(3)
173b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov     h_generic_calc_Min32Ux4 ( /*OUT*/V128* res,
174ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                               V128* argL, V128* argR )
175ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{
176ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w32[0] = min32U(argL->w32[0], argR->w32[0]);
177ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w32[1] = min32U(argL->w32[1], argR->w32[1]);
178ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w32[2] = min32U(argL->w32[2], argR->w32[2]);
179ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w32[3] = min32U(argL->w32[3], argR->w32[3]);
180ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown}
181ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
182b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanovvoid VEX_REGPARM(3)
183b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov     h_generic_calc_Max16Ux8 ( /*OUT*/V128* res,
184ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                               V128* argL, V128* argR )
185ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{
186ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w16[0] = max16U(argL->w16[0], argR->w16[0]);
187ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w16[1] = max16U(argL->w16[1], argR->w16[1]);
188ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w16[2] = max16U(argL->w16[2], argR->w16[2]);
189ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w16[3] = max16U(argL->w16[3], argR->w16[3]);
190ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w16[4] = max16U(argL->w16[4], argR->w16[4]);
191ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w16[5] = max16U(argL->w16[5], argR->w16[5]);
192ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w16[6] = max16U(argL->w16[6], argR->w16[6]);
193ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w16[7] = max16U(argL->w16[7], argR->w16[7]);
194ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown}
195ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
196b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanovvoid VEX_REGPARM(3)
197b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov     h_generic_calc_Min16Ux8 ( /*OUT*/V128* res,
198ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                               V128* argL, V128* argR )
199ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{
200ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w16[0] = min16U(argL->w16[0], argR->w16[0]);
201ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w16[1] = min16U(argL->w16[1], argR->w16[1]);
202ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w16[2] = min16U(argL->w16[2], argR->w16[2]);
203ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w16[3] = min16U(argL->w16[3], argR->w16[3]);
204ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w16[4] = min16U(argL->w16[4], argR->w16[4]);
205ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w16[5] = min16U(argL->w16[5], argR->w16[5]);
206ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w16[6] = min16U(argL->w16[6], argR->w16[6]);
207ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w16[7] = min16U(argL->w16[7], argR->w16[7]);
208ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown}
209ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
210b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanovvoid VEX_REGPARM(3)
211b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov     h_generic_calc_Max8Sx16 ( /*OUT*/V128* res,
212ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                               V128* argL, V128* argR )
213ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{
214ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w8[ 0] = max8S(argL->w8[ 0], argR->w8[ 0]);
215ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w8[ 1] = max8S(argL->w8[ 1], argR->w8[ 1]);
216ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w8[ 2] = max8S(argL->w8[ 2], argR->w8[ 2]);
217ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w8[ 3] = max8S(argL->w8[ 3], argR->w8[ 3]);
218ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w8[ 4] = max8S(argL->w8[ 4], argR->w8[ 4]);
219ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w8[ 5] = max8S(argL->w8[ 5], argR->w8[ 5]);
220ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w8[ 6] = max8S(argL->w8[ 6], argR->w8[ 6]);
221ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w8[ 7] = max8S(argL->w8[ 7], argR->w8[ 7]);
222ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w8[ 8] = max8S(argL->w8[ 8], argR->w8[ 8]);
223ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w8[ 9] = max8S(argL->w8[ 9], argR->w8[ 9]);
224ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w8[10] = max8S(argL->w8[10], argR->w8[10]);
225ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w8[11] = max8S(argL->w8[11], argR->w8[11]);
226ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w8[12] = max8S(argL->w8[12], argR->w8[12]);
227ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w8[13] = max8S(argL->w8[13], argR->w8[13]);
228ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w8[14] = max8S(argL->w8[14], argR->w8[14]);
229ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w8[15] = max8S(argL->w8[15], argR->w8[15]);
230ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown}
231ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
232b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanovvoid VEX_REGPARM(3)
233b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov     h_generic_calc_Min8Sx16 ( /*OUT*/V128* res,
234ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                               V128* argL, V128* argR )
235ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{
236ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w8[ 0] = min8S(argL->w8[ 0], argR->w8[ 0]);
237ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w8[ 1] = min8S(argL->w8[ 1], argR->w8[ 1]);
238ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w8[ 2] = min8S(argL->w8[ 2], argR->w8[ 2]);
239ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w8[ 3] = min8S(argL->w8[ 3], argR->w8[ 3]);
240ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w8[ 4] = min8S(argL->w8[ 4], argR->w8[ 4]);
241ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w8[ 5] = min8S(argL->w8[ 5], argR->w8[ 5]);
242ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w8[ 6] = min8S(argL->w8[ 6], argR->w8[ 6]);
243ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w8[ 7] = min8S(argL->w8[ 7], argR->w8[ 7]);
244ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w8[ 8] = min8S(argL->w8[ 8], argR->w8[ 8]);
245ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w8[ 9] = min8S(argL->w8[ 9], argR->w8[ 9]);
246ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w8[10] = min8S(argL->w8[10], argR->w8[10]);
247ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w8[11] = min8S(argL->w8[11], argR->w8[11]);
248ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w8[12] = min8S(argL->w8[12], argR->w8[12]);
249ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w8[13] = min8S(argL->w8[13], argR->w8[13]);
250ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w8[14] = min8S(argL->w8[14], argR->w8[14]);
251ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w8[15] = min8S(argL->w8[15], argR->w8[15]);
252ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown}
253ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
254b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanovvoid VEX_REGPARM(3)
255b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov     h_generic_calc_CmpEQ64x2 ( /*OUT*/V128* res,
256b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov                                V128* argL, V128* argR )
257b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov{
258b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   res->w64[0] = cmpEQ64(argL->w64[0], argR->w64[0]);
259b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   res->w64[1] = cmpEQ64(argL->w64[1], argR->w64[1]);
260b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov}
261b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov
262b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanovvoid VEX_REGPARM(3)
263b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov     h_generic_calc_CmpGT64Sx2 ( /*OUT*/V128* res,
264ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                                 V128* argL, V128* argR )
265ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{
266ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w64[0] = cmpGT64S(argL->w64[0], argR->w64[0]);
267ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   res->w64[1] = cmpGT64S(argL->w64[1], argR->w64[1]);
268ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown}
269ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
/* ------------ Shifting ------------ */
/* Note that because these primops are undefined if the shift amount
   equals or exceeds the lane width, the shift amount is masked so
   that the scalar shifts are always in range.  Given the semantics
   of these primops (Sar64x2, etc), it is in fact an error if we are
   ever given an out-of-range shift amount.
*/
277b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanovvoid /*not-regparm*/
278b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov     h_generic_calc_SarN64x2 ( /*OUT*/V128* res,
279f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root                               V128* argL, UInt nn)
280f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root{
281f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root   /* vassert(nn < 64); */
282f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root   nn &= 63;
283f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root   res->w64[0] = sar64(argL->w64[0], nn);
284f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root   res->w64[1] = sar64(argL->w64[1], nn);
285f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root}
286f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root
287b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanovvoid /*not-regparm*/
288b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov     h_generic_calc_SarN8x16 ( /*OUT*/V128* res,
289f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root                              V128* argL, UInt nn)
290f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root{
291f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root   /* vassert(nn < 8); */
292f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root   nn &= 7;
293f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root   res->w8[ 0] = sar8(argL->w8[ 0], nn);
294f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root   res->w8[ 1] = sar8(argL->w8[ 1], nn);
295f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root   res->w8[ 2] = sar8(argL->w8[ 2], nn);
296f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root   res->w8[ 3] = sar8(argL->w8[ 3], nn);
297f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root   res->w8[ 4] = sar8(argL->w8[ 4], nn);
298f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root   res->w8[ 5] = sar8(argL->w8[ 5], nn);
299f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root   res->w8[ 6] = sar8(argL->w8[ 6], nn);
300f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root   res->w8[ 7] = sar8(argL->w8[ 7], nn);
301f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root   res->w8[ 8] = sar8(argL->w8[ 8], nn);
302f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root   res->w8[ 9] = sar8(argL->w8[ 9], nn);
303f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root   res->w8[10] = sar8(argL->w8[10], nn);
304f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root   res->w8[11] = sar8(argL->w8[11], nn);
305f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root   res->w8[12] = sar8(argL->w8[12], nn);
306f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root   res->w8[13] = sar8(argL->w8[13], nn);
307f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root   res->w8[14] = sar8(argL->w8[14], nn);
308f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root   res->w8[15] = sar8(argL->w8[15], nn);
309f673d1bf8bfb172f0eccbe4d3a908b3c65b55b33Kenny Root}
310ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
311b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanovvoid VEX_REGPARM(3)
312b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov     h_generic_calc_QNarrowBin32Sto16Ux8 ( /*OUT*/V128* res,
313b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov                                           V128* argL, V128* argR )
314b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov{
315b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   res->w16[0] = qnarrow32Sto16U(argR->w32[0]);
316b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   res->w16[1] = qnarrow32Sto16U(argR->w32[1]);
317b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   res->w16[2] = qnarrow32Sto16U(argR->w32[2]);
318b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   res->w16[3] = qnarrow32Sto16U(argR->w32[3]);
319b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   res->w16[4] = qnarrow32Sto16U(argL->w32[0]);
320b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   res->w16[5] = qnarrow32Sto16U(argL->w32[1]);
321b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   res->w16[6] = qnarrow32Sto16U(argL->w32[2]);
322b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   res->w16[7] = qnarrow32Sto16U(argL->w32[3]);
323b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov}
324b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov
325b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanovvoid VEX_REGPARM(3)
326b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov     h_generic_calc_NarrowBin16to8x16 ( /*OUT*/V128* res,
327b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov                                        V128* argL, V128* argR )
328b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov{
329b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   res->w8[ 0] = narrow16to8(argR->w16[0]);
330b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   res->w8[ 1] = narrow16to8(argR->w16[1]);
331b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   res->w8[ 2] = narrow16to8(argR->w16[2]);
332b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   res->w8[ 3] = narrow16to8(argR->w16[3]);
333b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   res->w8[ 4] = narrow16to8(argR->w16[4]);
334b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   res->w8[ 5] = narrow16to8(argR->w16[5]);
335b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   res->w8[ 6] = narrow16to8(argR->w16[6]);
336b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   res->w8[ 7] = narrow16to8(argR->w16[7]);
337b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   res->w8[ 8] = narrow16to8(argL->w16[0]);
338b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   res->w8[ 9] = narrow16to8(argL->w16[1]);
339b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   res->w8[10] = narrow16to8(argL->w16[2]);
340b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   res->w8[11] = narrow16to8(argL->w16[3]);
341b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   res->w8[12] = narrow16to8(argL->w16[4]);
342b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   res->w8[13] = narrow16to8(argL->w16[5]);
343b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   res->w8[14] = narrow16to8(argL->w16[6]);
344b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   res->w8[15] = narrow16to8(argL->w16[7]);
345b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov}
346b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov
347b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanovvoid VEX_REGPARM(3)
348b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov     h_generic_calc_NarrowBin32to16x8 ( /*OUT*/V128* res,
349b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov                                        V128* argL, V128* argR )
350b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov{
351b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   res->w16[0] = narrow32to16(argR->w32[0]);
352b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   res->w16[1] = narrow32to16(argR->w32[1]);
353b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   res->w16[2] = narrow32to16(argR->w32[2]);
354b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   res->w16[3] = narrow32to16(argR->w32[3]);
355b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   res->w16[4] = narrow32to16(argL->w32[0]);
356b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   res->w16[5] = narrow32to16(argL->w32[1]);
357b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   res->w16[6] = narrow32to16(argL->w32[2]);
358b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov   res->w16[7] = narrow32to16(argL->w32[3]);
359b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov}
360b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov
361b32f58018498ea2225959b0ba11c18f0c433deefEvgeniy Stepanov
/*---------------------------------------------------------------*/
/*--- end                              host_generic_simd128.c ---*/
/*---------------------------------------------------------------*/
365