host_generic_simd64.c revision 5f438dd73072211989c6d496845bdc9b777ecbec
138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/*---------------------------------------------------------------*/
3752f90673ebbb6b2f55fc5e46606dea371313713sewardj/*--- begin                             host_generic_simd64.c ---*/
438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/*---------------------------------------------------------------*/
538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/*
7752f90673ebbb6b2f55fc5e46606dea371313713sewardj   This file is part of Valgrind, a dynamic binary instrumentation
8752f90673ebbb6b2f55fc5e46606dea371313713sewardj   framework.
938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
10752f90673ebbb6b2f55fc5e46606dea371313713sewardj   Copyright (C) 2004-2010 OpenWorks LLP
11752f90673ebbb6b2f55fc5e46606dea371313713sewardj      info@open-works.net
127bd6ffe203f3aa9e7b25f7eae40a9b9cf48710cfsewardj
13752f90673ebbb6b2f55fc5e46606dea371313713sewardj   This program is free software; you can redistribute it and/or
14752f90673ebbb6b2f55fc5e46606dea371313713sewardj   modify it under the terms of the GNU General Public License as
15752f90673ebbb6b2f55fc5e46606dea371313713sewardj   published by the Free Software Foundation; either version 2 of the
16752f90673ebbb6b2f55fc5e46606dea371313713sewardj   License, or (at your option) any later version.
177bd6ffe203f3aa9e7b25f7eae40a9b9cf48710cfsewardj
18752f90673ebbb6b2f55fc5e46606dea371313713sewardj   This program is distributed in the hope that it will be useful, but
19752f90673ebbb6b2f55fc5e46606dea371313713sewardj   WITHOUT ANY WARRANTY; without even the implied warranty of
20752f90673ebbb6b2f55fc5e46606dea371313713sewardj   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21752f90673ebbb6b2f55fc5e46606dea371313713sewardj   General Public License for more details.
22752f90673ebbb6b2f55fc5e46606dea371313713sewardj
23752f90673ebbb6b2f55fc5e46606dea371313713sewardj   You should have received a copy of the GNU General Public License
24752f90673ebbb6b2f55fc5e46606dea371313713sewardj   along with this program; if not, write to the Free Software
25752f90673ebbb6b2f55fc5e46606dea371313713sewardj   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
267bd6ffe203f3aa9e7b25f7eae40a9b9cf48710cfsewardj   02110-1301, USA.
277bd6ffe203f3aa9e7b25f7eae40a9b9cf48710cfsewardj
28752f90673ebbb6b2f55fc5e46606dea371313713sewardj   The GNU General Public License is contained in the file COPYING.
2938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
3038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   Neither the names of the U.S. Department of Energy nor the
3138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   University of California nor the names of its contributors may be
3238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   used to endorse or promote products derived from this software
3338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   without prior written permission.
3438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj*/
3538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
3638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* Generic helper functions for doing 64-bit SIMD arithmetic in cases
3738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   where the instruction selectors cannot generate code in-line.
3838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   These are purely back-end entities and cannot be seen/referenced
3938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   from IR. */
4038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
4138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj#include "libvex_basictypes.h"
42cef7d3e3df4796e35b4521158d9dc058f034aa87sewardj#include "host_generic_simd64.h"
4338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
4438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
4538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
4638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* Tuple/select functions for 32x2 vectors. */
4738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
4838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline ULong mk32x2 ( UInt w1, UInt w0 ) {
4938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return (((ULong)w1) << 32) | ((ULong)w0);
5038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
5138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
5238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UInt sel32x2_1 ( ULong w64 ) {
53d19fc161147086f31126fef0955b426b4f843d02sewardj   return 0xFFFFFFFF & toUInt(w64 >> 32);
5438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
5538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UInt sel32x2_0 ( ULong w64 ) {
56d19fc161147086f31126fef0955b426b4f843d02sewardj   return 0xFFFFFFFF & toUInt(w64);
5738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
5838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
5938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
6038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* Tuple/select functions for 16x4 vectors.  gcc is pretty hopeless
6138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   with 64-bit shifts so we give it a hand. */
6238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
6338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline ULong mk16x4 ( UShort w3, UShort w2,
6438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj                             UShort w1, UShort w0 ) {
6538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UInt hi32 = (((UInt)w3) << 16) | ((UInt)w2);
6638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UInt lo32 = (((UInt)w1) << 16) | ((UInt)w0);
6738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk32x2(hi32, lo32);
6838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
6938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
7038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UShort sel16x4_3 ( ULong w64 ) {
71d19fc161147086f31126fef0955b426b4f843d02sewardj   UInt hi32 = toUInt(w64 >> 32);
72d19fc161147086f31126fef0955b426b4f843d02sewardj   return toUShort(0xFFFF & (hi32 >> 16));
7338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
7438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UShort sel16x4_2 ( ULong w64 ) {
75d19fc161147086f31126fef0955b426b4f843d02sewardj   UInt hi32 = toUInt(w64 >> 32);
76d19fc161147086f31126fef0955b426b4f843d02sewardj   return toUShort(0xFFFF & hi32);
7738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
7838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UShort sel16x4_1 ( ULong w64 ) {
7938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UInt lo32 = (UInt)w64;
80d19fc161147086f31126fef0955b426b4f843d02sewardj   return toUShort(0xFFFF & (lo32 >> 16));
8138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
8238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UShort sel16x4_0 ( ULong w64 ) {
8338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UInt lo32 = (UInt)w64;
84d19fc161147086f31126fef0955b426b4f843d02sewardj   return toUShort(0xFFFF & lo32);
8538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
8638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
8738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
8838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* Tuple/select functions for 8x8 vectors. */
8938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
9038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline ULong mk8x8 ( UChar w7, UChar w6,
9138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj                            UChar w5, UChar w4,
9238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj                            UChar w3, UChar w2,
93e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj                            UChar w1, UChar w0 ) {
9438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UInt hi32 =   (((UInt)w7) << 24) | (((UInt)w6) << 16)
9538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj               | (((UInt)w5) << 8)  | (((UInt)w4) << 0);
9638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UInt lo32 =   (((UInt)w3) << 24) | (((UInt)w2) << 16)
9738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj               | (((UInt)w1) << 8)  | (((UInt)w0) << 0);
9838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk32x2(hi32, lo32);
9938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
10038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
10138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar sel8x8_7 ( ULong w64 ) {
102d19fc161147086f31126fef0955b426b4f843d02sewardj   UInt hi32 = toUInt(w64 >> 32);
103d19fc161147086f31126fef0955b426b4f843d02sewardj   return toUChar(0xFF & (hi32 >> 24));
10438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
10538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar sel8x8_6 ( ULong w64 ) {
106d19fc161147086f31126fef0955b426b4f843d02sewardj   UInt hi32 = toUInt(w64 >> 32);
107d19fc161147086f31126fef0955b426b4f843d02sewardj   return toUChar(0xFF & (hi32 >> 16));
10838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
10938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar sel8x8_5 ( ULong w64 ) {
110d19fc161147086f31126fef0955b426b4f843d02sewardj   UInt hi32 = toUInt(w64 >> 32);
111d19fc161147086f31126fef0955b426b4f843d02sewardj   return toUChar(0xFF & (hi32 >> 8));
11238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
11338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar sel8x8_4 ( ULong w64 ) {
114d19fc161147086f31126fef0955b426b4f843d02sewardj   UInt hi32 = toUInt(w64 >> 32);
115d19fc161147086f31126fef0955b426b4f843d02sewardj   return toUChar(0xFF & (hi32 >> 0));
11638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
11738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar sel8x8_3 ( ULong w64 ) {
11838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UInt lo32 = (UInt)w64;
119d19fc161147086f31126fef0955b426b4f843d02sewardj   return toUChar(0xFF & (lo32 >> 24));
12038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
12138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar sel8x8_2 ( ULong w64 ) {
12238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UInt lo32 = (UInt)w64;
123d19fc161147086f31126fef0955b426b4f843d02sewardj   return toUChar(0xFF & (lo32 >> 16));
12438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
12538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar sel8x8_1 ( ULong w64 ) {
12638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UInt lo32 = (UInt)w64;
127d19fc161147086f31126fef0955b426b4f843d02sewardj   return toUChar(0xFF & (lo32 >> 8));
12838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
12938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar sel8x8_0 ( ULong w64 ) {
13038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UInt lo32 = (UInt)w64;
131d19fc161147086f31126fef0955b426b4f843d02sewardj   return toUChar(0xFF & (lo32 >> 0));
13238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
13338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
134d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardjstatic inline UChar index8x8 ( ULong w64, UChar ix ) {
135d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj   ix &= 7;
136d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj   return toUChar((w64 >> (8*ix)) & 0xFF);
137d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj}
138d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj
13938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
14038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* Scalar helpers. */
14138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
14238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline Short qadd16S ( Short xx, Short yy )
14338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
14438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   Int t = ((Int)xx) + ((Int)yy);
14538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   if (t < -32768) t = -32768;
14638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   if (t > 32767)  t = 32767;
14738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return (Short)t;
14838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
14938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
15038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline Char qadd8S ( Char xx, Char yy )
15138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
15238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   Int t = ((Int)xx) + ((Int)yy);
15338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   if (t < -128) t = -128;
15438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   if (t > 127)  t = 127;
15538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return (Char)t;
15638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
15738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
15838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UShort qadd16U ( UShort xx, UShort yy )
15938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
16038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UInt t = ((UInt)xx) + ((UInt)yy);
16138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   if (t > 0xFFFF) t = 0xFFFF;
16238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return (UShort)t;
16338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
16438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
16538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar qadd8U ( UChar xx, UChar yy )
16638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
16738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UInt t = ((UInt)xx) + ((UInt)yy);
16838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   if (t > 0xFF) t = 0xFF;
16938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return (UChar)t;
17038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
17138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
17238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline Short qsub16S ( Short xx, Short yy )
17338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
17438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   Int t = ((Int)xx) - ((Int)yy);
17538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   if (t < -32768) t = -32768;
17638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   if (t > 32767)  t = 32767;
17738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return (Short)t;
17838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
17938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
18038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline Char qsub8S ( Char xx, Char yy )
18138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
18238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   Int t = ((Int)xx) - ((Int)yy);
18338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   if (t < -128) t = -128;
18438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   if (t > 127)  t = 127;
18538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return (Char)t;
18638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
18738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
18838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UShort qsub16U ( UShort xx, UShort yy )
18938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
19038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   Int t = ((Int)xx) - ((Int)yy);
19138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   if (t < 0)      t = 0;
19238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   if (t > 0xFFFF) t = 0xFFFF;
19338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return (UShort)t;
19438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
19538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
19638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar qsub8U ( UChar xx, UChar yy )
19738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
19838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   Int t = ((Int)xx) - ((Int)yy);
19938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   if (t < 0)    t = 0;
20038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   if (t > 0xFF) t = 0xFF;
20138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return (UChar)t;
20238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
20338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
20438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline Short mul16 ( Short xx, Short yy )
20538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
20638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   Int t = ((Int)xx) * ((Int)yy);
20738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return (Short)t;
20838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
20938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
210d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardjstatic inline Int mul32 ( Int xx, Int yy )
211d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj{
212d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj   Int t = ((Int)xx) * ((Int)yy);
213d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj   return (Int)t;
214d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj}
215d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj
21638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline Short mulhi16S ( Short xx, Short yy )
21738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
21838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   Int t = ((Int)xx) * ((Int)yy);
21938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   t >>=/*s*/ 16;
22038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return (Short)t;
22138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
22238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
22338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UShort mulhi16U ( UShort xx, UShort yy )
22438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
22538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UInt t = ((UInt)xx) * ((UInt)yy);
22638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   t >>=/*u*/ 16;
22738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return (UShort)t;
22838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
22938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
23038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UInt cmpeq32 ( UInt xx, UInt yy )
23138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
23238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return xx==yy ? 0xFFFFFFFF : 0;
23338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
23438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
23538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UShort cmpeq16 ( UShort xx, UShort yy )
23638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
237d19fc161147086f31126fef0955b426b4f843d02sewardj   return toUShort(xx==yy ? 0xFFFF : 0);
23838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
23938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
24038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar cmpeq8 ( UChar xx, UChar yy )
24138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
242d19fc161147086f31126fef0955b426b4f843d02sewardj   return toUChar(xx==yy ? 0xFF : 0);
24338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
24438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
24538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UInt cmpgt32S ( Int xx, Int yy )
24638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
24738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return xx>yy ? 0xFFFFFFFF : 0;
24838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
24938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
25038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UShort cmpgt16S ( Short xx, Short yy )
25138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
252d19fc161147086f31126fef0955b426b4f843d02sewardj   return toUShort(xx>yy ? 0xFFFF : 0);
25338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
25438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
25538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar cmpgt8S ( Char xx, Char yy )
25638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
257d19fc161147086f31126fef0955b426b4f843d02sewardj   return toUChar(xx>yy ? 0xFF : 0);
25838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
25938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
2601806918ae2783af5808f00876581e01c7b650a0dsewardjstatic inline UInt cmpnez32 ( UInt xx )
2611806918ae2783af5808f00876581e01c7b650a0dsewardj{
2621806918ae2783af5808f00876581e01c7b650a0dsewardj   return xx==0 ? 0 : 0xFFFFFFFF;
2631806918ae2783af5808f00876581e01c7b650a0dsewardj}
2641806918ae2783af5808f00876581e01c7b650a0dsewardj
2651806918ae2783af5808f00876581e01c7b650a0dsewardjstatic inline UShort cmpnez16 ( UShort xx )
2661806918ae2783af5808f00876581e01c7b650a0dsewardj{
267d19fc161147086f31126fef0955b426b4f843d02sewardj   return toUShort(xx==0 ? 0 : 0xFFFF);
2681806918ae2783af5808f00876581e01c7b650a0dsewardj}
2691806918ae2783af5808f00876581e01c7b650a0dsewardj
2701806918ae2783af5808f00876581e01c7b650a0dsewardjstatic inline UChar cmpnez8 ( UChar xx )
2711806918ae2783af5808f00876581e01c7b650a0dsewardj{
272d19fc161147086f31126fef0955b426b4f843d02sewardj   return toUChar(xx==0 ? 0 : 0xFF);
2731806918ae2783af5808f00876581e01c7b650a0dsewardj}
2741806918ae2783af5808f00876581e01c7b650a0dsewardj
275c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardjstatic inline Short qnarrow32Sto16S ( UInt xx0 )
27638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
27738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   Int xx = (Int)xx0;
27838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   if (xx < -32768) xx = -32768;
27938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   if (xx > 32767)  xx = 32767;
28038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return (Short)xx;
28138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
28238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
283c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardjstatic inline Char qnarrow16Sto8S ( UShort xx0 )
28438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
28538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   Short xx = (Short)xx0;
28638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   if (xx < -128) xx = -128;
28738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   if (xx > 127)  xx = 127;
28838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return (Char)xx;
28938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
29038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
291c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardjstatic inline UChar qnarrow16Sto8U ( UShort xx0 )
29238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
29338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   Short xx = (Short)xx0;
29438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   if (xx < 0)   xx = 0;
29538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   if (xx > 255) xx = 255;
29638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return (UChar)xx;
29738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
29838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
29938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* shifts: we don't care about out-of-range ones, since
30038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   that is dealt with at a higher level. */
30138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
302d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardjstatic inline UChar shl8 ( UChar v, UInt n )
303d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj{
304d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj   return toUChar(v << n);
305d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj}
306d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj
307d71ba837242cc470f622335b1c650bce8886a533sewardjstatic inline UChar sar8 ( UChar v, UInt n )
308d71ba837242cc470f622335b1c650bce8886a533sewardj{
309d71ba837242cc470f622335b1c650bce8886a533sewardj   return toUChar(((Char)v) >> n);
310d71ba837242cc470f622335b1c650bce8886a533sewardj}
311d71ba837242cc470f622335b1c650bce8886a533sewardj
31238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UShort shl16 ( UShort v, UInt n )
31338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
314d19fc161147086f31126fef0955b426b4f843d02sewardj   return toUShort(v << n);
31538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
31638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
31738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UShort shr16 ( UShort v, UInt n )
31838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
319d19fc161147086f31126fef0955b426b4f843d02sewardj   return toUShort((((UShort)v) >> n));
32038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
32138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
32238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UShort sar16 ( UShort v, UInt n )
32338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
324d19fc161147086f31126fef0955b426b4f843d02sewardj   return toUShort(((Short)v) >> n);
32538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
32638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
32738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UInt shl32 ( UInt v, UInt n )
32838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
32938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return v << n;
33038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
33138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
33238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UInt shr32 ( UInt v, UInt n )
33338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
33438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return (((UInt)v) >> n);
33538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
33638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
33738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UInt sar32 ( UInt v, UInt n )
33838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
33938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return ((Int)v) >> n;
34038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
34138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
34238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar avg8U ( UChar xx, UChar yy )
34338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
34438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UInt xxi = (UInt)xx;
34538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UInt yyi = (UInt)yy;
34638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UInt r   = (xxi + yyi + 1) >> 1;
34738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return (UChar)r;
34838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
34938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
35038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UShort avg16U ( UShort xx, UShort yy )
35138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
35238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UInt xxi = (UInt)xx;
35338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UInt yyi = (UInt)yy;
35438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UInt r   = (xxi + yyi + 1) >> 1;
35538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return (UShort)r;
35638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
35738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
35838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline Short max16S ( Short xx, Short yy )
35938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
360d19fc161147086f31126fef0955b426b4f843d02sewardj   return toUShort((xx > yy) ? xx : yy);
36138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
36238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
36338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar max8U ( UChar xx, UChar yy )
36438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
365d19fc161147086f31126fef0955b426b4f843d02sewardj   return toUChar((xx > yy) ? xx : yy);
36638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
36738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
36838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline Short min16S ( Short xx, Short yy )
36938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
370d19fc161147086f31126fef0955b426b4f843d02sewardj   return toUShort((xx < yy) ? xx : yy);
37138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
37238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
37338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar min8U ( UChar xx, UChar yy )
37438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
375d19fc161147086f31126fef0955b426b4f843d02sewardj   return toUChar((xx < yy) ? xx : yy);
37638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
37738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
378e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline UShort hadd16U ( UShort xx, UShort yy )
379e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
380e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   UInt xxi = (UInt)xx;
381e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   UInt yyi = (UInt)yy;
382e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   UInt r   = (xxi + yyi) >> 1;
383e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return (UShort)r;
384e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
385e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
386e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline Short hadd16S ( Short xx, Short yy )
387e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
388e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   Int xxi = (Int)xx;
389e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   Int yyi = (Int)yy;
390e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   Int r   = (xxi + yyi) >> 1;
391e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return (Short)r;
392e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
393e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
394e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline UShort hsub16U ( UShort xx, UShort yy )
395e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
396e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   UInt xxi = (UInt)xx;
397e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   UInt yyi = (UInt)yy;
398e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   UInt r   = (xxi - yyi) >> 1;
399e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return (UShort)r;
400e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
401e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
402e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline Short hsub16S ( Short xx, Short yy )
403e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
404e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   Int xxi = (Int)xx;
405e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   Int yyi = (Int)yy;
406e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   Int r   = (xxi - yyi) >> 1;
407e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return (Short)r;
408e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
409e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
410e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline UChar hadd8U ( UChar xx, UChar yy )
411e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
412e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   UInt xxi = (UInt)xx;
413e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   UInt yyi = (UInt)yy;
414e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   UInt r   = (xxi + yyi) >> 1;
415e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return (UChar)r;
416e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
417e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
418e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline Char hadd8S ( Char xx, Char yy )
419e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
420e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   Int xxi = (Int)xx;
421e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   Int yyi = (Int)yy;
422e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   Int r   = (xxi + yyi) >> 1;
423e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return (Char)r;
424e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
425e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
426e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline UChar hsub8U ( UChar xx, UChar yy )
427e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
428e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   UInt xxi = (UInt)xx;
429e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   UInt yyi = (UInt)yy;
430e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   UInt r   = (xxi - yyi) >> 1;
431e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return (UChar)r;
432e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
433e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
434e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline Char hsub8S ( Char xx, Char yy )
435e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
436e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   Int xxi = (Int)xx;
437e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   Int yyi = (Int)yy;
438e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   Int r   = (xxi - yyi) >> 1;
439e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return (Char)r;
440e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
441e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
442310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardjstatic inline UInt absdiff8U ( UChar xx, UChar yy )
443310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardj{
444310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardj   UInt xxu = (UChar)xx;
445310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardj   UInt yyu = (UChar)yy;
446310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardj   return xxu >= yyu  ? xxu - yyu  : yyu - xxu;
447310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardj}
448e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
44938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* ----------------------------------------------------- */
45038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* Start of the externally visible functions.  These simply
45138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   implement the corresponding IR primops. */
45238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* ----------------------------------------------------- */
45338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
45438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* ------------ Normal addition ------------ */
45538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
45638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_Add32x2 ( ULong xx, ULong yy )
45738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
45838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk32x2(
45938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel32x2_1(xx) + sel32x2_1(yy),
46038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel32x2_0(xx) + sel32x2_0(yy)
46138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
46238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
46338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
46438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_Add16x4 ( ULong xx, ULong yy )
46538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
46638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk16x4(
467d19fc161147086f31126fef0955b426b4f843d02sewardj             toUShort( sel16x4_3(xx) + sel16x4_3(yy) ),
468d19fc161147086f31126fef0955b426b4f843d02sewardj             toUShort( sel16x4_2(xx) + sel16x4_2(yy) ),
469d19fc161147086f31126fef0955b426b4f843d02sewardj             toUShort( sel16x4_1(xx) + sel16x4_1(yy) ),
470d19fc161147086f31126fef0955b426b4f843d02sewardj             toUShort( sel16x4_0(xx) + sel16x4_0(yy) )
47138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
47238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
47338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
47438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_Add8x8 ( ULong xx, ULong yy )
47538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
47638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk8x8(
477d19fc161147086f31126fef0955b426b4f843d02sewardj             toUChar( sel8x8_7(xx) + sel8x8_7(yy) ),
478d19fc161147086f31126fef0955b426b4f843d02sewardj             toUChar( sel8x8_6(xx) + sel8x8_6(yy) ),
479d19fc161147086f31126fef0955b426b4f843d02sewardj             toUChar( sel8x8_5(xx) + sel8x8_5(yy) ),
480d19fc161147086f31126fef0955b426b4f843d02sewardj             toUChar( sel8x8_4(xx) + sel8x8_4(yy) ),
481d19fc161147086f31126fef0955b426b4f843d02sewardj             toUChar( sel8x8_3(xx) + sel8x8_3(yy) ),
482d19fc161147086f31126fef0955b426b4f843d02sewardj             toUChar( sel8x8_2(xx) + sel8x8_2(yy) ),
483d19fc161147086f31126fef0955b426b4f843d02sewardj             toUChar( sel8x8_1(xx) + sel8x8_1(yy) ),
484d19fc161147086f31126fef0955b426b4f843d02sewardj             toUChar( sel8x8_0(xx) + sel8x8_0(yy) )
48538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
48638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
48738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
48838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* ------------ Saturating addition ------------ */
48938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
49038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_QAdd16Sx4 ( ULong xx, ULong yy )
49138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
49238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk16x4(
49338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qadd16S( sel16x4_3(xx), sel16x4_3(yy) ),
49438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qadd16S( sel16x4_2(xx), sel16x4_2(yy) ),
49538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qadd16S( sel16x4_1(xx), sel16x4_1(yy) ),
49638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qadd16S( sel16x4_0(xx), sel16x4_0(yy) )
49738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
49838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
49938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
50038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_QAdd8Sx8 ( ULong xx, ULong yy )
50138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
50238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk8x8(
50338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qadd8S( sel8x8_7(xx), sel8x8_7(yy) ),
50438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qadd8S( sel8x8_6(xx), sel8x8_6(yy) ),
50538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qadd8S( sel8x8_5(xx), sel8x8_5(yy) ),
50638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qadd8S( sel8x8_4(xx), sel8x8_4(yy) ),
50738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qadd8S( sel8x8_3(xx), sel8x8_3(yy) ),
50838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qadd8S( sel8x8_2(xx), sel8x8_2(yy) ),
50938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qadd8S( sel8x8_1(xx), sel8x8_1(yy) ),
51038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qadd8S( sel8x8_0(xx), sel8x8_0(yy) )
51138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
51238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
51338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
51438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_QAdd16Ux4 ( ULong xx, ULong yy )
51538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
51638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk16x4(
51738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qadd16U( sel16x4_3(xx), sel16x4_3(yy) ),
51838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qadd16U( sel16x4_2(xx), sel16x4_2(yy) ),
51938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qadd16U( sel16x4_1(xx), sel16x4_1(yy) ),
52038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qadd16U( sel16x4_0(xx), sel16x4_0(yy) )
52138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
52238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
52338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
52438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_QAdd8Ux8 ( ULong xx, ULong yy )
52538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
52638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk8x8(
52738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qadd8U( sel8x8_7(xx), sel8x8_7(yy) ),
52838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qadd8U( sel8x8_6(xx), sel8x8_6(yy) ),
52938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qadd8U( sel8x8_5(xx), sel8x8_5(yy) ),
53038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qadd8U( sel8x8_4(xx), sel8x8_4(yy) ),
53138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qadd8U( sel8x8_3(xx), sel8x8_3(yy) ),
53238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qadd8U( sel8x8_2(xx), sel8x8_2(yy) ),
53338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qadd8U( sel8x8_1(xx), sel8x8_1(yy) ),
53438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qadd8U( sel8x8_0(xx), sel8x8_0(yy) )
53538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
53638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
53738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
53838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* ------------ Normal subtraction ------------ */
53938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
54038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_Sub32x2 ( ULong xx, ULong yy )
54138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
54238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk32x2(
54338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel32x2_1(xx) - sel32x2_1(yy),
54438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel32x2_0(xx) - sel32x2_0(yy)
54538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
54638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
54738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
54838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_Sub16x4 ( ULong xx, ULong yy )
54938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
55038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk16x4(
551d19fc161147086f31126fef0955b426b4f843d02sewardj             toUShort( sel16x4_3(xx) - sel16x4_3(yy) ),
552d19fc161147086f31126fef0955b426b4f843d02sewardj             toUShort( sel16x4_2(xx) - sel16x4_2(yy) ),
553d19fc161147086f31126fef0955b426b4f843d02sewardj             toUShort( sel16x4_1(xx) - sel16x4_1(yy) ),
554d19fc161147086f31126fef0955b426b4f843d02sewardj             toUShort( sel16x4_0(xx) - sel16x4_0(yy) )
55538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
55638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
55738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
55838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_Sub8x8 ( ULong xx, ULong yy )
55938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
56038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk8x8(
561d19fc161147086f31126fef0955b426b4f843d02sewardj             toUChar( sel8x8_7(xx) - sel8x8_7(yy) ),
562d19fc161147086f31126fef0955b426b4f843d02sewardj             toUChar( sel8x8_6(xx) - sel8x8_6(yy) ),
563d19fc161147086f31126fef0955b426b4f843d02sewardj             toUChar( sel8x8_5(xx) - sel8x8_5(yy) ),
564d19fc161147086f31126fef0955b426b4f843d02sewardj             toUChar( sel8x8_4(xx) - sel8x8_4(yy) ),
565d19fc161147086f31126fef0955b426b4f843d02sewardj             toUChar( sel8x8_3(xx) - sel8x8_3(yy) ),
566d19fc161147086f31126fef0955b426b4f843d02sewardj             toUChar( sel8x8_2(xx) - sel8x8_2(yy) ),
567d19fc161147086f31126fef0955b426b4f843d02sewardj             toUChar( sel8x8_1(xx) - sel8x8_1(yy) ),
568d19fc161147086f31126fef0955b426b4f843d02sewardj             toUChar( sel8x8_0(xx) - sel8x8_0(yy) )
56938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
57038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
57138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
57238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* ------------ Saturating subtraction ------------ */
57338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
57438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_QSub16Sx4 ( ULong xx, ULong yy )
57538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
57638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk16x4(
57738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qsub16S( sel16x4_3(xx), sel16x4_3(yy) ),
57838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qsub16S( sel16x4_2(xx), sel16x4_2(yy) ),
57938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qsub16S( sel16x4_1(xx), sel16x4_1(yy) ),
58038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qsub16S( sel16x4_0(xx), sel16x4_0(yy) )
58138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
58238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
58338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
58438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_QSub8Sx8 ( ULong xx, ULong yy )
58538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
58638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk8x8(
58738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qsub8S( sel8x8_7(xx), sel8x8_7(yy) ),
58838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qsub8S( sel8x8_6(xx), sel8x8_6(yy) ),
58938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qsub8S( sel8x8_5(xx), sel8x8_5(yy) ),
59038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qsub8S( sel8x8_4(xx), sel8x8_4(yy) ),
59138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qsub8S( sel8x8_3(xx), sel8x8_3(yy) ),
59238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qsub8S( sel8x8_2(xx), sel8x8_2(yy) ),
59338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qsub8S( sel8x8_1(xx), sel8x8_1(yy) ),
59438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qsub8S( sel8x8_0(xx), sel8x8_0(yy) )
59538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
59638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
59738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
59838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_QSub16Ux4 ( ULong xx, ULong yy )
59938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
60038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk16x4(
60138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qsub16U( sel16x4_3(xx), sel16x4_3(yy) ),
60238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qsub16U( sel16x4_2(xx), sel16x4_2(yy) ),
60338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qsub16U( sel16x4_1(xx), sel16x4_1(yy) ),
60438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qsub16U( sel16x4_0(xx), sel16x4_0(yy) )
60538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
60638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
60738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
60838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_QSub8Ux8 ( ULong xx, ULong yy )
60938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
61038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk8x8(
61138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qsub8U( sel8x8_7(xx), sel8x8_7(yy) ),
61238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qsub8U( sel8x8_6(xx), sel8x8_6(yy) ),
61338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qsub8U( sel8x8_5(xx), sel8x8_5(yy) ),
61438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qsub8U( sel8x8_4(xx), sel8x8_4(yy) ),
61538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qsub8U( sel8x8_3(xx), sel8x8_3(yy) ),
61638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qsub8U( sel8x8_2(xx), sel8x8_2(yy) ),
61738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qsub8U( sel8x8_1(xx), sel8x8_1(yy) ),
61838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qsub8U( sel8x8_0(xx), sel8x8_0(yy) )
61938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
62038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
62138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
62238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* ------------ Multiplication ------------ */
62338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
62438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_Mul16x4 ( ULong xx, ULong yy )
62538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
62638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk16x4(
62738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             mul16( sel16x4_3(xx), sel16x4_3(yy) ),
62838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             mul16( sel16x4_2(xx), sel16x4_2(yy) ),
62938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             mul16( sel16x4_1(xx), sel16x4_1(yy) ),
63038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             mul16( sel16x4_0(xx), sel16x4_0(yy) )
63138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
63238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
63338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
634d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardjULong h_generic_calc_Mul32x2 ( ULong xx, ULong yy )
635d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj{
636d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj   return mk32x2(
637d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj             mul32( sel32x2_1(xx), sel32x2_1(yy) ),
638d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj             mul32( sel32x2_0(xx), sel32x2_0(yy) )
639d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj          );
640d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj}
641d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj
64238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_MulHi16Sx4 ( ULong xx, ULong yy )
64338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
64438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk16x4(
64538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             mulhi16S( sel16x4_3(xx), sel16x4_3(yy) ),
64638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             mulhi16S( sel16x4_2(xx), sel16x4_2(yy) ),
64738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             mulhi16S( sel16x4_1(xx), sel16x4_1(yy) ),
64838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             mulhi16S( sel16x4_0(xx), sel16x4_0(yy) )
64938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
65038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
65138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
65238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_MulHi16Ux4 ( ULong xx, ULong yy )
65338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
65438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk16x4(
65538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             mulhi16U( sel16x4_3(xx), sel16x4_3(yy) ),
65638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             mulhi16U( sel16x4_2(xx), sel16x4_2(yy) ),
65738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             mulhi16U( sel16x4_1(xx), sel16x4_1(yy) ),
65838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             mulhi16U( sel16x4_0(xx), sel16x4_0(yy) )
65938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
66038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
66138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
66238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* ------------ Comparison ------------ */
66338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
66438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_CmpEQ32x2 ( ULong xx, ULong yy )
66538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
66638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk32x2(
66738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpeq32( sel32x2_1(xx), sel32x2_1(yy) ),
66838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpeq32( sel32x2_0(xx), sel32x2_0(yy) )
66938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
67038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
67138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
67238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_CmpEQ16x4 ( ULong xx, ULong yy )
67338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
67438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk16x4(
67538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpeq16( sel16x4_3(xx), sel16x4_3(yy) ),
67638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpeq16( sel16x4_2(xx), sel16x4_2(yy) ),
67738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpeq16( sel16x4_1(xx), sel16x4_1(yy) ),
67838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpeq16( sel16x4_0(xx), sel16x4_0(yy) )
67938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
68038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
68138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
68238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_CmpEQ8x8 ( ULong xx, ULong yy )
68338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
68438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk8x8(
68538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpeq8( sel8x8_7(xx), sel8x8_7(yy) ),
68638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpeq8( sel8x8_6(xx), sel8x8_6(yy) ),
68738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpeq8( sel8x8_5(xx), sel8x8_5(yy) ),
68838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpeq8( sel8x8_4(xx), sel8x8_4(yy) ),
68938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpeq8( sel8x8_3(xx), sel8x8_3(yy) ),
69038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpeq8( sel8x8_2(xx), sel8x8_2(yy) ),
69138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpeq8( sel8x8_1(xx), sel8x8_1(yy) ),
69238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpeq8( sel8x8_0(xx), sel8x8_0(yy) )
69338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
69438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
69538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
69638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_CmpGT32Sx2 ( ULong xx, ULong yy )
69738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
69838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk32x2(
69938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpgt32S( sel32x2_1(xx), sel32x2_1(yy) ),
70038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpgt32S( sel32x2_0(xx), sel32x2_0(yy) )
70138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
70238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
70338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
70438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_CmpGT16Sx4 ( ULong xx, ULong yy )
70538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
70638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk16x4(
70738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpgt16S( sel16x4_3(xx), sel16x4_3(yy) ),
70838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpgt16S( sel16x4_2(xx), sel16x4_2(yy) ),
70938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpgt16S( sel16x4_1(xx), sel16x4_1(yy) ),
71038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpgt16S( sel16x4_0(xx), sel16x4_0(yy) )
71138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
71238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
71338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
71438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_CmpGT8Sx8 ( ULong xx, ULong yy )
71538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
71638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk8x8(
71738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpgt8S( sel8x8_7(xx), sel8x8_7(yy) ),
71838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpgt8S( sel8x8_6(xx), sel8x8_6(yy) ),
71938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpgt8S( sel8x8_5(xx), sel8x8_5(yy) ),
72038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpgt8S( sel8x8_4(xx), sel8x8_4(yy) ),
72138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpgt8S( sel8x8_3(xx), sel8x8_3(yy) ),
72238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpgt8S( sel8x8_2(xx), sel8x8_2(yy) ),
72338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpgt8S( sel8x8_1(xx), sel8x8_1(yy) ),
72438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpgt8S( sel8x8_0(xx), sel8x8_0(yy) )
72538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
72638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
72738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
7281806918ae2783af5808f00876581e01c7b650a0dsewardjULong h_generic_calc_CmpNEZ32x2 ( ULong xx )
7291806918ae2783af5808f00876581e01c7b650a0dsewardj{
7301806918ae2783af5808f00876581e01c7b650a0dsewardj   return mk32x2(
7311806918ae2783af5808f00876581e01c7b650a0dsewardj             cmpnez32( sel32x2_1(xx) ),
7321806918ae2783af5808f00876581e01c7b650a0dsewardj             cmpnez32( sel32x2_0(xx) )
7331806918ae2783af5808f00876581e01c7b650a0dsewardj          );
7341806918ae2783af5808f00876581e01c7b650a0dsewardj}
7351806918ae2783af5808f00876581e01c7b650a0dsewardj
7361806918ae2783af5808f00876581e01c7b650a0dsewardjULong h_generic_calc_CmpNEZ16x4 ( ULong xx )
7371806918ae2783af5808f00876581e01c7b650a0dsewardj{
7381806918ae2783af5808f00876581e01c7b650a0dsewardj   return mk16x4(
7391806918ae2783af5808f00876581e01c7b650a0dsewardj             cmpnez16( sel16x4_3(xx) ),
7401806918ae2783af5808f00876581e01c7b650a0dsewardj             cmpnez16( sel16x4_2(xx) ),
7411806918ae2783af5808f00876581e01c7b650a0dsewardj             cmpnez16( sel16x4_1(xx) ),
7421806918ae2783af5808f00876581e01c7b650a0dsewardj             cmpnez16( sel16x4_0(xx) )
7431806918ae2783af5808f00876581e01c7b650a0dsewardj          );
7441806918ae2783af5808f00876581e01c7b650a0dsewardj}
7451806918ae2783af5808f00876581e01c7b650a0dsewardj
7461806918ae2783af5808f00876581e01c7b650a0dsewardjULong h_generic_calc_CmpNEZ8x8 ( ULong xx )
7471806918ae2783af5808f00876581e01c7b650a0dsewardj{
7481806918ae2783af5808f00876581e01c7b650a0dsewardj   return mk8x8(
7491806918ae2783af5808f00876581e01c7b650a0dsewardj             cmpnez8( sel8x8_7(xx) ),
7501806918ae2783af5808f00876581e01c7b650a0dsewardj             cmpnez8( sel8x8_6(xx) ),
7511806918ae2783af5808f00876581e01c7b650a0dsewardj             cmpnez8( sel8x8_5(xx) ),
7521806918ae2783af5808f00876581e01c7b650a0dsewardj             cmpnez8( sel8x8_4(xx) ),
7531806918ae2783af5808f00876581e01c7b650a0dsewardj             cmpnez8( sel8x8_3(xx) ),
7541806918ae2783af5808f00876581e01c7b650a0dsewardj             cmpnez8( sel8x8_2(xx) ),
7551806918ae2783af5808f00876581e01c7b650a0dsewardj             cmpnez8( sel8x8_1(xx) ),
7561806918ae2783af5808f00876581e01c7b650a0dsewardj             cmpnez8( sel8x8_0(xx) )
7571806918ae2783af5808f00876581e01c7b650a0dsewardj          );
7581806918ae2783af5808f00876581e01c7b650a0dsewardj}
7591806918ae2783af5808f00876581e01c7b650a0dsewardj
76038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* ------------ Saturating narrowing ------------ */
76138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
7625f438dd73072211989c6d496845bdc9b777ecbecsewardjULong h_generic_calc_QNarrowBin32Sto16Sx4 ( ULong aa, ULong bb )
76338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
76438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UInt d = sel32x2_1(aa);
76538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UInt c = sel32x2_0(aa);
76638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UInt b = sel32x2_1(bb);
76738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UInt a = sel32x2_0(bb);
76838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk16x4(
769c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj             qnarrow32Sto16S(d),
770c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj             qnarrow32Sto16S(c),
771c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj             qnarrow32Sto16S(b),
772c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj             qnarrow32Sto16S(a)
77338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
77438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
77538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
7765f438dd73072211989c6d496845bdc9b777ecbecsewardjULong h_generic_calc_QNarrowBin16Sto8Sx8 ( ULong aa, ULong bb )
77738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
77838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UShort h = sel16x4_3(aa);
77938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UShort g = sel16x4_2(aa);
78038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UShort f = sel16x4_1(aa);
78138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UShort e = sel16x4_0(aa);
78238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UShort d = sel16x4_3(bb);
78338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UShort c = sel16x4_2(bb);
78438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UShort b = sel16x4_1(bb);
78538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UShort a = sel16x4_0(bb);
78638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk8x8(
787c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj             qnarrow16Sto8S(h),
788c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj             qnarrow16Sto8S(g),
789c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj             qnarrow16Sto8S(f),
790c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj             qnarrow16Sto8S(e),
791c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj             qnarrow16Sto8S(d),
792c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj             qnarrow16Sto8S(c),
793c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj             qnarrow16Sto8S(b),
794c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj             qnarrow16Sto8S(a)
79538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
79638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
79738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
7985f438dd73072211989c6d496845bdc9b777ecbecsewardjULong h_generic_calc_QNarrowBin16Sto8Ux8 ( ULong aa, ULong bb )
79938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
80038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UShort h = sel16x4_3(aa);
80138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UShort g = sel16x4_2(aa);
80238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UShort f = sel16x4_1(aa);
80338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UShort e = sel16x4_0(aa);
80438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UShort d = sel16x4_3(bb);
80538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UShort c = sel16x4_2(bb);
80638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UShort b = sel16x4_1(bb);
80738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UShort a = sel16x4_0(bb);
80838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk8x8(
809c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj             qnarrow16Sto8U(h),
810c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj             qnarrow16Sto8U(g),
811c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj             qnarrow16Sto8U(f),
812c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj             qnarrow16Sto8U(e),
813c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj             qnarrow16Sto8U(d),
814c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj             qnarrow16Sto8U(c),
815c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj             qnarrow16Sto8U(b),
816c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj             qnarrow16Sto8U(a)
81738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
81838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
81938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
82038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* ------------ Interleaving ------------ */
82138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
82238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_InterleaveHI8x8 ( ULong aa, ULong bb )
82338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
82438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk8x8(
82538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel8x8_7(aa),
82638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel8x8_7(bb),
82738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel8x8_6(aa),
82838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel8x8_6(bb),
82938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel8x8_5(aa),
83038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel8x8_5(bb),
83138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel8x8_4(aa),
83238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel8x8_4(bb)
83338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
83438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
83538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
83638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_InterleaveLO8x8 ( ULong aa, ULong bb )
83738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
83838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk8x8(
83938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel8x8_3(aa),
84038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel8x8_3(bb),
84138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel8x8_2(aa),
84238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel8x8_2(bb),
84338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel8x8_1(aa),
84438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel8x8_1(bb),
84538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel8x8_0(aa),
84638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel8x8_0(bb)
84738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
84838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
84938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
85038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_InterleaveHI16x4 ( ULong aa, ULong bb )
85138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
85238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk16x4(
85338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel16x4_3(aa),
85438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel16x4_3(bb),
85538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel16x4_2(aa),
85638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel16x4_2(bb)
85738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
85838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
85938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
86038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_InterleaveLO16x4 ( ULong aa, ULong bb )
86138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
86238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk16x4(
86338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel16x4_1(aa),
86438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel16x4_1(bb),
86538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel16x4_0(aa),
86638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel16x4_0(bb)
86738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
86838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
86938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
87038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_InterleaveHI32x2 ( ULong aa, ULong bb )
87138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
87238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk32x2(
87338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel32x2_1(aa),
87438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel32x2_1(bb)
87538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
87638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
87738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
87838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_InterleaveLO32x2 ( ULong aa, ULong bb )
87938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
88038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk32x2(
88138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel32x2_0(aa),
88238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel32x2_0(bb)
88338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
88438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
88538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
886d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj/* ------------ Concatenation ------------ */
887d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj
888d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardjULong h_generic_calc_CatOddLanes16x4 ( ULong aa, ULong bb )
889d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj{
890d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj   return mk16x4(
891d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj             sel16x4_3(aa),
892d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj             sel16x4_1(aa),
893d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj             sel16x4_3(bb),
894d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj             sel16x4_1(bb)
895d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj          );
896d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj}
897d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj
898d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardjULong h_generic_calc_CatEvenLanes16x4 ( ULong aa, ULong bb )
899d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj{
900d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj   return mk16x4(
901d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj             sel16x4_2(aa),
902d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj             sel16x4_0(aa),
903d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj             sel16x4_2(bb),
904d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj             sel16x4_0(bb)
905d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj          );
906d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj}
907d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj
908d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj/* misc hack looking for a proper home */
909d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardjULong h_generic_calc_Perm8x8 ( ULong aa, ULong bb )
910d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj{
911d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj   return mk8x8(
912d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj             index8x8(aa, sel8x8_7(bb)),
913d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj             index8x8(aa, sel8x8_6(bb)),
914d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj             index8x8(aa, sel8x8_5(bb)),
915d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj             index8x8(aa, sel8x8_4(bb)),
916d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj             index8x8(aa, sel8x8_3(bb)),
917d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj             index8x8(aa, sel8x8_2(bb)),
918d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj             index8x8(aa, sel8x8_1(bb)),
919d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj             index8x8(aa, sel8x8_0(bb))
920d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj          );
921d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj}
92238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
92338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* ------------ Shifting ------------ */
/* Note that because these primops are undefined if the shift amount
   equals or exceeds the lane width, the shift amount is masked so
   that the scalar shifts are always in range.  Indeed, given the
   semantics of these primops (ShlN16x4, etc), it is an error if we
   are ever given an out-of-range shift amount.
*/
93038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_ShlN32x2 ( ULong xx, UInt nn )
93138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
93238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   /* vassert(nn < 32); */
93338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   nn &= 31;
93438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk32x2(
93538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             shl32( sel32x2_1(xx), nn ),
93638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             shl32( sel32x2_0(xx), nn )
93738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
93838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
93938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
94038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_ShlN16x4 ( ULong xx, UInt nn )
94138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
94238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   /* vassert(nn < 16); */
94338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   nn &= 15;
94438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk16x4(
94538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             shl16( sel16x4_3(xx), nn ),
94638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             shl16( sel16x4_2(xx), nn ),
94738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             shl16( sel16x4_1(xx), nn ),
94838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             shl16( sel16x4_0(xx), nn )
94938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
950d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj}
951d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj
952d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardjULong h_generic_calc_ShlN8x8  ( ULong xx, UInt nn )
953d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj{
954d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj   /* vassert(nn < 8); */
955d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj   nn &= 7;
956d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj   return mk8x8(
957d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj             shl8( sel8x8_7(xx), nn ),
958d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj             shl8( sel8x8_6(xx), nn ),
959d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj             shl8( sel8x8_5(xx), nn ),
960d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj             shl8( sel8x8_4(xx), nn ),
961d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj             shl8( sel8x8_3(xx), nn ),
962d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj             shl8( sel8x8_2(xx), nn ),
963d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj             shl8( sel8x8_1(xx), nn ),
964d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj             shl8( sel8x8_0(xx), nn )
965d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj          );
96638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
96738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
96838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_ShrN32x2 ( ULong xx, UInt nn )
96938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
97038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   /* vassert(nn < 32); */
97138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   nn &= 31;
97238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk32x2(
97338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             shr32( sel32x2_1(xx), nn ),
97438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             shr32( sel32x2_0(xx), nn )
97538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
97638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
97738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
97838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_ShrN16x4 ( ULong xx, UInt nn )
97938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
98038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   /* vassert(nn < 16); */
98138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   nn &= 15;
98238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk16x4(
98338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             shr16( sel16x4_3(xx), nn ),
98438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             shr16( sel16x4_2(xx), nn ),
98538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             shr16( sel16x4_1(xx), nn ),
98638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             shr16( sel16x4_0(xx), nn )
98738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
98838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
98938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
99038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_SarN32x2 ( ULong xx, UInt nn )
99138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
99238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   /* vassert(nn < 32); */
99338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   nn &= 31;
99438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk32x2(
99538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sar32( sel32x2_1(xx), nn ),
99638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sar32( sel32x2_0(xx), nn )
99738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
99838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
99938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
100038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_SarN16x4 ( ULong xx, UInt nn )
100138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
100238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   /* vassert(nn < 16); */
100338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   nn &= 15;
100438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk16x4(
100538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sar16( sel16x4_3(xx), nn ),
100638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sar16( sel16x4_2(xx), nn ),
100738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sar16( sel16x4_1(xx), nn ),
100838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sar16( sel16x4_0(xx), nn )
100938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
101038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
101138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
1012d71ba837242cc470f622335b1c650bce8886a533sewardjULong h_generic_calc_SarN8x8 ( ULong xx, UInt nn )
1013d71ba837242cc470f622335b1c650bce8886a533sewardj{
1014d71ba837242cc470f622335b1c650bce8886a533sewardj   /* vassert(nn < 8); */
1015d71ba837242cc470f622335b1c650bce8886a533sewardj   nn &= 7;
1016d71ba837242cc470f622335b1c650bce8886a533sewardj   return mk8x8(
1017d71ba837242cc470f622335b1c650bce8886a533sewardj             sar8( sel8x8_7(xx), nn ),
1018d71ba837242cc470f622335b1c650bce8886a533sewardj             sar8( sel8x8_6(xx), nn ),
1019d71ba837242cc470f622335b1c650bce8886a533sewardj             sar8( sel8x8_5(xx), nn ),
1020d71ba837242cc470f622335b1c650bce8886a533sewardj             sar8( sel8x8_4(xx), nn ),
1021d71ba837242cc470f622335b1c650bce8886a533sewardj             sar8( sel8x8_3(xx), nn ),
1022d71ba837242cc470f622335b1c650bce8886a533sewardj             sar8( sel8x8_2(xx), nn ),
1023d71ba837242cc470f622335b1c650bce8886a533sewardj             sar8( sel8x8_1(xx), nn ),
1024d71ba837242cc470f622335b1c650bce8886a533sewardj             sar8( sel8x8_0(xx), nn )
1025d71ba837242cc470f622335b1c650bce8886a533sewardj          );
1026d71ba837242cc470f622335b1c650bce8886a533sewardj}
1027d71ba837242cc470f622335b1c650bce8886a533sewardj
102838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* ------------ Averaging ------------ */
102938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
103038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_Avg8Ux8 ( ULong xx, ULong yy )
103138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
103238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk8x8(
103338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             avg8U( sel8x8_7(xx), sel8x8_7(yy) ),
103438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             avg8U( sel8x8_6(xx), sel8x8_6(yy) ),
103538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             avg8U( sel8x8_5(xx), sel8x8_5(yy) ),
103638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             avg8U( sel8x8_4(xx), sel8x8_4(yy) ),
103738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             avg8U( sel8x8_3(xx), sel8x8_3(yy) ),
103838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             avg8U( sel8x8_2(xx), sel8x8_2(yy) ),
103938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             avg8U( sel8x8_1(xx), sel8x8_1(yy) ),
104038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             avg8U( sel8x8_0(xx), sel8x8_0(yy) )
104138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
104238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
104338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
104438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_Avg16Ux4 ( ULong xx, ULong yy )
104538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
104638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk16x4(
104738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             avg16U( sel16x4_3(xx), sel16x4_3(yy) ),
104838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             avg16U( sel16x4_2(xx), sel16x4_2(yy) ),
104938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             avg16U( sel16x4_1(xx), sel16x4_1(yy) ),
105038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             avg16U( sel16x4_0(xx), sel16x4_0(yy) )
105138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
105238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
105338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
105438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* ------------ max/min ------------ */
105538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
105638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_Max16Sx4 ( ULong xx, ULong yy )
105738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
105838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk16x4(
105938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             max16S( sel16x4_3(xx), sel16x4_3(yy) ),
106038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             max16S( sel16x4_2(xx), sel16x4_2(yy) ),
106138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             max16S( sel16x4_1(xx), sel16x4_1(yy) ),
106238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             max16S( sel16x4_0(xx), sel16x4_0(yy) )
106338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
106438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
106538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
106638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_Max8Ux8 ( ULong xx, ULong yy )
106738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
106838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk8x8(
106938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             max8U( sel8x8_7(xx), sel8x8_7(yy) ),
107038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             max8U( sel8x8_6(xx), sel8x8_6(yy) ),
107138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             max8U( sel8x8_5(xx), sel8x8_5(yy) ),
107238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             max8U( sel8x8_4(xx), sel8x8_4(yy) ),
107338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             max8U( sel8x8_3(xx), sel8x8_3(yy) ),
107438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             max8U( sel8x8_2(xx), sel8x8_2(yy) ),
107538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             max8U( sel8x8_1(xx), sel8x8_1(yy) ),
107638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             max8U( sel8x8_0(xx), sel8x8_0(yy) )
107738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
107838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
107938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
108038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_Min16Sx4 ( ULong xx, ULong yy )
108138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
108238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk16x4(
108338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             min16S( sel16x4_3(xx), sel16x4_3(yy) ),
108438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             min16S( sel16x4_2(xx), sel16x4_2(yy) ),
108538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             min16S( sel16x4_1(xx), sel16x4_1(yy) ),
108638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             min16S( sel16x4_0(xx), sel16x4_0(yy) )
108738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
108838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
108938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
109038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_Min8Ux8 ( ULong xx, ULong yy )
109138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
109238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk8x8(
109338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             min8U( sel8x8_7(xx), sel8x8_7(yy) ),
109438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             min8U( sel8x8_6(xx), sel8x8_6(yy) ),
109538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             min8U( sel8x8_5(xx), sel8x8_5(yy) ),
109638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             min8U( sel8x8_4(xx), sel8x8_4(yy) ),
109738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             min8U( sel8x8_3(xx), sel8x8_3(yy) ),
109838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             min8U( sel8x8_2(xx), sel8x8_2(yy) ),
109938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             min8U( sel8x8_1(xx), sel8x8_1(yy) ),
110038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             min8U( sel8x8_0(xx), sel8x8_0(yy) )
110138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
110238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
110338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
1104e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj/* ------------ SOME 32-bit SIMD HELPERS TOO ------------ */
1105e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1106e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj/* Tuple/select functions for 16x2 vectors. */
1107e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline UInt mk16x2 ( UShort w1, UShort w2 ) {
1108e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return (((UInt)w1) << 16) | ((UInt)w2);
1109e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1110e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1111e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline UShort sel16x2_1 ( UInt w32 ) {
1112e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return 0xFFFF & (UShort)(w32 >> 16);
1113e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1114e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline UShort sel16x2_0 ( UInt w32 ) {
1115e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return 0xFFFF & (UShort)(w32);
1116e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1117e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1118e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline UInt mk8x4 ( UChar w3, UChar w2,
1119e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj                           UChar w1, UChar w0 ) {
1120e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   UInt w32 =   (((UInt)w3) << 24) | (((UInt)w2) << 16)
1121e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj              | (((UInt)w1) << 8)  | (((UInt)w0) << 0);
1122e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return w32;
1123e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1124e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1125e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline UChar sel8x4_3 ( UInt w32 ) {
1126e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return toUChar(0xFF & (w32 >> 24));
1127e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1128e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline UChar sel8x4_2 ( UInt w32 ) {
1129e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return toUChar(0xFF & (w32 >> 16));
1130e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1131e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline UChar sel8x4_1 ( UInt w32 ) {
1132e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return toUChar(0xFF & (w32 >> 8));
1133e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1134e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline UChar sel8x4_0 ( UInt w32 ) {
1135e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return toUChar(0xFF & (w32 >> 0));
1136e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1137e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1138e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1139e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj/* ----------------------------------------------------- */
1140e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj/* More externally visible functions.  These simply
1141e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   implement the corresponding IR primops. */
1142e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj/* ----------------------------------------------------- */
1143e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1144e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj/* ------ 16x2 ------ */
1145e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1146e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_Add16x2 ( UInt xx, UInt yy )
1147e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
1148e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return mk16x2( sel16x2_1(xx) + sel16x2_1(yy),
1149e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj                  sel16x2_0(xx) + sel16x2_0(yy) );
1150e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1151e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1152e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_Sub16x2 ( UInt xx, UInt yy )
1153e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
1154e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return mk16x2( sel16x2_1(xx) - sel16x2_1(yy),
1155e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj                  sel16x2_0(xx) - sel16x2_0(yy) );
1156e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1157e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1158e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_HAdd16Ux2 ( UInt xx, UInt yy )
1159e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
1160e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return mk16x2( hadd16U( sel16x2_1(xx), sel16x2_1(yy) ),
1161e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj                  hadd16U( sel16x2_0(xx), sel16x2_0(yy) ) );
1162e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1163e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1164e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_HAdd16Sx2 ( UInt xx, UInt yy )
1165e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
1166e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return mk16x2( hadd16S( sel16x2_1(xx), sel16x2_1(yy) ),
1167e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj                  hadd16S( sel16x2_0(xx), sel16x2_0(yy) ) );
1168e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1169e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1170e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_HSub16Ux2 ( UInt xx, UInt yy )
1171e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
1172e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return mk16x2( hsub16U( sel16x2_1(xx), sel16x2_1(yy) ),
1173e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj                  hsub16U( sel16x2_0(xx), sel16x2_0(yy) ) );
1174e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1175e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1176e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_HSub16Sx2 ( UInt xx, UInt yy )
1177e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
1178e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return mk16x2( hsub16S( sel16x2_1(xx), sel16x2_1(yy) ),
1179e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj                  hsub16S( sel16x2_0(xx), sel16x2_0(yy) ) );
1180e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1181e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1182e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_QAdd16Ux2 ( UInt xx, UInt yy )
1183e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
1184e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return mk16x2( qadd16U( sel16x2_1(xx), sel16x2_1(yy) ),
1185e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj                  qadd16U( sel16x2_0(xx), sel16x2_0(yy) ) );
1186e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1187e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1188e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_QAdd16Sx2 ( UInt xx, UInt yy )
1189e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
1190e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return mk16x2( qadd16S( sel16x2_1(xx), sel16x2_1(yy) ),
1191e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj                  qadd16S( sel16x2_0(xx), sel16x2_0(yy) ) );
1192e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1193e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1194e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_QSub16Ux2 ( UInt xx, UInt yy )
1195e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
1196e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return mk16x2( qsub16U( sel16x2_1(xx), sel16x2_1(yy) ),
1197e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj                  qsub16U( sel16x2_0(xx), sel16x2_0(yy) ) );
1198e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1199e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1200e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_QSub16Sx2 ( UInt xx, UInt yy )
1201e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
1202e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return mk16x2( qsub16S( sel16x2_1(xx), sel16x2_1(yy) ),
1203e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj                  qsub16S( sel16x2_0(xx), sel16x2_0(yy) ) );
1204e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1205e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1206e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj/* ------ 8x4 ------ */
1207e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1208e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_Add8x4 ( UInt xx, UInt yy )
1209e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
1210e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return mk8x4(
1211e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             sel8x4_3(xx) + sel8x4_3(yy),
1212e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             sel8x4_2(xx) + sel8x4_2(yy),
1213e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             sel8x4_1(xx) + sel8x4_1(yy),
1214e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             sel8x4_0(xx) + sel8x4_0(yy)
1215e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj          );
1216e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1217e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1218e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_Sub8x4 ( UInt xx, UInt yy )
1219e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
1220e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return mk8x4(
1221e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             sel8x4_3(xx) - sel8x4_3(yy),
1222e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             sel8x4_2(xx) - sel8x4_2(yy),
1223e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             sel8x4_1(xx) - sel8x4_1(yy),
1224e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             sel8x4_0(xx) - sel8x4_0(yy)
1225e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj          );
1226e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1227e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1228e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_HAdd8Ux4 ( UInt xx, UInt yy )
1229e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
1230e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return mk8x4(
1231e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             hadd8U( sel8x4_3(xx), sel8x4_3(yy) ),
1232e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             hadd8U( sel8x4_2(xx), sel8x4_2(yy) ),
1233e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             hadd8U( sel8x4_1(xx), sel8x4_1(yy) ),
1234e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             hadd8U( sel8x4_0(xx), sel8x4_0(yy) )
1235e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj          );
1236e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1237e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1238e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_HAdd8Sx4 ( UInt xx, UInt yy )
1239e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
1240e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return mk8x4(
1241e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             hadd8S( sel8x4_3(xx), sel8x4_3(yy) ),
1242e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             hadd8S( sel8x4_2(xx), sel8x4_2(yy) ),
1243e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             hadd8S( sel8x4_1(xx), sel8x4_1(yy) ),
1244e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             hadd8S( sel8x4_0(xx), sel8x4_0(yy) )
1245e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj          );
1246e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1247e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1248e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_HSub8Ux4 ( UInt xx, UInt yy )
1249e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
1250e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return mk8x4(
1251e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             hsub8U( sel8x4_3(xx), sel8x4_3(yy) ),
1252e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             hsub8U( sel8x4_2(xx), sel8x4_2(yy) ),
1253e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             hsub8U( sel8x4_1(xx), sel8x4_1(yy) ),
1254e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             hsub8U( sel8x4_0(xx), sel8x4_0(yy) )
1255e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj          );
1256e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1257e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1258e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_HSub8Sx4 ( UInt xx, UInt yy )
1259e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
1260e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return mk8x4(
1261e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             hsub8S( sel8x4_3(xx), sel8x4_3(yy) ),
1262e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             hsub8S( sel8x4_2(xx), sel8x4_2(yy) ),
1263e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             hsub8S( sel8x4_1(xx), sel8x4_1(yy) ),
1264e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             hsub8S( sel8x4_0(xx), sel8x4_0(yy) )
1265e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj          );
1266e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1267e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1268e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_QAdd8Ux4 ( UInt xx, UInt yy )
1269e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
1270e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return mk8x4(
1271e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             qadd8U( sel8x4_3(xx), sel8x4_3(yy) ),
1272e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             qadd8U( sel8x4_2(xx), sel8x4_2(yy) ),
1273e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             qadd8U( sel8x4_1(xx), sel8x4_1(yy) ),
1274e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             qadd8U( sel8x4_0(xx), sel8x4_0(yy) )
1275e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj          );
1276e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1277e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1278e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_QAdd8Sx4 ( UInt xx, UInt yy )
1279e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
1280e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return mk8x4(
1281e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             qadd8S( sel8x4_3(xx), sel8x4_3(yy) ),
1282e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             qadd8S( sel8x4_2(xx), sel8x4_2(yy) ),
1283e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             qadd8S( sel8x4_1(xx), sel8x4_1(yy) ),
1284e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             qadd8S( sel8x4_0(xx), sel8x4_0(yy) )
1285e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj          );
1286e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1287e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1288e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_QSub8Ux4 ( UInt xx, UInt yy )
1289e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
1290e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return mk8x4(
1291e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             qsub8U( sel8x4_3(xx), sel8x4_3(yy) ),
1292e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             qsub8U( sel8x4_2(xx), sel8x4_2(yy) ),
1293e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             qsub8U( sel8x4_1(xx), sel8x4_1(yy) ),
1294e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             qsub8U( sel8x4_0(xx), sel8x4_0(yy) )
1295e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj          );
1296e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1297e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1298e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_QSub8Sx4 ( UInt xx, UInt yy )
1299e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
1300e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return mk8x4(
1301e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             qsub8S( sel8x4_3(xx), sel8x4_3(yy) ),
1302e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             qsub8S( sel8x4_2(xx), sel8x4_2(yy) ),
1303e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             qsub8S( sel8x4_1(xx), sel8x4_1(yy) ),
1304e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             qsub8S( sel8x4_0(xx), sel8x4_0(yy) )
1305e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj          );
1306e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1307e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1308e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_CmpNEZ16x2 ( UInt xx )
1309e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
1310e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return mk16x2(
1311e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             cmpnez16( sel16x2_1(xx) ),
1312e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             cmpnez16( sel16x2_0(xx) )
1313e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj          );
1314e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1315e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1316e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_CmpNEZ8x4 ( UInt xx )
1317e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
1318e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return mk8x4(
1319e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             cmpnez8( sel8x4_3(xx) ),
1320e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             cmpnez8( sel8x4_2(xx) ),
1321e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             cmpnez8( sel8x4_1(xx) ),
1322e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             cmpnez8( sel8x4_0(xx) )
1323e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj          );
1324e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
132538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
1326310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardjUInt h_generic_calc_Sad8Ux4 ( UInt xx, UInt yy )
1327310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardj{
1328310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardj   return absdiff8U( sel8x4_3(xx), sel8x4_3(yy) )
1329310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardj          + absdiff8U( sel8x4_2(xx), sel8x4_2(yy) )
1330310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardj          + absdiff8U( sel8x4_1(xx), sel8x4_1(yy) )
1331310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardj          + absdiff8U( sel8x4_0(xx), sel8x4_0(yy) );
1332310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardj}
1333310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardj
1334310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardj
133538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/*---------------------------------------------------------------*/
1336cef7d3e3df4796e35b4521158d9dc058f034aa87sewardj/*--- end                               host_generic_simd64.c ---*/
133738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/*---------------------------------------------------------------*/
1338