138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
/*---------------------------------------------------------------*/
/*--- begin                             host_generic_simd64.c ---*/
/*---------------------------------------------------------------*/
538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2013 OpenWorks LLP
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/
3538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
/* Generic helper functions for doing 64-bit SIMD arithmetic in cases
   where the instruction selectors cannot generate code in-line.
   These are purely back-end entities and cannot be seen/referenced
   from IR.  There are also helpers for 32-bit arithmetic in here. */
4038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
4138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj#include "libvex_basictypes.h"
428bde7f1c67483371551aac0d4019c24c919063f7sewardj#include "main_util.h"              // LIKELY, UNLIKELY
43cef7d3e3df4796e35b4521158d9dc058f034aa87sewardj#include "host_generic_simd64.h"
4438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
4538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
4638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
/* Tuple/select functions for 32x2 vectors. */
4838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
4938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline ULong mk32x2 ( UInt w1, UInt w0 ) {
5038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return (((ULong)w1) << 32) | ((ULong)w0);
5138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
5238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
5338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UInt sel32x2_1 ( ULong w64 ) {
54d19fc161147086f31126fef0955b426b4f843d02sewardj   return 0xFFFFFFFF & toUInt(w64 >> 32);
5538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
5638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UInt sel32x2_0 ( ULong w64 ) {
57d19fc161147086f31126fef0955b426b4f843d02sewardj   return 0xFFFFFFFF & toUInt(w64);
5838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
5938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
6038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
/* Tuple/select functions for 16x4 vectors.  gcc is pretty hopeless
   with 64-bit shifts so we give it a hand. */
6338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
6438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline ULong mk16x4 ( UShort w3, UShort w2,
6538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj                             UShort w1, UShort w0 ) {
6638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UInt hi32 = (((UInt)w3) << 16) | ((UInt)w2);
6738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UInt lo32 = (((UInt)w1) << 16) | ((UInt)w0);
6838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk32x2(hi32, lo32);
6938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
7038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
7138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UShort sel16x4_3 ( ULong w64 ) {
72d19fc161147086f31126fef0955b426b4f843d02sewardj   UInt hi32 = toUInt(w64 >> 32);
73d19fc161147086f31126fef0955b426b4f843d02sewardj   return toUShort(0xFFFF & (hi32 >> 16));
7438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
7538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UShort sel16x4_2 ( ULong w64 ) {
76d19fc161147086f31126fef0955b426b4f843d02sewardj   UInt hi32 = toUInt(w64 >> 32);
77d19fc161147086f31126fef0955b426b4f843d02sewardj   return toUShort(0xFFFF & hi32);
7838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
7938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UShort sel16x4_1 ( ULong w64 ) {
8038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UInt lo32 = (UInt)w64;
81d19fc161147086f31126fef0955b426b4f843d02sewardj   return toUShort(0xFFFF & (lo32 >> 16));
8238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
8338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UShort sel16x4_0 ( ULong w64 ) {
8438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UInt lo32 = (UInt)w64;
85d19fc161147086f31126fef0955b426b4f843d02sewardj   return toUShort(0xFFFF & lo32);
8638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
8738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
8838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
/* Tuple/select functions for 8x8 vectors. */
9038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
9138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline ULong mk8x8 ( UChar w7, UChar w6,
9238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj                            UChar w5, UChar w4,
9338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj                            UChar w3, UChar w2,
94e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj                            UChar w1, UChar w0 ) {
9538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UInt hi32 =   (((UInt)w7) << 24) | (((UInt)w6) << 16)
9638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj               | (((UInt)w5) << 8)  | (((UInt)w4) << 0);
9738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UInt lo32 =   (((UInt)w3) << 24) | (((UInt)w2) << 16)
9838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj               | (((UInt)w1) << 8)  | (((UInt)w0) << 0);
9938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk32x2(hi32, lo32);
10038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
10138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
10238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar sel8x8_7 ( ULong w64 ) {
103d19fc161147086f31126fef0955b426b4f843d02sewardj   UInt hi32 = toUInt(w64 >> 32);
104d19fc161147086f31126fef0955b426b4f843d02sewardj   return toUChar(0xFF & (hi32 >> 24));
10538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
10638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar sel8x8_6 ( ULong w64 ) {
107d19fc161147086f31126fef0955b426b4f843d02sewardj   UInt hi32 = toUInt(w64 >> 32);
108d19fc161147086f31126fef0955b426b4f843d02sewardj   return toUChar(0xFF & (hi32 >> 16));
10938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
11038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar sel8x8_5 ( ULong w64 ) {
111d19fc161147086f31126fef0955b426b4f843d02sewardj   UInt hi32 = toUInt(w64 >> 32);
112d19fc161147086f31126fef0955b426b4f843d02sewardj   return toUChar(0xFF & (hi32 >> 8));
11338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
11438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar sel8x8_4 ( ULong w64 ) {
115d19fc161147086f31126fef0955b426b4f843d02sewardj   UInt hi32 = toUInt(w64 >> 32);
116d19fc161147086f31126fef0955b426b4f843d02sewardj   return toUChar(0xFF & (hi32 >> 0));
11738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
11838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar sel8x8_3 ( ULong w64 ) {
11938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UInt lo32 = (UInt)w64;
120d19fc161147086f31126fef0955b426b4f843d02sewardj   return toUChar(0xFF & (lo32 >> 24));
12138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
12238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar sel8x8_2 ( ULong w64 ) {
12338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UInt lo32 = (UInt)w64;
124d19fc161147086f31126fef0955b426b4f843d02sewardj   return toUChar(0xFF & (lo32 >> 16));
12538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
12638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar sel8x8_1 ( ULong w64 ) {
12738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UInt lo32 = (UInt)w64;
128d19fc161147086f31126fef0955b426b4f843d02sewardj   return toUChar(0xFF & (lo32 >> 8));
12938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
13038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar sel8x8_0 ( ULong w64 ) {
13138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UInt lo32 = (UInt)w64;
132d19fc161147086f31126fef0955b426b4f843d02sewardj   return toUChar(0xFF & (lo32 >> 0));
13338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
13438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
135d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardjstatic inline UChar index8x8 ( ULong w64, UChar ix ) {
136d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj   ix &= 7;
137d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj   return toUChar((w64 >> (8*ix)) & 0xFF);
138d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj}
139d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj
14038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
/* Scalar helpers. */
14238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
14344ce46d5945ed83d96695d280510cc2a858894dcsewardjstatic inline Int qadd32S ( Int xx, Int yy )
14444ce46d5945ed83d96695d280510cc2a858894dcsewardj{
14544ce46d5945ed83d96695d280510cc2a858894dcsewardj   Long t = ((Long)xx) + ((Long)yy);
14644ce46d5945ed83d96695d280510cc2a858894dcsewardj   const Long loLim = -0x80000000LL;
14744ce46d5945ed83d96695d280510cc2a858894dcsewardj   const Long hiLim =  0x7FFFFFFFLL;
14844ce46d5945ed83d96695d280510cc2a858894dcsewardj   if (t < loLim) t = loLim;
14944ce46d5945ed83d96695d280510cc2a858894dcsewardj   if (t > hiLim) t = hiLim;
15044ce46d5945ed83d96695d280510cc2a858894dcsewardj   return (Int)t;
15144ce46d5945ed83d96695d280510cc2a858894dcsewardj}
15244ce46d5945ed83d96695d280510cc2a858894dcsewardj
15338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline Short qadd16S ( Short xx, Short yy )
15438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
15538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   Int t = ((Int)xx) + ((Int)yy);
15638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   if (t < -32768) t = -32768;
15738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   if (t > 32767)  t = 32767;
15838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return (Short)t;
15938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
16038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
16138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline Char qadd8S ( Char xx, Char yy )
16238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
16338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   Int t = ((Int)xx) + ((Int)yy);
16438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   if (t < -128) t = -128;
16538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   if (t > 127)  t = 127;
16638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return (Char)t;
16738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
16838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
16938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UShort qadd16U ( UShort xx, UShort yy )
17038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
17138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UInt t = ((UInt)xx) + ((UInt)yy);
17238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   if (t > 0xFFFF) t = 0xFFFF;
17338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return (UShort)t;
17438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
17538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
17638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar qadd8U ( UChar xx, UChar yy )
17738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
17838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UInt t = ((UInt)xx) + ((UInt)yy);
17938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   if (t > 0xFF) t = 0xFF;
18038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return (UChar)t;
18138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
18238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
18344ce46d5945ed83d96695d280510cc2a858894dcsewardjstatic inline Int qsub32S ( Int xx, Int yy )
18444ce46d5945ed83d96695d280510cc2a858894dcsewardj{
18544ce46d5945ed83d96695d280510cc2a858894dcsewardj   Long t = ((Long)xx) - ((Long)yy);
18644ce46d5945ed83d96695d280510cc2a858894dcsewardj   const Long loLim = -0x80000000LL;
18744ce46d5945ed83d96695d280510cc2a858894dcsewardj   const Long hiLim =  0x7FFFFFFFLL;
18844ce46d5945ed83d96695d280510cc2a858894dcsewardj   if (t < loLim) t = loLim;
18944ce46d5945ed83d96695d280510cc2a858894dcsewardj   if (t > hiLim) t = hiLim;
19044ce46d5945ed83d96695d280510cc2a858894dcsewardj   return (Int)t;
19144ce46d5945ed83d96695d280510cc2a858894dcsewardj}
19244ce46d5945ed83d96695d280510cc2a858894dcsewardj
19338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline Short qsub16S ( Short xx, Short yy )
19438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
19538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   Int t = ((Int)xx) - ((Int)yy);
19638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   if (t < -32768) t = -32768;
19738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   if (t > 32767)  t = 32767;
19838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return (Short)t;
19938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
20038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
20138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline Char qsub8S ( Char xx, Char yy )
20238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
20338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   Int t = ((Int)xx) - ((Int)yy);
20438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   if (t < -128) t = -128;
20538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   if (t > 127)  t = 127;
20638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return (Char)t;
20738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
20838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
20938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UShort qsub16U ( UShort xx, UShort yy )
21038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
21138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   Int t = ((Int)xx) - ((Int)yy);
21238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   if (t < 0)      t = 0;
21338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   if (t > 0xFFFF) t = 0xFFFF;
21438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return (UShort)t;
21538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
21638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
21738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar qsub8U ( UChar xx, UChar yy )
21838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
21938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   Int t = ((Int)xx) - ((Int)yy);
22038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   if (t < 0)    t = 0;
22138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   if (t > 0xFF) t = 0xFF;
22238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return (UChar)t;
22338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
22438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
22538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline Short mul16 ( Short xx, Short yy )
22638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
22738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   Int t = ((Int)xx) * ((Int)yy);
22838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return (Short)t;
22938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
23038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
231d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardjstatic inline Int mul32 ( Int xx, Int yy )
232d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj{
233d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj   Int t = ((Int)xx) * ((Int)yy);
234d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj   return (Int)t;
235d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj}
236d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj
23738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline Short mulhi16S ( Short xx, Short yy )
23838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
23938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   Int t = ((Int)xx) * ((Int)yy);
24038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   t >>=/*s*/ 16;
24138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return (Short)t;
24238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
24338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
24438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UShort mulhi16U ( UShort xx, UShort yy )
24538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
24638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UInt t = ((UInt)xx) * ((UInt)yy);
24738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   t >>=/*u*/ 16;
24838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return (UShort)t;
24938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
25038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
25138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UInt cmpeq32 ( UInt xx, UInt yy )
25238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
25338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return xx==yy ? 0xFFFFFFFF : 0;
25438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
25538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
25638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UShort cmpeq16 ( UShort xx, UShort yy )
25738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
258d19fc161147086f31126fef0955b426b4f843d02sewardj   return toUShort(xx==yy ? 0xFFFF : 0);
25938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
26038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
26138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar cmpeq8 ( UChar xx, UChar yy )
26238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
263d19fc161147086f31126fef0955b426b4f843d02sewardj   return toUChar(xx==yy ? 0xFF : 0);
26438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
26538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
26638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UInt cmpgt32S ( Int xx, Int yy )
26738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
26838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return xx>yy ? 0xFFFFFFFF : 0;
26938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
27038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
27138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UShort cmpgt16S ( Short xx, Short yy )
27238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
273d19fc161147086f31126fef0955b426b4f843d02sewardj   return toUShort(xx>yy ? 0xFFFF : 0);
27438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
27538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
27638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar cmpgt8S ( Char xx, Char yy )
27738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
278d19fc161147086f31126fef0955b426b4f843d02sewardj   return toUChar(xx>yy ? 0xFF : 0);
27938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
28038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
2811806918ae2783af5808f00876581e01c7b650a0dsewardjstatic inline UInt cmpnez32 ( UInt xx )
2821806918ae2783af5808f00876581e01c7b650a0dsewardj{
2831806918ae2783af5808f00876581e01c7b650a0dsewardj   return xx==0 ? 0 : 0xFFFFFFFF;
2841806918ae2783af5808f00876581e01c7b650a0dsewardj}
2851806918ae2783af5808f00876581e01c7b650a0dsewardj
2861806918ae2783af5808f00876581e01c7b650a0dsewardjstatic inline UShort cmpnez16 ( UShort xx )
2871806918ae2783af5808f00876581e01c7b650a0dsewardj{
288d19fc161147086f31126fef0955b426b4f843d02sewardj   return toUShort(xx==0 ? 0 : 0xFFFF);
2891806918ae2783af5808f00876581e01c7b650a0dsewardj}
2901806918ae2783af5808f00876581e01c7b650a0dsewardj
2911806918ae2783af5808f00876581e01c7b650a0dsewardjstatic inline UChar cmpnez8 ( UChar xx )
2921806918ae2783af5808f00876581e01c7b650a0dsewardj{
293d19fc161147086f31126fef0955b426b4f843d02sewardj   return toUChar(xx==0 ? 0 : 0xFF);
2941806918ae2783af5808f00876581e01c7b650a0dsewardj}
2951806918ae2783af5808f00876581e01c7b650a0dsewardj
296c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardjstatic inline Short qnarrow32Sto16S ( UInt xx0 )
29738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
29838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   Int xx = (Int)xx0;
29938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   if (xx < -32768) xx = -32768;
30038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   if (xx > 32767)  xx = 32767;
30138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return (Short)xx;
30238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
30338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
304c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardjstatic inline Char qnarrow16Sto8S ( UShort xx0 )
30538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
30638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   Short xx = (Short)xx0;
30738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   if (xx < -128) xx = -128;
30838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   if (xx > 127)  xx = 127;
30938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return (Char)xx;
31038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
31138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
312c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardjstatic inline UChar qnarrow16Sto8U ( UShort xx0 )
31338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
31438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   Short xx = (Short)xx0;
31538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   if (xx < 0)   xx = 0;
31638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   if (xx > 255) xx = 255;
31738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return (UChar)xx;
31838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
31938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
320ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardjstatic inline UShort narrow32to16 ( UInt xx )
321ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj{
322ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj   return (UShort)xx;
323ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj}
324ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj
325ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardjstatic inline UChar narrow16to8 ( UShort xx )
326ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj{
327ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj   return (UChar)xx;
328ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj}
329ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj
/* Shifts: out-of-range shift amounts are not handled here, since
   that is dealt with at a higher level. */
33238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
333d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardjstatic inline UChar shl8 ( UChar v, UInt n )
334d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj{
335d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj   return toUChar(v << n);
336d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj}
337d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj
338d71ba837242cc470f622335b1c650bce8886a533sewardjstatic inline UChar sar8 ( UChar v, UInt n )
339d71ba837242cc470f622335b1c650bce8886a533sewardj{
340d71ba837242cc470f622335b1c650bce8886a533sewardj   return toUChar(((Char)v) >> n);
341d71ba837242cc470f622335b1c650bce8886a533sewardj}
342d71ba837242cc470f622335b1c650bce8886a533sewardj
34338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UShort shl16 ( UShort v, UInt n )
34438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
345d19fc161147086f31126fef0955b426b4f843d02sewardj   return toUShort(v << n);
34638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
34738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
34838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UShort shr16 ( UShort v, UInt n )
34938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
350d19fc161147086f31126fef0955b426b4f843d02sewardj   return toUShort((((UShort)v) >> n));
35138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
35238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
35338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UShort sar16 ( UShort v, UInt n )
35438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
355d19fc161147086f31126fef0955b426b4f843d02sewardj   return toUShort(((Short)v) >> n);
35638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
35738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
35838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UInt shl32 ( UInt v, UInt n )
35938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
36038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return v << n;
36138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
36238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
36338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UInt shr32 ( UInt v, UInt n )
36438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
36538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return (((UInt)v) >> n);
36638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
36738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
36838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UInt sar32 ( UInt v, UInt n )
36938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
37038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return ((Int)v) >> n;
37138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
37238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
37338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar avg8U ( UChar xx, UChar yy )
37438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
37538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UInt xxi = (UInt)xx;
37638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UInt yyi = (UInt)yy;
37738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UInt r   = (xxi + yyi + 1) >> 1;
37838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return (UChar)r;
37938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
38038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
38138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UShort avg16U ( UShort xx, UShort yy )
38238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
38338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UInt xxi = (UInt)xx;
38438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UInt yyi = (UInt)yy;
38538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UInt r   = (xxi + yyi + 1) >> 1;
38638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return (UShort)r;
38738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
38838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
38938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline Short max16S ( Short xx, Short yy )
39038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
391d19fc161147086f31126fef0955b426b4f843d02sewardj   return toUShort((xx > yy) ? xx : yy);
39238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
39338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
39438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar max8U ( UChar xx, UChar yy )
39538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
396d19fc161147086f31126fef0955b426b4f843d02sewardj   return toUChar((xx > yy) ? xx : yy);
39738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
39838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
39938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline Short min16S ( Short xx, Short yy )
40038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
401d19fc161147086f31126fef0955b426b4f843d02sewardj   return toUShort((xx < yy) ? xx : yy);
40238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
40338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
40438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjstatic inline UChar min8U ( UChar xx, UChar yy )
40538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
406d19fc161147086f31126fef0955b426b4f843d02sewardj   return toUChar((xx < yy) ? xx : yy);
40738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
40838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
409e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline UShort hadd16U ( UShort xx, UShort yy )
410e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
411e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   UInt xxi = (UInt)xx;
412e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   UInt yyi = (UInt)yy;
413e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   UInt r   = (xxi + yyi) >> 1;
414e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return (UShort)r;
415e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
416e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
417e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline Short hadd16S ( Short xx, Short yy )
418e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
419e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   Int xxi = (Int)xx;
420e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   Int yyi = (Int)yy;
421e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   Int r   = (xxi + yyi) >> 1;
422e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return (Short)r;
423e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
424e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
425e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline UShort hsub16U ( UShort xx, UShort yy )
426e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
427e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   UInt xxi = (UInt)xx;
428e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   UInt yyi = (UInt)yy;
429e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   UInt r   = (xxi - yyi) >> 1;
430e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return (UShort)r;
431e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
432e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
433e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline Short hsub16S ( Short xx, Short yy )
434e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
435e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   Int xxi = (Int)xx;
436e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   Int yyi = (Int)yy;
437e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   Int r   = (xxi - yyi) >> 1;
438e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return (Short)r;
439e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
440e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
441e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline UChar hadd8U ( UChar xx, UChar yy )
442e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
443e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   UInt xxi = (UInt)xx;
444e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   UInt yyi = (UInt)yy;
445e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   UInt r   = (xxi + yyi) >> 1;
446e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return (UChar)r;
447e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
448e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
449e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline Char hadd8S ( Char xx, Char yy )
450e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
451e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   Int xxi = (Int)xx;
452e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   Int yyi = (Int)yy;
453e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   Int r   = (xxi + yyi) >> 1;
454e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return (Char)r;
455e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
456e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
457e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline UChar hsub8U ( UChar xx, UChar yy )
458e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
459e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   UInt xxi = (UInt)xx;
460e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   UInt yyi = (UInt)yy;
461e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   UInt r   = (xxi - yyi) >> 1;
462e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return (UChar)r;
463e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
464e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
465e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline Char hsub8S ( Char xx, Char yy )
466e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
467e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   Int xxi = (Int)xx;
468e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   Int yyi = (Int)yy;
469e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   Int r   = (xxi - yyi) >> 1;
470e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return (Char)r;
471e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
472e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
473310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardjstatic inline UInt absdiff8U ( UChar xx, UChar yy )
474310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardj{
475310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardj   UInt xxu = (UChar)xx;
476310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardj   UInt yyu = (UChar)yy;
477310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardj   return xxu >= yyu  ? xxu - yyu  : yyu - xxu;
478310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardj}
479e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
48038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* ----------------------------------------------------- */
48138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* Start of the externally visible functions.  These simply
48238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   implement the corresponding IR primops. */
48338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* ----------------------------------------------------- */
48438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
48538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* ------------ Normal addition ------------ */
48638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
48738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_Add32x2 ( ULong xx, ULong yy )
48838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
48938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk32x2(
49038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel32x2_1(xx) + sel32x2_1(yy),
49138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel32x2_0(xx) + sel32x2_0(yy)
49238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
49338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
49438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
49538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_Add16x4 ( ULong xx, ULong yy )
49638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
49738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk16x4(
498d19fc161147086f31126fef0955b426b4f843d02sewardj             toUShort( sel16x4_3(xx) + sel16x4_3(yy) ),
499d19fc161147086f31126fef0955b426b4f843d02sewardj             toUShort( sel16x4_2(xx) + sel16x4_2(yy) ),
500d19fc161147086f31126fef0955b426b4f843d02sewardj             toUShort( sel16x4_1(xx) + sel16x4_1(yy) ),
501d19fc161147086f31126fef0955b426b4f843d02sewardj             toUShort( sel16x4_0(xx) + sel16x4_0(yy) )
50238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
50338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
50438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
50538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_Add8x8 ( ULong xx, ULong yy )
50638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
50738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk8x8(
508d19fc161147086f31126fef0955b426b4f843d02sewardj             toUChar( sel8x8_7(xx) + sel8x8_7(yy) ),
509d19fc161147086f31126fef0955b426b4f843d02sewardj             toUChar( sel8x8_6(xx) + sel8x8_6(yy) ),
510d19fc161147086f31126fef0955b426b4f843d02sewardj             toUChar( sel8x8_5(xx) + sel8x8_5(yy) ),
511d19fc161147086f31126fef0955b426b4f843d02sewardj             toUChar( sel8x8_4(xx) + sel8x8_4(yy) ),
512d19fc161147086f31126fef0955b426b4f843d02sewardj             toUChar( sel8x8_3(xx) + sel8x8_3(yy) ),
513d19fc161147086f31126fef0955b426b4f843d02sewardj             toUChar( sel8x8_2(xx) + sel8x8_2(yy) ),
514d19fc161147086f31126fef0955b426b4f843d02sewardj             toUChar( sel8x8_1(xx) + sel8x8_1(yy) ),
515d19fc161147086f31126fef0955b426b4f843d02sewardj             toUChar( sel8x8_0(xx) + sel8x8_0(yy) )
51638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
51738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
51838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
51938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* ------------ Saturating addition ------------ */
52038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
52138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_QAdd16Sx4 ( ULong xx, ULong yy )
52238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
52338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk16x4(
52438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qadd16S( sel16x4_3(xx), sel16x4_3(yy) ),
52538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qadd16S( sel16x4_2(xx), sel16x4_2(yy) ),
52638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qadd16S( sel16x4_1(xx), sel16x4_1(yy) ),
52738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qadd16S( sel16x4_0(xx), sel16x4_0(yy) )
52838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
52938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
53038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
53138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_QAdd8Sx8 ( ULong xx, ULong yy )
53238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
53338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk8x8(
53438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qadd8S( sel8x8_7(xx), sel8x8_7(yy) ),
53538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qadd8S( sel8x8_6(xx), sel8x8_6(yy) ),
53638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qadd8S( sel8x8_5(xx), sel8x8_5(yy) ),
53738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qadd8S( sel8x8_4(xx), sel8x8_4(yy) ),
53838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qadd8S( sel8x8_3(xx), sel8x8_3(yy) ),
53938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qadd8S( sel8x8_2(xx), sel8x8_2(yy) ),
54038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qadd8S( sel8x8_1(xx), sel8x8_1(yy) ),
54138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qadd8S( sel8x8_0(xx), sel8x8_0(yy) )
54238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
54338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
54438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
54538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_QAdd16Ux4 ( ULong xx, ULong yy )
54638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
54738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk16x4(
54838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qadd16U( sel16x4_3(xx), sel16x4_3(yy) ),
54938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qadd16U( sel16x4_2(xx), sel16x4_2(yy) ),
55038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qadd16U( sel16x4_1(xx), sel16x4_1(yy) ),
55138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qadd16U( sel16x4_0(xx), sel16x4_0(yy) )
55238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
55338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
55438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
55538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_QAdd8Ux8 ( ULong xx, ULong yy )
55638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
55738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk8x8(
55838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qadd8U( sel8x8_7(xx), sel8x8_7(yy) ),
55938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qadd8U( sel8x8_6(xx), sel8x8_6(yy) ),
56038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qadd8U( sel8x8_5(xx), sel8x8_5(yy) ),
56138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qadd8U( sel8x8_4(xx), sel8x8_4(yy) ),
56238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qadd8U( sel8x8_3(xx), sel8x8_3(yy) ),
56338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qadd8U( sel8x8_2(xx), sel8x8_2(yy) ),
56438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qadd8U( sel8x8_1(xx), sel8x8_1(yy) ),
56538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qadd8U( sel8x8_0(xx), sel8x8_0(yy) )
56638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
56738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
56838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
56938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* ------------ Normal subtraction ------------ */
57038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
57138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_Sub32x2 ( ULong xx, ULong yy )
57238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
57338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk32x2(
57438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel32x2_1(xx) - sel32x2_1(yy),
57538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel32x2_0(xx) - sel32x2_0(yy)
57638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
57738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
57838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
57938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_Sub16x4 ( ULong xx, ULong yy )
58038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
58138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk16x4(
582d19fc161147086f31126fef0955b426b4f843d02sewardj             toUShort( sel16x4_3(xx) - sel16x4_3(yy) ),
583d19fc161147086f31126fef0955b426b4f843d02sewardj             toUShort( sel16x4_2(xx) - sel16x4_2(yy) ),
584d19fc161147086f31126fef0955b426b4f843d02sewardj             toUShort( sel16x4_1(xx) - sel16x4_1(yy) ),
585d19fc161147086f31126fef0955b426b4f843d02sewardj             toUShort( sel16x4_0(xx) - sel16x4_0(yy) )
58638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
58738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
58838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
58938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_Sub8x8 ( ULong xx, ULong yy )
59038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
59138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk8x8(
592d19fc161147086f31126fef0955b426b4f843d02sewardj             toUChar( sel8x8_7(xx) - sel8x8_7(yy) ),
593d19fc161147086f31126fef0955b426b4f843d02sewardj             toUChar( sel8x8_6(xx) - sel8x8_6(yy) ),
594d19fc161147086f31126fef0955b426b4f843d02sewardj             toUChar( sel8x8_5(xx) - sel8x8_5(yy) ),
595d19fc161147086f31126fef0955b426b4f843d02sewardj             toUChar( sel8x8_4(xx) - sel8x8_4(yy) ),
596d19fc161147086f31126fef0955b426b4f843d02sewardj             toUChar( sel8x8_3(xx) - sel8x8_3(yy) ),
597d19fc161147086f31126fef0955b426b4f843d02sewardj             toUChar( sel8x8_2(xx) - sel8x8_2(yy) ),
598d19fc161147086f31126fef0955b426b4f843d02sewardj             toUChar( sel8x8_1(xx) - sel8x8_1(yy) ),
599d19fc161147086f31126fef0955b426b4f843d02sewardj             toUChar( sel8x8_0(xx) - sel8x8_0(yy) )
60038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
60138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
60238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
60338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* ------------ Saturating subtraction ------------ */
60438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
60538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_QSub16Sx4 ( ULong xx, ULong yy )
60638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
60738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk16x4(
60838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qsub16S( sel16x4_3(xx), sel16x4_3(yy) ),
60938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qsub16S( sel16x4_2(xx), sel16x4_2(yy) ),
61038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qsub16S( sel16x4_1(xx), sel16x4_1(yy) ),
61138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qsub16S( sel16x4_0(xx), sel16x4_0(yy) )
61238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
61338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
61438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
61538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_QSub8Sx8 ( ULong xx, ULong yy )
61638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
61738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk8x8(
61838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qsub8S( sel8x8_7(xx), sel8x8_7(yy) ),
61938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qsub8S( sel8x8_6(xx), sel8x8_6(yy) ),
62038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qsub8S( sel8x8_5(xx), sel8x8_5(yy) ),
62138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qsub8S( sel8x8_4(xx), sel8x8_4(yy) ),
62238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qsub8S( sel8x8_3(xx), sel8x8_3(yy) ),
62338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qsub8S( sel8x8_2(xx), sel8x8_2(yy) ),
62438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qsub8S( sel8x8_1(xx), sel8x8_1(yy) ),
62538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qsub8S( sel8x8_0(xx), sel8x8_0(yy) )
62638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
62738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
62838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
62938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_QSub16Ux4 ( ULong xx, ULong yy )
63038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
63138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk16x4(
63238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qsub16U( sel16x4_3(xx), sel16x4_3(yy) ),
63338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qsub16U( sel16x4_2(xx), sel16x4_2(yy) ),
63438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qsub16U( sel16x4_1(xx), sel16x4_1(yy) ),
63538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qsub16U( sel16x4_0(xx), sel16x4_0(yy) )
63638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
63738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
63838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
63938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_QSub8Ux8 ( ULong xx, ULong yy )
64038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
64138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk8x8(
64238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qsub8U( sel8x8_7(xx), sel8x8_7(yy) ),
64338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qsub8U( sel8x8_6(xx), sel8x8_6(yy) ),
64438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qsub8U( sel8x8_5(xx), sel8x8_5(yy) ),
64538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qsub8U( sel8x8_4(xx), sel8x8_4(yy) ),
64638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qsub8U( sel8x8_3(xx), sel8x8_3(yy) ),
64738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qsub8U( sel8x8_2(xx), sel8x8_2(yy) ),
64838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qsub8U( sel8x8_1(xx), sel8x8_1(yy) ),
64938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             qsub8U( sel8x8_0(xx), sel8x8_0(yy) )
65038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
65138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
65238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
65338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* ------------ Multiplication ------------ */
65438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
65538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_Mul16x4 ( ULong xx, ULong yy )
65638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
65738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk16x4(
65838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             mul16( sel16x4_3(xx), sel16x4_3(yy) ),
65938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             mul16( sel16x4_2(xx), sel16x4_2(yy) ),
66038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             mul16( sel16x4_1(xx), sel16x4_1(yy) ),
66138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             mul16( sel16x4_0(xx), sel16x4_0(yy) )
66238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
66338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
66438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
665d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardjULong h_generic_calc_Mul32x2 ( ULong xx, ULong yy )
666d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj{
667d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj   return mk32x2(
668d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj             mul32( sel32x2_1(xx), sel32x2_1(yy) ),
669d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj             mul32( sel32x2_0(xx), sel32x2_0(yy) )
670d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj          );
671d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj}
672d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj
67338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_MulHi16Sx4 ( ULong xx, ULong yy )
67438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
67538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk16x4(
67638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             mulhi16S( sel16x4_3(xx), sel16x4_3(yy) ),
67738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             mulhi16S( sel16x4_2(xx), sel16x4_2(yy) ),
67838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             mulhi16S( sel16x4_1(xx), sel16x4_1(yy) ),
67938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             mulhi16S( sel16x4_0(xx), sel16x4_0(yy) )
68038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
68138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
68238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
68338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_MulHi16Ux4 ( ULong xx, ULong yy )
68438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
68538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk16x4(
68638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             mulhi16U( sel16x4_3(xx), sel16x4_3(yy) ),
68738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             mulhi16U( sel16x4_2(xx), sel16x4_2(yy) ),
68838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             mulhi16U( sel16x4_1(xx), sel16x4_1(yy) ),
68938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             mulhi16U( sel16x4_0(xx), sel16x4_0(yy) )
69038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
69138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
69238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
69338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* ------------ Comparison ------------ */
69438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
69538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_CmpEQ32x2 ( ULong xx, ULong yy )
69638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
69738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk32x2(
69838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpeq32( sel32x2_1(xx), sel32x2_1(yy) ),
69938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpeq32( sel32x2_0(xx), sel32x2_0(yy) )
70038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
70138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
70238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
70338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_CmpEQ16x4 ( ULong xx, ULong yy )
70438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
70538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk16x4(
70638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpeq16( sel16x4_3(xx), sel16x4_3(yy) ),
70738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpeq16( sel16x4_2(xx), sel16x4_2(yy) ),
70838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpeq16( sel16x4_1(xx), sel16x4_1(yy) ),
70938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpeq16( sel16x4_0(xx), sel16x4_0(yy) )
71038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
71138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
71238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
71338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_CmpEQ8x8 ( ULong xx, ULong yy )
71438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
71538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk8x8(
71638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpeq8( sel8x8_7(xx), sel8x8_7(yy) ),
71738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpeq8( sel8x8_6(xx), sel8x8_6(yy) ),
71838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpeq8( sel8x8_5(xx), sel8x8_5(yy) ),
71938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpeq8( sel8x8_4(xx), sel8x8_4(yy) ),
72038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpeq8( sel8x8_3(xx), sel8x8_3(yy) ),
72138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpeq8( sel8x8_2(xx), sel8x8_2(yy) ),
72238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpeq8( sel8x8_1(xx), sel8x8_1(yy) ),
72338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpeq8( sel8x8_0(xx), sel8x8_0(yy) )
72438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
72538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
72638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
72738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_CmpGT32Sx2 ( ULong xx, ULong yy )
72838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
72938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk32x2(
73038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpgt32S( sel32x2_1(xx), sel32x2_1(yy) ),
73138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpgt32S( sel32x2_0(xx), sel32x2_0(yy) )
73238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
73338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
73438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
73538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_CmpGT16Sx4 ( ULong xx, ULong yy )
73638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
73738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk16x4(
73838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpgt16S( sel16x4_3(xx), sel16x4_3(yy) ),
73938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpgt16S( sel16x4_2(xx), sel16x4_2(yy) ),
74038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpgt16S( sel16x4_1(xx), sel16x4_1(yy) ),
74138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpgt16S( sel16x4_0(xx), sel16x4_0(yy) )
74238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
74338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
74438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
74538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_CmpGT8Sx8 ( ULong xx, ULong yy )
74638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
74738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk8x8(
74838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpgt8S( sel8x8_7(xx), sel8x8_7(yy) ),
74938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpgt8S( sel8x8_6(xx), sel8x8_6(yy) ),
75038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpgt8S( sel8x8_5(xx), sel8x8_5(yy) ),
75138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpgt8S( sel8x8_4(xx), sel8x8_4(yy) ),
75238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpgt8S( sel8x8_3(xx), sel8x8_3(yy) ),
75338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpgt8S( sel8x8_2(xx), sel8x8_2(yy) ),
75438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpgt8S( sel8x8_1(xx), sel8x8_1(yy) ),
75538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             cmpgt8S( sel8x8_0(xx), sel8x8_0(yy) )
75638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
75738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
75838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
7591806918ae2783af5808f00876581e01c7b650a0dsewardjULong h_generic_calc_CmpNEZ32x2 ( ULong xx )
7601806918ae2783af5808f00876581e01c7b650a0dsewardj{
7611806918ae2783af5808f00876581e01c7b650a0dsewardj   return mk32x2(
7621806918ae2783af5808f00876581e01c7b650a0dsewardj             cmpnez32( sel32x2_1(xx) ),
7631806918ae2783af5808f00876581e01c7b650a0dsewardj             cmpnez32( sel32x2_0(xx) )
7641806918ae2783af5808f00876581e01c7b650a0dsewardj          );
7651806918ae2783af5808f00876581e01c7b650a0dsewardj}
7661806918ae2783af5808f00876581e01c7b650a0dsewardj
7671806918ae2783af5808f00876581e01c7b650a0dsewardjULong h_generic_calc_CmpNEZ16x4 ( ULong xx )
7681806918ae2783af5808f00876581e01c7b650a0dsewardj{
7691806918ae2783af5808f00876581e01c7b650a0dsewardj   return mk16x4(
7701806918ae2783af5808f00876581e01c7b650a0dsewardj             cmpnez16( sel16x4_3(xx) ),
7711806918ae2783af5808f00876581e01c7b650a0dsewardj             cmpnez16( sel16x4_2(xx) ),
7721806918ae2783af5808f00876581e01c7b650a0dsewardj             cmpnez16( sel16x4_1(xx) ),
7731806918ae2783af5808f00876581e01c7b650a0dsewardj             cmpnez16( sel16x4_0(xx) )
7741806918ae2783af5808f00876581e01c7b650a0dsewardj          );
7751806918ae2783af5808f00876581e01c7b650a0dsewardj}
7761806918ae2783af5808f00876581e01c7b650a0dsewardj
7771806918ae2783af5808f00876581e01c7b650a0dsewardjULong h_generic_calc_CmpNEZ8x8 ( ULong xx )
7781806918ae2783af5808f00876581e01c7b650a0dsewardj{
7791806918ae2783af5808f00876581e01c7b650a0dsewardj   return mk8x8(
7801806918ae2783af5808f00876581e01c7b650a0dsewardj             cmpnez8( sel8x8_7(xx) ),
7811806918ae2783af5808f00876581e01c7b650a0dsewardj             cmpnez8( sel8x8_6(xx) ),
7821806918ae2783af5808f00876581e01c7b650a0dsewardj             cmpnez8( sel8x8_5(xx) ),
7831806918ae2783af5808f00876581e01c7b650a0dsewardj             cmpnez8( sel8x8_4(xx) ),
7841806918ae2783af5808f00876581e01c7b650a0dsewardj             cmpnez8( sel8x8_3(xx) ),
7851806918ae2783af5808f00876581e01c7b650a0dsewardj             cmpnez8( sel8x8_2(xx) ),
7861806918ae2783af5808f00876581e01c7b650a0dsewardj             cmpnez8( sel8x8_1(xx) ),
7871806918ae2783af5808f00876581e01c7b650a0dsewardj             cmpnez8( sel8x8_0(xx) )
7881806918ae2783af5808f00876581e01c7b650a0dsewardj          );
7891806918ae2783af5808f00876581e01c7b650a0dsewardj}
7901806918ae2783af5808f00876581e01c7b650a0dsewardj
79138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* ------------ Saturating narrowing ------------ */
79238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
7935f438dd73072211989c6d496845bdc9b777ecbecsewardjULong h_generic_calc_QNarrowBin32Sto16Sx4 ( ULong aa, ULong bb )
79438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
79538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UInt d = sel32x2_1(aa);
79638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UInt c = sel32x2_0(aa);
79738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UInt b = sel32x2_1(bb);
79838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UInt a = sel32x2_0(bb);
79938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk16x4(
800c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj             qnarrow32Sto16S(d),
801c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj             qnarrow32Sto16S(c),
802c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj             qnarrow32Sto16S(b),
803c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj             qnarrow32Sto16S(a)
80438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
80538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
80638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
8075f438dd73072211989c6d496845bdc9b777ecbecsewardjULong h_generic_calc_QNarrowBin16Sto8Sx8 ( ULong aa, ULong bb )
80838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
80938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UShort h = sel16x4_3(aa);
81038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UShort g = sel16x4_2(aa);
81138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UShort f = sel16x4_1(aa);
81238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UShort e = sel16x4_0(aa);
81338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UShort d = sel16x4_3(bb);
81438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UShort c = sel16x4_2(bb);
81538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UShort b = sel16x4_1(bb);
81638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UShort a = sel16x4_0(bb);
81738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk8x8(
818c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj             qnarrow16Sto8S(h),
819c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj             qnarrow16Sto8S(g),
820c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj             qnarrow16Sto8S(f),
821c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj             qnarrow16Sto8S(e),
822c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj             qnarrow16Sto8S(d),
823c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj             qnarrow16Sto8S(c),
824c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj             qnarrow16Sto8S(b),
825c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj             qnarrow16Sto8S(a)
82638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
82738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
82838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
8295f438dd73072211989c6d496845bdc9b777ecbecsewardjULong h_generic_calc_QNarrowBin16Sto8Ux8 ( ULong aa, ULong bb )
83038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
83138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UShort h = sel16x4_3(aa);
83238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UShort g = sel16x4_2(aa);
83338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UShort f = sel16x4_1(aa);
83438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UShort e = sel16x4_0(aa);
83538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UShort d = sel16x4_3(bb);
83638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UShort c = sel16x4_2(bb);
83738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UShort b = sel16x4_1(bb);
83838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   UShort a = sel16x4_0(bb);
83938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk8x8(
840c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj             qnarrow16Sto8U(h),
841c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj             qnarrow16Sto8U(g),
842c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj             qnarrow16Sto8U(f),
843c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj             qnarrow16Sto8U(e),
844c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj             qnarrow16Sto8U(d),
845c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj             qnarrow16Sto8U(c),
846c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj             qnarrow16Sto8U(b),
847c9bff7dbb37ba2ee5898ef49aefaa92095ab446bsewardj             qnarrow16Sto8U(a)
84838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
84938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
85038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
851ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj/* ------------ Truncating narrowing ------------ */
852ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj
853ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardjULong h_generic_calc_NarrowBin32to16x4 ( ULong aa, ULong bb )
854ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj{
855ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj   UInt d = sel32x2_1(aa);
856ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj   UInt c = sel32x2_0(aa);
857ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj   UInt b = sel32x2_1(bb);
858ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj   UInt a = sel32x2_0(bb);
859ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj   return mk16x4(
860ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj             narrow32to16(d),
861ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj             narrow32to16(c),
862ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj             narrow32to16(b),
863ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj             narrow32to16(a)
864ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj          );
865ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj}
866ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj
867ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardjULong h_generic_calc_NarrowBin16to8x8 ( ULong aa, ULong bb )
868ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj{
869ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj   UShort h = sel16x4_3(aa);
870ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj   UShort g = sel16x4_2(aa);
871ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj   UShort f = sel16x4_1(aa);
872ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj   UShort e = sel16x4_0(aa);
873ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj   UShort d = sel16x4_3(bb);
874ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj   UShort c = sel16x4_2(bb);
875ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj   UShort b = sel16x4_1(bb);
876ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj   UShort a = sel16x4_0(bb);
877ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj   return mk8x8(
878ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj             narrow16to8(h),
879ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj             narrow16to8(g),
880ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj             narrow16to8(f),
881ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj             narrow16to8(e),
882ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj             narrow16to8(d),
883ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj             narrow16to8(c),
884ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj             narrow16to8(b),
885ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj             narrow16to8(a)
886ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj          );
887ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj}
888ad2c9ea0c360fced134b2dd0d4b28c0be3639cfbsewardj
88938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* ------------ Interleaving ------------ */
89038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
89138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_InterleaveHI8x8 ( ULong aa, ULong bb )
89238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
89338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk8x8(
89438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel8x8_7(aa),
89538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel8x8_7(bb),
89638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel8x8_6(aa),
89738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel8x8_6(bb),
89838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel8x8_5(aa),
89938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel8x8_5(bb),
90038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel8x8_4(aa),
90138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel8x8_4(bb)
90238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
90338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
90438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
90538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_InterleaveLO8x8 ( ULong aa, ULong bb )
90638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
90738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk8x8(
90838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel8x8_3(aa),
90938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel8x8_3(bb),
91038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel8x8_2(aa),
91138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel8x8_2(bb),
91238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel8x8_1(aa),
91338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel8x8_1(bb),
91438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel8x8_0(aa),
91538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel8x8_0(bb)
91638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
91738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
91838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
91938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_InterleaveHI16x4 ( ULong aa, ULong bb )
92038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
92138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk16x4(
92238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel16x4_3(aa),
92338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel16x4_3(bb),
92438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel16x4_2(aa),
92538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel16x4_2(bb)
92638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
92738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
92838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
92938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_InterleaveLO16x4 ( ULong aa, ULong bb )
93038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
93138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk16x4(
93238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel16x4_1(aa),
93338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel16x4_1(bb),
93438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel16x4_0(aa),
93538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel16x4_0(bb)
93638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
93738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
93838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
93938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_InterleaveHI32x2 ( ULong aa, ULong bb )
94038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
94138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk32x2(
94238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel32x2_1(aa),
94338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel32x2_1(bb)
94438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
94538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
94638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
94738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_InterleaveLO32x2 ( ULong aa, ULong bb )
94838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
94938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk32x2(
95038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel32x2_0(aa),
95138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sel32x2_0(bb)
95238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
95338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
95438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
955d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj/* ------------ Concatenation ------------ */
956d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj
957d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardjULong h_generic_calc_CatOddLanes16x4 ( ULong aa, ULong bb )
958d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj{
959d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj   return mk16x4(
960d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj             sel16x4_3(aa),
961d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj             sel16x4_1(aa),
962d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj             sel16x4_3(bb),
963d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj             sel16x4_1(bb)
964d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj          );
965d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj}
966d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj
967d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardjULong h_generic_calc_CatEvenLanes16x4 ( ULong aa, ULong bb )
968d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj{
969d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj   return mk16x4(
970d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj             sel16x4_2(aa),
971d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj             sel16x4_0(aa),
972d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj             sel16x4_2(bb),
973d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj             sel16x4_0(bb)
974d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj          );
975d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj}
976d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj
977d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj/* misc hack looking for a proper home */
978d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardjULong h_generic_calc_Perm8x8 ( ULong aa, ULong bb )
979d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj{
980d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj   return mk8x8(
981d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj             index8x8(aa, sel8x8_7(bb)),
982d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj             index8x8(aa, sel8x8_6(bb)),
983d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj             index8x8(aa, sel8x8_5(bb)),
984d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj             index8x8(aa, sel8x8_4(bb)),
985d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj             index8x8(aa, sel8x8_3(bb)),
986d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj             index8x8(aa, sel8x8_2(bb)),
987d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj             index8x8(aa, sel8x8_1(bb)),
988d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj             index8x8(aa, sel8x8_0(bb))
989d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj          );
990d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj}
99138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
99238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* ------------ Shifting ------------ */
/* Note that because these primops are undefined if the shift amount
   equals or exceeds the lane width, the shift amount is masked so
   that the scalar shifts are always in range.  Given the semantics
   of these primops (ShlN16x4, etc), it is in fact an error if we
   are ever given an out-of-range shift amount.
*/
99938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_ShlN32x2 ( ULong xx, UInt nn )
100038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
100138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   /* vassert(nn < 32); */
100238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   nn &= 31;
100338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk32x2(
100438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             shl32( sel32x2_1(xx), nn ),
100538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             shl32( sel32x2_0(xx), nn )
100638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
100738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
100838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
100938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_ShlN16x4 ( ULong xx, UInt nn )
101038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
101138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   /* vassert(nn < 16); */
101238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   nn &= 15;
101338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk16x4(
101438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             shl16( sel16x4_3(xx), nn ),
101538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             shl16( sel16x4_2(xx), nn ),
101638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             shl16( sel16x4_1(xx), nn ),
101738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             shl16( sel16x4_0(xx), nn )
101838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
1019d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj}
1020d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj
1021d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardjULong h_generic_calc_ShlN8x8  ( ULong xx, UInt nn )
1022d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj{
1023d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj   /* vassert(nn < 8); */
1024d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj   nn &= 7;
1025d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj   return mk8x8(
1026d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj             shl8( sel8x8_7(xx), nn ),
1027d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj             shl8( sel8x8_6(xx), nn ),
1028d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj             shl8( sel8x8_5(xx), nn ),
1029d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj             shl8( sel8x8_4(xx), nn ),
1030d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj             shl8( sel8x8_3(xx), nn ),
1031d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj             shl8( sel8x8_2(xx), nn ),
1032d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj             shl8( sel8x8_1(xx), nn ),
1033d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj             shl8( sel8x8_0(xx), nn )
1034d166e2874e0d9a9e567a281d7f1f6e8ef8127196sewardj          );
103538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
103638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
103738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_ShrN32x2 ( ULong xx, UInt nn )
103838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
103938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   /* vassert(nn < 32); */
104038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   nn &= 31;
104138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk32x2(
104238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             shr32( sel32x2_1(xx), nn ),
104338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             shr32( sel32x2_0(xx), nn )
104438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
104538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
104638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
104738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_ShrN16x4 ( ULong xx, UInt nn )
104838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
104938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   /* vassert(nn < 16); */
105038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   nn &= 15;
105138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk16x4(
105238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             shr16( sel16x4_3(xx), nn ),
105338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             shr16( sel16x4_2(xx), nn ),
105438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             shr16( sel16x4_1(xx), nn ),
105538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             shr16( sel16x4_0(xx), nn )
105638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
105738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
105838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
105938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_SarN32x2 ( ULong xx, UInt nn )
106038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
106138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   /* vassert(nn < 32); */
106238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   nn &= 31;
106338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk32x2(
106438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sar32( sel32x2_1(xx), nn ),
106538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sar32( sel32x2_0(xx), nn )
106638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
106738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
106838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
106938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_SarN16x4 ( ULong xx, UInt nn )
107038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
107138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   /* vassert(nn < 16); */
107238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   nn &= 15;
107338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk16x4(
107438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sar16( sel16x4_3(xx), nn ),
107538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sar16( sel16x4_2(xx), nn ),
107638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sar16( sel16x4_1(xx), nn ),
107738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             sar16( sel16x4_0(xx), nn )
107838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
107938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
108038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
1081d71ba837242cc470f622335b1c650bce8886a533sewardjULong h_generic_calc_SarN8x8 ( ULong xx, UInt nn )
1082d71ba837242cc470f622335b1c650bce8886a533sewardj{
1083d71ba837242cc470f622335b1c650bce8886a533sewardj   /* vassert(nn < 8); */
1084d71ba837242cc470f622335b1c650bce8886a533sewardj   nn &= 7;
1085d71ba837242cc470f622335b1c650bce8886a533sewardj   return mk8x8(
1086d71ba837242cc470f622335b1c650bce8886a533sewardj             sar8( sel8x8_7(xx), nn ),
1087d71ba837242cc470f622335b1c650bce8886a533sewardj             sar8( sel8x8_6(xx), nn ),
1088d71ba837242cc470f622335b1c650bce8886a533sewardj             sar8( sel8x8_5(xx), nn ),
1089d71ba837242cc470f622335b1c650bce8886a533sewardj             sar8( sel8x8_4(xx), nn ),
1090d71ba837242cc470f622335b1c650bce8886a533sewardj             sar8( sel8x8_3(xx), nn ),
1091d71ba837242cc470f622335b1c650bce8886a533sewardj             sar8( sel8x8_2(xx), nn ),
1092d71ba837242cc470f622335b1c650bce8886a533sewardj             sar8( sel8x8_1(xx), nn ),
1093d71ba837242cc470f622335b1c650bce8886a533sewardj             sar8( sel8x8_0(xx), nn )
1094d71ba837242cc470f622335b1c650bce8886a533sewardj          );
1095d71ba837242cc470f622335b1c650bce8886a533sewardj}
1096d71ba837242cc470f622335b1c650bce8886a533sewardj
109738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* ------------ Averaging ------------ */
109838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
109938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_Avg8Ux8 ( ULong xx, ULong yy )
110038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
110138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk8x8(
110238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             avg8U( sel8x8_7(xx), sel8x8_7(yy) ),
110338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             avg8U( sel8x8_6(xx), sel8x8_6(yy) ),
110438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             avg8U( sel8x8_5(xx), sel8x8_5(yy) ),
110538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             avg8U( sel8x8_4(xx), sel8x8_4(yy) ),
110638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             avg8U( sel8x8_3(xx), sel8x8_3(yy) ),
110738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             avg8U( sel8x8_2(xx), sel8x8_2(yy) ),
110838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             avg8U( sel8x8_1(xx), sel8x8_1(yy) ),
110938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             avg8U( sel8x8_0(xx), sel8x8_0(yy) )
111038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
111138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
111238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
111338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_Avg16Ux4 ( ULong xx, ULong yy )
111438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
111538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk16x4(
111638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             avg16U( sel16x4_3(xx), sel16x4_3(yy) ),
111738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             avg16U( sel16x4_2(xx), sel16x4_2(yy) ),
111838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             avg16U( sel16x4_1(xx), sel16x4_1(yy) ),
111938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             avg16U( sel16x4_0(xx), sel16x4_0(yy) )
112038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
112138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
112238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
112338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/* ------------ max/min ------------ */
112438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
112538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_Max16Sx4 ( ULong xx, ULong yy )
112638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
112738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk16x4(
112838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             max16S( sel16x4_3(xx), sel16x4_3(yy) ),
112938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             max16S( sel16x4_2(xx), sel16x4_2(yy) ),
113038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             max16S( sel16x4_1(xx), sel16x4_1(yy) ),
113138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             max16S( sel16x4_0(xx), sel16x4_0(yy) )
113238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
113338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
113438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
113538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_Max8Ux8 ( ULong xx, ULong yy )
113638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
113738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk8x8(
113838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             max8U( sel8x8_7(xx), sel8x8_7(yy) ),
113938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             max8U( sel8x8_6(xx), sel8x8_6(yy) ),
114038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             max8U( sel8x8_5(xx), sel8x8_5(yy) ),
114138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             max8U( sel8x8_4(xx), sel8x8_4(yy) ),
114238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             max8U( sel8x8_3(xx), sel8x8_3(yy) ),
114338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             max8U( sel8x8_2(xx), sel8x8_2(yy) ),
114438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             max8U( sel8x8_1(xx), sel8x8_1(yy) ),
114538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             max8U( sel8x8_0(xx), sel8x8_0(yy) )
114638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
114738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
114838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
114938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_Min16Sx4 ( ULong xx, ULong yy )
115038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
115138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk16x4(
115238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             min16S( sel16x4_3(xx), sel16x4_3(yy) ),
115338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             min16S( sel16x4_2(xx), sel16x4_2(yy) ),
115438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             min16S( sel16x4_1(xx), sel16x4_1(yy) ),
115538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             min16S( sel16x4_0(xx), sel16x4_0(yy) )
115638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
115738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
115838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
115938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardjULong h_generic_calc_Min8Ux8 ( ULong xx, ULong yy )
116038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj{
116138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj   return mk8x8(
116238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             min8U( sel8x8_7(xx), sel8x8_7(yy) ),
116338a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             min8U( sel8x8_6(xx), sel8x8_6(yy) ),
116438a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             min8U( sel8x8_5(xx), sel8x8_5(yy) ),
116538a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             min8U( sel8x8_4(xx), sel8x8_4(yy) ),
116638a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             min8U( sel8x8_3(xx), sel8x8_3(yy) ),
116738a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             min8U( sel8x8_2(xx), sel8x8_2(yy) ),
116838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             min8U( sel8x8_1(xx), sel8x8_1(yy) ),
116938a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj             min8U( sel8x8_0(xx), sel8x8_0(yy) )
117038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj          );
117138a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj}
117238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
1173e13074c2c1321d069fb95806bdce64f9a3512341sewardjUInt h_generic_calc_GetMSBs8x8 ( ULong xx )
1174e13074c2c1321d069fb95806bdce64f9a3512341sewardj{
1175e13074c2c1321d069fb95806bdce64f9a3512341sewardj   UInt r = 0;
1176e13074c2c1321d069fb95806bdce64f9a3512341sewardj   if (xx & (1ULL << (64-1))) r |= (1<<7);
1177e13074c2c1321d069fb95806bdce64f9a3512341sewardj   if (xx & (1ULL << (56-1))) r |= (1<<6);
1178e13074c2c1321d069fb95806bdce64f9a3512341sewardj   if (xx & (1ULL << (48-1))) r |= (1<<5);
1179e13074c2c1321d069fb95806bdce64f9a3512341sewardj   if (xx & (1ULL << (40-1))) r |= (1<<4);
1180e13074c2c1321d069fb95806bdce64f9a3512341sewardj   if (xx & (1ULL << (32-1))) r |= (1<<3);
1181e13074c2c1321d069fb95806bdce64f9a3512341sewardj   if (xx & (1ULL << (24-1))) r |= (1<<2);
1182e13074c2c1321d069fb95806bdce64f9a3512341sewardj   if (xx & (1ULL << (16-1))) r |= (1<<1);
1183e13074c2c1321d069fb95806bdce64f9a3512341sewardj   if (xx & (1ULL << ( 8-1))) r |= (1<<0);
1184e13074c2c1321d069fb95806bdce64f9a3512341sewardj   return r;
1185e13074c2c1321d069fb95806bdce64f9a3512341sewardj}
1186e13074c2c1321d069fb95806bdce64f9a3512341sewardj
1187e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj/* ------------ SOME 32-bit SIMD HELPERS TOO ------------ */
1188e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1189e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj/* Tuple/select functions for 16x2 vectors. */
1190e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline UInt mk16x2 ( UShort w1, UShort w2 ) {
1191e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return (((UInt)w1) << 16) | ((UInt)w2);
1192e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1193e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1194e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline UShort sel16x2_1 ( UInt w32 ) {
1195e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return 0xFFFF & (UShort)(w32 >> 16);
1196e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1197e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline UShort sel16x2_0 ( UInt w32 ) {
1198e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return 0xFFFF & (UShort)(w32);
1199e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1200e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1201e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline UInt mk8x4 ( UChar w3, UChar w2,
1202e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj                           UChar w1, UChar w0 ) {
1203e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   UInt w32 =   (((UInt)w3) << 24) | (((UInt)w2) << 16)
1204e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj              | (((UInt)w1) << 8)  | (((UInt)w0) << 0);
1205e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return w32;
1206e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1207e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1208e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline UChar sel8x4_3 ( UInt w32 ) {
1209e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return toUChar(0xFF & (w32 >> 24));
1210e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1211e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline UChar sel8x4_2 ( UInt w32 ) {
1212e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return toUChar(0xFF & (w32 >> 16));
1213e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1214e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline UChar sel8x4_1 ( UInt w32 ) {
1215e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return toUChar(0xFF & (w32 >> 8));
1216e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1217e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjstatic inline UChar sel8x4_0 ( UInt w32 ) {
1218e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return toUChar(0xFF & (w32 >> 0));
1219e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1220e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1221e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1222e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj/* ----------------------------------------------------- */
1223e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj/* More externally visible functions.  These simply
1224e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   implement the corresponding IR primops. */
1225e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj/* ----------------------------------------------------- */
1226e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1227e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj/* ------ 16x2 ------ */
1228e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1229e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_Add16x2 ( UInt xx, UInt yy )
1230e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
1231e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return mk16x2( sel16x2_1(xx) + sel16x2_1(yy),
1232e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj                  sel16x2_0(xx) + sel16x2_0(yy) );
1233e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1234e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1235e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_Sub16x2 ( UInt xx, UInt yy )
1236e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
1237e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return mk16x2( sel16x2_1(xx) - sel16x2_1(yy),
1238e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj                  sel16x2_0(xx) - sel16x2_0(yy) );
1239e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1240e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1241e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_HAdd16Ux2 ( UInt xx, UInt yy )
1242e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
1243e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return mk16x2( hadd16U( sel16x2_1(xx), sel16x2_1(yy) ),
1244e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj                  hadd16U( sel16x2_0(xx), sel16x2_0(yy) ) );
1245e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1246e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1247e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_HAdd16Sx2 ( UInt xx, UInt yy )
1248e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
1249e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return mk16x2( hadd16S( sel16x2_1(xx), sel16x2_1(yy) ),
1250e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj                  hadd16S( sel16x2_0(xx), sel16x2_0(yy) ) );
1251e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1252e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1253e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_HSub16Ux2 ( UInt xx, UInt yy )
1254e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
1255e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return mk16x2( hsub16U( sel16x2_1(xx), sel16x2_1(yy) ),
1256e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj                  hsub16U( sel16x2_0(xx), sel16x2_0(yy) ) );
1257e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1258e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1259e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_HSub16Sx2 ( UInt xx, UInt yy )
1260e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
1261e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return mk16x2( hsub16S( sel16x2_1(xx), sel16x2_1(yy) ),
1262e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj                  hsub16S( sel16x2_0(xx), sel16x2_0(yy) ) );
1263e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1264e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1265e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_QAdd16Ux2 ( UInt xx, UInt yy )
1266e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
1267e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return mk16x2( qadd16U( sel16x2_1(xx), sel16x2_1(yy) ),
1268e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj                  qadd16U( sel16x2_0(xx), sel16x2_0(yy) ) );
1269e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1270e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1271e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_QAdd16Sx2 ( UInt xx, UInt yy )
1272e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
1273e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return mk16x2( qadd16S( sel16x2_1(xx), sel16x2_1(yy) ),
1274e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj                  qadd16S( sel16x2_0(xx), sel16x2_0(yy) ) );
1275e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1276e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1277e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_QSub16Ux2 ( UInt xx, UInt yy )
1278e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
1279e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return mk16x2( qsub16U( sel16x2_1(xx), sel16x2_1(yy) ),
1280e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj                  qsub16U( sel16x2_0(xx), sel16x2_0(yy) ) );
1281e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1282e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1283e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_QSub16Sx2 ( UInt xx, UInt yy )
1284e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
1285e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return mk16x2( qsub16S( sel16x2_1(xx), sel16x2_1(yy) ),
1286e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj                  qsub16S( sel16x2_0(xx), sel16x2_0(yy) ) );
1287e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1288e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1289e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj/* ------ 8x4 ------ */
1290e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1291e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_Add8x4 ( UInt xx, UInt yy )
1292e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
1293e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return mk8x4(
1294e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             sel8x4_3(xx) + sel8x4_3(yy),
1295e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             sel8x4_2(xx) + sel8x4_2(yy),
1296e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             sel8x4_1(xx) + sel8x4_1(yy),
1297e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             sel8x4_0(xx) + sel8x4_0(yy)
1298e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj          );
1299e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1300e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1301e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_Sub8x4 ( UInt xx, UInt yy )
1302e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
1303e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return mk8x4(
1304e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             sel8x4_3(xx) - sel8x4_3(yy),
1305e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             sel8x4_2(xx) - sel8x4_2(yy),
1306e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             sel8x4_1(xx) - sel8x4_1(yy),
1307e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             sel8x4_0(xx) - sel8x4_0(yy)
1308e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj          );
1309e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1310e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1311e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_HAdd8Ux4 ( UInt xx, UInt yy )
1312e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
1313e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return mk8x4(
1314e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             hadd8U( sel8x4_3(xx), sel8x4_3(yy) ),
1315e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             hadd8U( sel8x4_2(xx), sel8x4_2(yy) ),
1316e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             hadd8U( sel8x4_1(xx), sel8x4_1(yy) ),
1317e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             hadd8U( sel8x4_0(xx), sel8x4_0(yy) )
1318e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj          );
1319e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1320e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1321e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_HAdd8Sx4 ( UInt xx, UInt yy )
1322e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
1323e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return mk8x4(
1324e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             hadd8S( sel8x4_3(xx), sel8x4_3(yy) ),
1325e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             hadd8S( sel8x4_2(xx), sel8x4_2(yy) ),
1326e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             hadd8S( sel8x4_1(xx), sel8x4_1(yy) ),
1327e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             hadd8S( sel8x4_0(xx), sel8x4_0(yy) )
1328e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj          );
1329e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1330e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1331e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_HSub8Ux4 ( UInt xx, UInt yy )
1332e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
1333e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return mk8x4(
1334e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             hsub8U( sel8x4_3(xx), sel8x4_3(yy) ),
1335e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             hsub8U( sel8x4_2(xx), sel8x4_2(yy) ),
1336e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             hsub8U( sel8x4_1(xx), sel8x4_1(yy) ),
1337e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             hsub8U( sel8x4_0(xx), sel8x4_0(yy) )
1338e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj          );
1339e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1340e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1341e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_HSub8Sx4 ( UInt xx, UInt yy )
1342e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
1343e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return mk8x4(
1344e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             hsub8S( sel8x4_3(xx), sel8x4_3(yy) ),
1345e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             hsub8S( sel8x4_2(xx), sel8x4_2(yy) ),
1346e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             hsub8S( sel8x4_1(xx), sel8x4_1(yy) ),
1347e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             hsub8S( sel8x4_0(xx), sel8x4_0(yy) )
1348e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj          );
1349e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1350e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1351e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_QAdd8Ux4 ( UInt xx, UInt yy )
1352e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
1353e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return mk8x4(
1354e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             qadd8U( sel8x4_3(xx), sel8x4_3(yy) ),
1355e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             qadd8U( sel8x4_2(xx), sel8x4_2(yy) ),
1356e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             qadd8U( sel8x4_1(xx), sel8x4_1(yy) ),
1357e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             qadd8U( sel8x4_0(xx), sel8x4_0(yy) )
1358e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj          );
1359e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1360e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1361e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_QAdd8Sx4 ( UInt xx, UInt yy )
1362e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
1363e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return mk8x4(
1364e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             qadd8S( sel8x4_3(xx), sel8x4_3(yy) ),
1365e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             qadd8S( sel8x4_2(xx), sel8x4_2(yy) ),
1366e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             qadd8S( sel8x4_1(xx), sel8x4_1(yy) ),
1367e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             qadd8S( sel8x4_0(xx), sel8x4_0(yy) )
1368e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj          );
1369e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1370e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1371e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_QSub8Ux4 ( UInt xx, UInt yy )
1372e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
1373e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return mk8x4(
1374e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             qsub8U( sel8x4_3(xx), sel8x4_3(yy) ),
1375e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             qsub8U( sel8x4_2(xx), sel8x4_2(yy) ),
1376e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             qsub8U( sel8x4_1(xx), sel8x4_1(yy) ),
1377e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             qsub8U( sel8x4_0(xx), sel8x4_0(yy) )
1378e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj          );
1379e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1380e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1381e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_QSub8Sx4 ( UInt xx, UInt yy )
1382e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
1383e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return mk8x4(
1384e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             qsub8S( sel8x4_3(xx), sel8x4_3(yy) ),
1385e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             qsub8S( sel8x4_2(xx), sel8x4_2(yy) ),
1386e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             qsub8S( sel8x4_1(xx), sel8x4_1(yy) ),
1387e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             qsub8S( sel8x4_0(xx), sel8x4_0(yy) )
1388e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj          );
1389e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1390e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1391e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_CmpNEZ16x2 ( UInt xx )
1392e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
1393e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return mk16x2(
1394e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             cmpnez16( sel16x2_1(xx) ),
1395e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             cmpnez16( sel16x2_0(xx) )
1396e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj          );
1397e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
1398e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj
1399e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardjUInt h_generic_calc_CmpNEZ8x4 ( UInt xx )
1400e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj{
1401e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj   return mk8x4(
1402e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             cmpnez8( sel8x4_3(xx) ),
1403e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             cmpnez8( sel8x4_2(xx) ),
1404e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             cmpnez8( sel8x4_1(xx) ),
1405e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj             cmpnez8( sel8x4_0(xx) )
1406e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj          );
1407e2ea17600e3bbdc4cc0edcd2a03f7d1142666f37sewardj}
140838a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj
1409310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardjUInt h_generic_calc_Sad8Ux4 ( UInt xx, UInt yy )
1410310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardj{
1411310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardj   return absdiff8U( sel8x4_3(xx), sel8x4_3(yy) )
1412310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardj          + absdiff8U( sel8x4_2(xx), sel8x4_2(yy) )
1413310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardj          + absdiff8U( sel8x4_1(xx), sel8x4_1(yy) )
1414310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardj          + absdiff8U( sel8x4_0(xx), sel8x4_0(yy) );
1415310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardj}
1416310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardj
141744ce46d5945ed83d96695d280510cc2a858894dcsewardjUInt h_generic_calc_QAdd32S ( UInt xx, UInt yy )
141844ce46d5945ed83d96695d280510cc2a858894dcsewardj{
141944ce46d5945ed83d96695d280510cc2a858894dcsewardj   return qadd32S( xx, yy );
142044ce46d5945ed83d96695d280510cc2a858894dcsewardj}
142144ce46d5945ed83d96695d280510cc2a858894dcsewardj
142244ce46d5945ed83d96695d280510cc2a858894dcsewardjUInt h_generic_calc_QSub32S ( UInt xx, UInt yy )
142344ce46d5945ed83d96695d280510cc2a858894dcsewardj{
142444ce46d5945ed83d96695d280510cc2a858894dcsewardj   return qsub32S( xx, yy );
142544ce46d5945ed83d96695d280510cc2a858894dcsewardj}
142644ce46d5945ed83d96695d280510cc2a858894dcsewardj
142744ce46d5945ed83d96695d280510cc2a858894dcsewardj
14284c96e61dd85c172b999d6afc88ce6640aeba9962sewardj/*------------------------------------------------------------------*/
14294c96e61dd85c172b999d6afc88ce6640aeba9962sewardj/* Decimal Floating Point (DFP) externally visible helper functions */
14304c96e61dd85c172b999d6afc88ce6640aeba9962sewardj/* that implement Iop_BCDtoDPB and Iop_DPBtoBCD                     */
14314c96e61dd85c172b999d6afc88ce6640aeba9962sewardj/*------------------------------------------------------------------*/
14324c96e61dd85c172b999d6afc88ce6640aeba9962sewardj
/* Single-bit helpers used by the DPB <-> BCD conversion routines
   that follow.

   NOT(x)     1 if x is zero, otherwise 0.
   GET(x, y)  bit number y of x, as 0 or 1.  The operand is shifted
              down first and then masked with a ULL constant, so any
              bit of a 64-bit value can be read even on hosts where
              'unsigned long' is only 32 bits wide (building the mask
              as 0x1UL << y would be undefined there for y >= 32).
   PUT(x, y)  x shifted left by y bit positions. */
#define NOT( x )    ( ( ( x ) == 0) ? 1 : 0)
#define GET( x, y ) ( ( ( x ) >> ( y ) ) & 0x1ULL )
#define PUT( x, y ) ( ( x )<< ( y ) )
14364c96e61dd85c172b999d6afc88ce6640aeba9962sewardj
14378bde7f1c67483371551aac0d4019c24c919063f7sewardjstatic ULong dpb_to_bcd( ULong chunk )
14384c96e61dd85c172b999d6afc88ce6640aeba9962sewardj{
14394c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   Short a, b, c, d, e, f, g, h, i, j, k, m;
14404c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   Short p, q, r, s, t, u, v, w, x, y;
14414c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   ULong value;
14424c96e61dd85c172b999d6afc88ce6640aeba9962sewardj
14434c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   /* convert 10 bit densely packed BCD to BCD */
14444c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   p = GET( chunk, 9 );
14454c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   q = GET( chunk, 8 );
14464c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   r = GET( chunk, 7 );
14474c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   s = GET( chunk, 6 );
14484c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   t = GET( chunk, 5 );
14494c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   u = GET( chunk, 4 );
14504c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   v = GET( chunk, 3 );
14514c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   w = GET( chunk, 2 );
14524c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   x = GET( chunk, 1 );
14534c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   y = GET( chunk, 0 );
14544c96e61dd85c172b999d6afc88ce6640aeba9962sewardj
14554c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   /* The BCD bit values are given by the following boolean equations.*/
14564c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   a = ( NOT(s) & v & w ) | ( t & v & w & s ) | ( v & w & NOT(x) );
14574c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   b = ( p & s & x & NOT(t) ) | ( p & NOT(w) ) | ( p & NOT(v) );
14584c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   c = ( q & s & x & NOT(t) ) | ( q & NOT(w) ) | ( q & NOT(v) );
14594c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   d = r;
14604c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   e = ( v & NOT(w) & x ) | ( s & v & w & x ) | ( NOT(t) & v & x & w );
14614c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   f = ( p & t & v & w & x & NOT(s) ) | ( s & NOT(x) & v ) | ( s & NOT(v) );
14624c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   g = ( q & t & w & v & x & NOT(s) ) | ( t & NOT(x) & v ) | ( t & NOT(v) );
14634c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   h = u;
14644c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   i = ( t & v & w & x ) | ( s & v & w & x ) | ( v & NOT(w) & NOT(x) );
14654c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   j = ( p & NOT(s) & NOT(t) & w & v ) | ( s & v & NOT(w) & x )
14664c96e61dd85c172b999d6afc88ce6640aeba9962sewardj            | ( p & w & NOT(x) & v ) | ( w & NOT(v) );
14674c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   k = ( q & NOT(s) & NOT(t) & v & w ) | ( t & v & NOT(w) & x )
14684c96e61dd85c172b999d6afc88ce6640aeba9962sewardj            | ( q & v & w & NOT(x) ) | ( x & NOT(v) );
14694c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   m = y;
14704c96e61dd85c172b999d6afc88ce6640aeba9962sewardj
14714c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   value = PUT(a, 11) | PUT(b, 10) | PUT(c, 9) | PUT(d, 8) | PUT(e, 7)
14724c96e61dd85c172b999d6afc88ce6640aeba9962sewardj            | PUT(f, 6) | PUT(g, 5) | PUT(h, 4) | PUT(i, 3) | PUT(j, 2)
14734c96e61dd85c172b999d6afc88ce6640aeba9962sewardj            | PUT(k, 1) | PUT(m, 0);
14744c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   return value;
14754c96e61dd85c172b999d6afc88ce6640aeba9962sewardj}
14764c96e61dd85c172b999d6afc88ce6640aeba9962sewardj
14778bde7f1c67483371551aac0d4019c24c919063f7sewardjstatic ULong bcd_to_dpb( ULong chunk )
14784c96e61dd85c172b999d6afc88ce6640aeba9962sewardj{
14794c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   Short a, b, c, d, e, f, g, h, i, j, k, m;
14804c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   Short p, q, r, s, t, u, v, w, x, y;
14814c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   ULong value;
14824c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   /* Convert a 3 digit BCD value to a 10 bit Densely Packed Binary (DPD) value
14834c96e61dd85c172b999d6afc88ce6640aeba9962sewardj    The boolean equations to calculate the value of each of the DPD bit
14844c96e61dd85c172b999d6afc88ce6640aeba9962sewardj    is given in Appendix B  of Book 1: Power ISA User Instruction set.  The
14854c96e61dd85c172b999d6afc88ce6640aeba9962sewardj    bits for the DPD number are [abcdefghijkm].  The bits for the BCD value
14864c96e61dd85c172b999d6afc88ce6640aeba9962sewardj    are [pqrstuvwxy].  The boolean logic equations in psuedo C code are:
14874c96e61dd85c172b999d6afc88ce6640aeba9962sewardj    */
14884c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   a = GET( chunk, 11 );
14894c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   b = GET( chunk, 10 );
14904c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   c = GET( chunk, 9 );
14914c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   d = GET( chunk, 8 );
14924c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   e = GET( chunk, 7 );
14934c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   f = GET( chunk, 6 );
14944c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   g = GET( chunk, 5 );
14954c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   h = GET( chunk, 4 );
14964c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   i = GET( chunk, 3 );
14974c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   j = GET( chunk, 2 );
14984c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   k = GET( chunk, 1 );
14994c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   m = GET( chunk, 0 );
15004c96e61dd85c172b999d6afc88ce6640aeba9962sewardj
15014c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   p = ( f & a & i & NOT(e) ) | ( j & a & NOT(i) ) | ( b & NOT(a) );
15024c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   q = ( g & a & i & NOT(e) ) | ( k & a & NOT(i) ) | ( c & NOT(a) );
15034c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   r = d;
15044c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   s = ( j & NOT(a) & e & NOT(i) ) | ( f & NOT(i) & NOT(e) )
15054c96e61dd85c172b999d6afc88ce6640aeba9962sewardj            | ( f & NOT(a) & NOT(e) ) | ( e & i );
15064c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   t = ( k & NOT(a) & e & NOT(i) ) | ( g & NOT(i) & NOT(e) )
15074c96e61dd85c172b999d6afc88ce6640aeba9962sewardj            | ( g & NOT(a) & NOT(e) ) | ( a & i );
15084c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   u = h;
15094c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   v = a | e | i;
15104c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   w = ( NOT(e) & j & NOT(i) ) | ( e & i ) | a;
15114c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   x = ( NOT(a) & k & NOT(i) ) | ( a & i ) | e;
15124c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   y = m;
15134c96e61dd85c172b999d6afc88ce6640aeba9962sewardj
15144c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   value = PUT(p, 9) | PUT(q, 8) | PUT(r, 7) | PUT(s, 6) | PUT(t, 5)
15154c96e61dd85c172b999d6afc88ce6640aeba9962sewardj            | PUT(u, 4) | PUT(v, 3) | PUT(w, 2) | PUT(x, 1) | y;
15164c96e61dd85c172b999d6afc88ce6640aeba9962sewardj
15174c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   return value;
15184c96e61dd85c172b999d6afc88ce6640aeba9962sewardj}
15194c96e61dd85c172b999d6afc88ce6640aeba9962sewardj
15208bde7f1c67483371551aac0d4019c24c919063f7sewardjULong h_calc_DPBtoBCD( ULong dpb )
15214c96e61dd85c172b999d6afc88ce6640aeba9962sewardj{
15224c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   ULong result, chunk;
15234c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   Int i;
15244c96e61dd85c172b999d6afc88ce6640aeba9962sewardj
15254c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   result = 0;
15264c96e61dd85c172b999d6afc88ce6640aeba9962sewardj
15274c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   for (i = 0; i < 5; i++) {
15284c96e61dd85c172b999d6afc88ce6640aeba9962sewardj      chunk = dpb >> ( 4 - i ) * 10;
15294c96e61dd85c172b999d6afc88ce6640aeba9962sewardj      result = result << 12;
15304c96e61dd85c172b999d6afc88ce6640aeba9962sewardj      result |= dpb_to_bcd( chunk & 0x3FF );
15314c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   }
15324c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   return result;
15334c96e61dd85c172b999d6afc88ce6640aeba9962sewardj}
15344c96e61dd85c172b999d6afc88ce6640aeba9962sewardj
15358bde7f1c67483371551aac0d4019c24c919063f7sewardjULong h_calc_BCDtoDPB( ULong bcd )
15364c96e61dd85c172b999d6afc88ce6640aeba9962sewardj{
15374c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   ULong result, chunk;
15384c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   Int i;
15394c96e61dd85c172b999d6afc88ce6640aeba9962sewardj
15404c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   result = 0;
15414c96e61dd85c172b999d6afc88ce6640aeba9962sewardj
15424c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   for (i = 0; i < 5; i++) {
15434c96e61dd85c172b999d6afc88ce6640aeba9962sewardj      chunk = bcd >> ( 4 - i ) * 12;
15444c96e61dd85c172b999d6afc88ce6640aeba9962sewardj      result = result << 10;
15454c96e61dd85c172b999d6afc88ce6640aeba9962sewardj      result |= bcd_to_dpb( chunk & 0xFFF );
15464c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   }
15474c96e61dd85c172b999d6afc88ce6640aeba9962sewardj   return result;
15484c96e61dd85c172b999d6afc88ce6640aeba9962sewardj}
/* The single-bit helper macros are only meant for the DPB/BCD
   conversion routines above; drop them so they do not remain
   defined for the rest of the file. */
#undef PUT
#undef GET
#undef NOT
1552310d6b2d02c3b22a8e496f3e26f3e9b3eb616ea5sewardj
15538bde7f1c67483371551aac0d4019c24c919063f7sewardj
/* ----------------------------------------------------- */
/* Signed and unsigned integer division, that behave like
   the ARMv7 UDIV and SDIV instructions.

   sdiv32 also behaves like 64-bit v8 SDIV on w-regs.
   udiv32 also behaves like 64-bit v8 UDIV on w-regs.
*/
/* ----------------------------------------------------- */
15628bde7f1c67483371551aac0d4019c24c919063f7sewardj
15638bde7f1c67483371551aac0d4019c24c919063f7sewardjUInt h_calc_udiv32_w_arm_semantics ( UInt x, UInt y )
15648bde7f1c67483371551aac0d4019c24c919063f7sewardj{
15658bde7f1c67483371551aac0d4019c24c919063f7sewardj   // Division by zero --> zero
15668bde7f1c67483371551aac0d4019c24c919063f7sewardj   if (UNLIKELY(y == 0)) return 0;
15678bde7f1c67483371551aac0d4019c24c919063f7sewardj   // C requires rounding towards zero, which is also what we need.
15688bde7f1c67483371551aac0d4019c24c919063f7sewardj   return x / y;
15698bde7f1c67483371551aac0d4019c24c919063f7sewardj}
15708bde7f1c67483371551aac0d4019c24c919063f7sewardj
1571bbcf188f6ae64a44fb31414eb9e1a738b4befcc0sewardjULong h_calc_udiv64_w_arm_semantics ( ULong x, ULong y )
1572bbcf188f6ae64a44fb31414eb9e1a738b4befcc0sewardj{
1573bbcf188f6ae64a44fb31414eb9e1a738b4befcc0sewardj   // Division by zero --> zero
1574bbcf188f6ae64a44fb31414eb9e1a738b4befcc0sewardj   if (UNLIKELY(y == 0)) return 0;
1575bbcf188f6ae64a44fb31414eb9e1a738b4befcc0sewardj   // C requires rounding towards zero, which is also what we need.
1576bbcf188f6ae64a44fb31414eb9e1a738b4befcc0sewardj   return x / y;
1577bbcf188f6ae64a44fb31414eb9e1a738b4befcc0sewardj}
1578bbcf188f6ae64a44fb31414eb9e1a738b4befcc0sewardj
15798bde7f1c67483371551aac0d4019c24c919063f7sewardjInt h_calc_sdiv32_w_arm_semantics ( Int x, Int y )
15808bde7f1c67483371551aac0d4019c24c919063f7sewardj{
15818bde7f1c67483371551aac0d4019c24c919063f7sewardj   // Division by zero --> zero
15828bde7f1c67483371551aac0d4019c24c919063f7sewardj   if (UNLIKELY(y == 0)) return 0;
1583bbcf188f6ae64a44fb31414eb9e1a738b4befcc0sewardj   // The single case that produces an unrepresentable result
15848bde7f1c67483371551aac0d4019c24c919063f7sewardj   if (UNLIKELY( ((UInt)x) == ((UInt)0x80000000)
15858bde7f1c67483371551aac0d4019c24c919063f7sewardj                 && ((UInt)y) == ((UInt)0xFFFFFFFF) ))
15868bde7f1c67483371551aac0d4019c24c919063f7sewardj      return (Int)(UInt)0x80000000;
15878bde7f1c67483371551aac0d4019c24c919063f7sewardj   // Else return the result rounded towards zero.  C89 says
15888bde7f1c67483371551aac0d4019c24c919063f7sewardj   // this is implementation defined (in the signed case), but gcc
15898bde7f1c67483371551aac0d4019c24c919063f7sewardj   // promises to round towards zero.  Nevertheless, at startup,
15908bde7f1c67483371551aac0d4019c24c919063f7sewardj   // in main_main.c, do a check for that.
15918bde7f1c67483371551aac0d4019c24c919063f7sewardj   return x / y;
15928bde7f1c67483371551aac0d4019c24c919063f7sewardj}
15938bde7f1c67483371551aac0d4019c24c919063f7sewardj
1594bbcf188f6ae64a44fb31414eb9e1a738b4befcc0sewardjLong h_calc_sdiv64_w_arm_semantics ( Long x, Long y )
1595bbcf188f6ae64a44fb31414eb9e1a738b4befcc0sewardj{
1596bbcf188f6ae64a44fb31414eb9e1a738b4befcc0sewardj   // Division by zero --> zero
1597bbcf188f6ae64a44fb31414eb9e1a738b4befcc0sewardj   if (UNLIKELY(y == 0)) return 0;
1598bbcf188f6ae64a44fb31414eb9e1a738b4befcc0sewardj   // The single case that produces an unrepresentable result
1599bbcf188f6ae64a44fb31414eb9e1a738b4befcc0sewardj   if (UNLIKELY( ((ULong)x) == ((ULong)0x8000000000000000ULL )
1600bbcf188f6ae64a44fb31414eb9e1a738b4befcc0sewardj                 && ((ULong)y) == ((ULong)0xFFFFFFFFFFFFFFFFULL ) ))
1601bbcf188f6ae64a44fb31414eb9e1a738b4befcc0sewardj      return (Long)(ULong)0x8000000000000000ULL;
1602bbcf188f6ae64a44fb31414eb9e1a738b4befcc0sewardj   // Else return the result rounded towards zero.  C89 says
1603bbcf188f6ae64a44fb31414eb9e1a738b4befcc0sewardj   // this is implementation defined (in the signed case), but gcc
1604bbcf188f6ae64a44fb31414eb9e1a738b4befcc0sewardj   // promises to round towards zero.  Nevertheless, at startup,
1605bbcf188f6ae64a44fb31414eb9e1a738b4befcc0sewardj   // in main_main.c, do a check for that.
1606bbcf188f6ae64a44fb31414eb9e1a738b4befcc0sewardj   return x / y;
1607bbcf188f6ae64a44fb31414eb9e1a738b4befcc0sewardj}
1608bbcf188f6ae64a44fb31414eb9e1a738b4befcc0sewardj
16098bde7f1c67483371551aac0d4019c24c919063f7sewardj
161038a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/*---------------------------------------------------------------*/
1611cef7d3e3df4796e35b4521158d9dc058f034aa87sewardj/*--- end                               host_generic_simd64.c ---*/
161238a3f868aebe4ade7279d7168e0efb6a98eaed5fsewardj/*---------------------------------------------------------------*/
1613