1ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
2ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*---------------------------------------------------------------*/
3ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*--- begin                               guest_generic_x87.c ---*/
4ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*---------------------------------------------------------------*/
5ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
6ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*
7ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   This file is part of Valgrind, a dynamic binary instrumentation
8ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   framework.
9ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
10436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov   Copyright (C) 2004-2013 OpenWorks LLP
11ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      info@open-works.net
12ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
13ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   This program is free software; you can redistribute it and/or
14ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   modify it under the terms of the GNU General Public License as
15ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   published by the Free Software Foundation; either version 2 of the
16ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   License, or (at your option) any later version.
17ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
18ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   This program is distributed in the hope that it will be useful, but
19ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   WITHOUT ANY WARRANTY; without even the implied warranty of
20ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   General Public License for more details.
22ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
23ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   You should have received a copy of the GNU General Public License
24ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   along with this program; if not, write to the Free Software
25ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   02110-1301, USA.
27ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
28ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   The GNU General Public License is contained in the file COPYING.
29ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
30ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   Neither the names of the U.S. Department of Energy nor the
31ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   University of California nor the names of its contributors may be
32ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   used to endorse or promote products derived from this software
33ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   without prior written permission.
34ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown*/
35ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
36ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* This file contains functions for doing some x87-specific
37ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   operations.  Both the amd64 and x86 front ends (guests) indirectly
38ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   call these functions via guest helper calls.  By putting them here,
39ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   code duplication is avoided.  Some of these functions are tricky
40ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   and hard to verify, so there is much to be said for only having one
41ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   copy thereof.
42ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown*/
43ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
44ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#include "libvex_basictypes.h"
45ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
46ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#include "main_util.h"
47ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#include "guest_generic_x87.h"
48ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
49ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
50ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* 80 and 64-bit floating point formats:
51ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
52ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   80-bit:
53ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
54ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    S  0       0-------0      zero
55ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    S  0       0X------X      denormals
56ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    S  1-7FFE  1X------X      normals (all normals have leading 1)
57ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    S  7FFF    10------0      infinity
58ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    S  7FFF    10X-----X      snan
59ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    S  7FFF    11X-----X      qnan
60ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
61ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   S is the sign bit.  For runs X----X, at least one of the Xs must be
62ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   nonzero.  Exponent is 15 bits, fractional part is 63 bits, and
63ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   there is an explicitly represented leading 1, and a sign bit,
64ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   giving 80 in total.
65ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
66ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   64-bit avoids the confusion of an explicitly represented leading 1
67ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   and so is simpler:
68ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
69ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    S  0      0------0   zero
70ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    S  0      X------X   denormals
71ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    S  1-7FE  any        normals
72ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    S  7FF    0------0   infinity
73ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    S  7FF    0X-----X   snan
74ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown    S  7FF    1X-----X   qnan
75ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
76ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   Exponent is 11 bits, fractional part is 52 bits, and there is a
77ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   sign bit, giving 64 in total.
78ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown*/
79ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
80ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
81ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic inline UInt read_bit_array ( UChar* arr, UInt n )
82ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{
83ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   UChar c = arr[n >> 3];
84ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   c >>= (n&7);
85ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   return c & 1;
86ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown}
87ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
88ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic inline void write_bit_array ( UChar* arr, UInt n, UInt b )
89ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{
90ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   UChar c = arr[n >> 3];
91ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   c = toUChar( c & ~(1 << (n&7)) );
92ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   c = toUChar( c | ((b&1) << (n&7)) );
93ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   arr[n >> 3] = c;
94ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown}
95ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
96ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* Convert an IEEE754 double (64-bit) into an x87 extended double
97ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   (80-bit), mimicing the hardware fairly closely.  Both numbers are
98ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   stored little-endian.  Limitations, all of which could be fixed,
99ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   given some level of hassle:
100ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
101ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   * Identity of NaNs is not preserved.
102ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
103ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   See comments in the code for more details.
104ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown*/
105ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownvoid convert_f64le_to_f80le ( /*IN*/UChar* f64, /*OUT*/UChar* f80 )
106ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{
107ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   Bool  mantissaIsZero;
108ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   Int   bexp, i, j, shift;
109ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   UChar sign;
110ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
111ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   sign = toUChar( (f64[7] >> 7) & 1 );
112ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F);
113ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   bexp &= 0x7FF;
114ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
115ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   mantissaIsZero = False;
116ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   if (bexp == 0 || bexp == 0x7FF) {
117ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      /* We'll need to know whether or not the mantissa (bits 51:0) is
118ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         all zeroes in order to handle these cases.  So figure it
119ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         out. */
120ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      mantissaIsZero
121ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         = toBool(
122ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown              (f64[6] & 0x0F) == 0
123ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown              && f64[5] == 0 && f64[4] == 0 && f64[3] == 0
124ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown              && f64[2] == 0 && f64[1] == 0 && f64[0] == 0
125ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown           );
126ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   }
127ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
128ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   /* If the exponent is zero, either we have a zero or a denormal.
129ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      Produce a zero.  This is a hack in that it forces denormals to
130ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      zero.  Could do better. */
131ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   if (bexp == 0) {
132ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      f80[9] = toUChar( sign << 7 );
133ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      f80[8] = f80[7] = f80[6] = f80[5] = f80[4]
134ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown             = f80[3] = f80[2] = f80[1] = f80[0] = 0;
135ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
136ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      if (mantissaIsZero)
137ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         /* It really is zero, so that's all we can do. */
138ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         return;
139ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
140ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      /* There is at least one 1-bit in the mantissa.  So it's a
141ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         potentially denormalised double -- but we can produce a
142ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         normalised long double.  Count the leading zeroes in the
143ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         mantissa so as to decide how much to bump the exponent down
144ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         by.  Note, this is SLOW. */
145ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      shift = 0;
146ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      for (i = 51; i >= 0; i--) {
147ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown        if (read_bit_array(f64, i))
148ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown           break;
149ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown        shift++;
150ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      }
151ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
152ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      /* and copy into place as many bits as we can get our hands on. */
153ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      j = 63;
154ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      for (i = 51 - shift; i >= 0; i--) {
155ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         write_bit_array( f80, j,
156ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown     	 read_bit_array( f64, i ) );
157ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         j--;
158ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      }
159ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
160ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      /* Set the exponent appropriately, and we're done. */
161ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      bexp -= shift;
162ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      bexp += (16383 - 1023);
163ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      f80[9] = toUChar( (sign << 7) | ((bexp >> 8) & 0xFF) );
164ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      f80[8] = toUChar( bexp & 0xFF );
165ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      return;
166ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   }
167ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
168ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   /* If the exponent is 7FF, this is either an Infinity, a SNaN or
169ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      QNaN, as determined by examining bits 51:0, thus:
170ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown          0  ... 0    Inf
171ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown          0X ... X    SNaN
172ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown          1X ... X    QNaN
173ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      where at least one of the Xs is not zero.
174ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   */
175ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   if (bexp == 0x7FF) {
176ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      if (mantissaIsZero) {
177ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         /* Produce an appropriately signed infinity:
178ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown            S 1--1 (15)  1  0--0 (63)
179ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         */
180ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         f80[9] = toUChar( (sign << 7) | 0x7F );
181ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         f80[8] = 0xFF;
182ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         f80[7] = 0x80;
183ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         f80[6] = f80[5] = f80[4] = f80[3]
184ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                = f80[2] = f80[1] = f80[0] = 0;
185ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         return;
186ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      }
187ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      /* So it's either a QNaN or SNaN.  Distinguish by considering
188ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         bit 51.  Note, this destroys all the trailing bits
189ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         (identity?) of the NaN.  IEEE754 doesn't require preserving
190ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         these (it only requires that there be one QNaN value and one
191ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         SNaN value), but x87 does seem to have some ability to
192ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         preserve them.  Anyway, here, the NaN's identity is
193ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         destroyed.  Could be improved. */
194ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      if (f64[6] & 8) {
195663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         /* QNaN.  Make a canonical QNaN:
196663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng            S 1--1 (15)  1 1  0--0 (62)
197ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         */
198ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         f80[9] = toUChar( (sign << 7) | 0x7F );
199ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         f80[8] = 0xFF;
200663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         f80[7] = 0xC0;
201ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         f80[6] = f80[5] = f80[4] = f80[3]
202663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng                = f80[2] = f80[1] = f80[0] = 0x00;
203ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      } else {
204ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         /* SNaN.  Make a SNaN:
205663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng            S 1--1 (15)  1 0  1--1 (62)
206ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         */
207ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         f80[9] = toUChar( (sign << 7) | 0x7F );
208ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         f80[8] = 0xFF;
209663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         f80[7] = 0xBF;
210ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         f80[6] = f80[5] = f80[4] = f80[3]
211ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                = f80[2] = f80[1] = f80[0] = 0xFF;
212ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      }
213ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      return;
214ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   }
215ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
216ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   /* It's not a zero, denormal, infinity or nan.  So it must be a
217ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      normalised number.  Rebias the exponent and build the new
218ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      number.  */
219ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   bexp += (16383 - 1023);
220ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
221ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   f80[9] = toUChar( (sign << 7) | ((bexp >> 8) & 0xFF) );
222ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   f80[8] = toUChar( bexp & 0xFF );
223ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   f80[7] = toUChar( (1 << 7) | ((f64[6] << 3) & 0x78)
224ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                              | ((f64[5] >> 5) & 7) );
225ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   f80[6] = toUChar( ((f64[5] << 3) & 0xF8) | ((f64[4] >> 5) & 7) );
226ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   f80[5] = toUChar( ((f64[4] << 3) & 0xF8) | ((f64[3] >> 5) & 7) );
227ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   f80[4] = toUChar( ((f64[3] << 3) & 0xF8) | ((f64[2] >> 5) & 7) );
228ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   f80[3] = toUChar( ((f64[2] << 3) & 0xF8) | ((f64[1] >> 5) & 7) );
229ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   f80[2] = toUChar( ((f64[1] << 3) & 0xF8) | ((f64[0] >> 5) & 7) );
230ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   f80[1] = toUChar( ((f64[0] << 3) & 0xF8) );
231ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   f80[0] = toUChar( 0 );
232ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown}
233ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
234ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
235ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* Convert an x87 extended double (80-bit) into an IEEE 754 double
236ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   (64-bit), mimicking the hardware fairly closely.  Both numbers are
237ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   stored little-endian.  Limitations, both of which could be fixed,
238ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   given some level of hassle:
239ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
240ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   * Rounding following truncation could be a bit better.
241ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
242ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   * Identity of NaNs is not preserved.
243ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
244ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   See comments in the code for more details.
245ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown*/
246ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownvoid convert_f80le_to_f64le ( /*IN*/UChar* f80, /*OUT*/UChar* f64 )
247ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{
248ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   Bool  isInf;
249ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   Int   bexp, i, j;
250ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   UChar sign;
251ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
252ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   sign = toUChar((f80[9] >> 7) & 1);
253ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   bexp = (((UInt)f80[9]) << 8) | (UInt)f80[8];
254ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   bexp &= 0x7FFF;
255ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
256ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   /* If the exponent is zero, either we have a zero or a denormal.
257ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      But an extended precision denormal becomes a double precision
258ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      zero, so in either case, just produce the appropriately signed
259ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      zero. */
260ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   if (bexp == 0) {
261ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      f64[7] = toUChar(sign << 7);
262ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      f64[6] = f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
263ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      return;
264ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   }
265ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
266ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   /* If the exponent is 7FFF, this is either an Infinity, a SNaN or
267ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      QNaN, as determined by examining bits 62:0, thus:
268663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng          10  ... 0    Inf
269663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng          10X ... X    SNaN
270663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng          11X ... X    QNaN
271ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      where at least one of the Xs is not zero.
272ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   */
273ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   if (bexp == 0x7FFF) {
274ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      isInf = toBool(
275ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                 (f80[7] & 0x7F) == 0
276ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                 && f80[6] == 0 && f80[5] == 0 && f80[4] == 0
277ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                 && f80[3] == 0 && f80[2] == 0 && f80[1] == 0
278ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                 && f80[0] == 0
279ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown              );
280ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      if (isInf) {
281ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         if (0 == (f80[7] & 0x80))
282ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown            goto wierd_NaN;
283ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         /* Produce an appropriately signed infinity:
284ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown            S 1--1 (11)  0--0 (52)
285ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         */
286ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         f64[7] = toUChar((sign << 7) | 0x7F);
287ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         f64[6] = 0xF0;
288ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
289ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         return;
290ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      }
291ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      /* So it's either a QNaN or SNaN.  Distinguish by considering
292663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         bit 61.  Note, this destroys all the trailing bits
293ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         (identity?) of the NaN.  IEEE754 doesn't require preserving
294ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         these (it only requires that there be one QNaN value and one
295ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         SNaN value), but x87 does seem to have some ability to
296ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         preserve them.  Anyway, here, the NaN's identity is
297ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         destroyed.  Could be improved. */
298663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      if (f80[7] & 0x40) {
299663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         /* QNaN.  Make a canonical QNaN:
300663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng            S 1--1 (11)  1  0--0 (51)
301ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         */
302ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         f64[7] = toUChar((sign << 7) | 0x7F);
303663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         f64[6] = 0xF8;
304663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0x00;
305ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      } else {
306ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         /* SNaN.  Make a SNaN:
307ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown            S 1--1 (11)  0  1--1 (51)
308ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         */
309ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         f64[7] = toUChar((sign << 7) | 0x7F);
310ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         f64[6] = 0xF7;
311ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0xFF;
312ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      }
313ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      return;
314ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   }
315ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
316ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   /* If it's not a Zero, NaN or Inf, and the integer part (bit 62) is
317ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      zero, the x87 FPU appears to consider the number denormalised
318ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      and converts it to a QNaN. */
319ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   if (0 == (f80[7] & 0x80)) {
320ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      wierd_NaN:
321ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      /* Strange hardware QNaN:
322ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         S 1--1 (11)  1  0--0 (51)
323ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      */
324ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      /* On a PIII, these QNaNs always appear with sign==1.  I have
325ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         no idea why. */
326ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      f64[7] = (1 /*sign*/ << 7) | 0x7F;
327ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      f64[6] = 0xF8;
328ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
329ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      return;
330ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   }
331ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
332ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   /* It's not a zero, denormal, infinity or nan.  So it must be a
333ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      normalised number.  Rebias the exponent and consider. */
334ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   bexp -= (16383 - 1023);
335ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   if (bexp >= 0x7FF) {
336ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      /* It's too big for a double.  Construct an infinity. */
337ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      f64[7] = toUChar((sign << 7) | 0x7F);
338ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      f64[6] = 0xF0;
339ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
340ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      return;
341ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   }
342ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
343ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   if (bexp <= 0) {
344ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      /* It's too small for a normalised double.  First construct a
345ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         zero and then see if it can be improved into a denormal.  */
346ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      f64[7] = toUChar(sign << 7);
347ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      f64[6] = f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
348ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
349ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      if (bexp < -52)
350ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         /* Too small even for a denormal. */
351ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         return;
352ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
353ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      /* Ok, let's make a denormal.  Note, this is SLOW. */
354ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      /* Copy bits 63, 62, 61, etc of the src mantissa into the dst,
355ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         indexes 52+bexp, 51+bexp, etc, until k+bexp < 0. */
356ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      /* bexp is in range -52 .. 0 inclusive */
357ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      for (i = 63; i >= 0; i--) {
358ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         j = i - 12 + bexp;
359ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         if (j < 0) break;
360ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         /* We shouldn't really call vassert from generated code. */
361ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         vassert(j >= 0 && j < 52);
362ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         write_bit_array ( f64,
363ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                           j,
364ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                           read_bit_array ( f80, i ) );
365ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      }
366ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      /* and now we might have to round ... */
367ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      if (read_bit_array(f80, 10+1 - bexp) == 1)
368ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         goto do_rounding;
369ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
370ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      return;
371ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   }
372ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
373ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   /* Ok, it's a normalised number which is representable as a double.
374ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      Copy the exponent and mantissa into place. */
375ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   /*
376ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   for (i = 0; i < 52; i++)
377ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      write_bit_array ( f64,
378ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                        i,
379ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                        read_bit_array ( f80, i+11 ) );
380ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   */
381ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   f64[0] = toUChar( (f80[1] >> 3) | (f80[2] << 5) );
382ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   f64[1] = toUChar( (f80[2] >> 3) | (f80[3] << 5) );
383ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   f64[2] = toUChar( (f80[3] >> 3) | (f80[4] << 5) );
384ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   f64[3] = toUChar( (f80[4] >> 3) | (f80[5] << 5) );
385ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   f64[4] = toUChar( (f80[5] >> 3) | (f80[6] << 5) );
386ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   f64[5] = toUChar( (f80[6] >> 3) | (f80[7] << 5) );
387ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
388ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   f64[6] = toUChar( ((bexp << 4) & 0xF0) | ((f80[7] >> 3) & 0x0F) );
389ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
390ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   f64[7] = toUChar( (sign << 7) | ((bexp >> 4) & 0x7F) );
391ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
392ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   /* Now consider any rounding that needs to happen as a result of
393ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      truncating the mantissa. */
394ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   if (f80[1] & 4) /* read_bit_array(f80, 10) == 1) */ {
395ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
396ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      /* If the bottom bits of f80 are "100 0000 0000", then the
397ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         infinitely precise value is deemed to be mid-way between the
398ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         two closest representable values.  Since we're doing
399ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         round-to-nearest (the default mode), in that case it is the
400ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         bit immediately above which indicates whether we should round
401ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         upwards or not -- if 0, we don't.  All that is encapsulated
402ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         in the following simple test. */
403ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      if ((f80[1] & 0xF) == 4/*0100b*/ && f80[0] == 0)
404ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         return;
405ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
406ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      do_rounding:
407ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      /* Round upwards.  This is a kludge.  Once in every 2^24
408ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         roundings (statistically) the bottom three bytes are all 0xFF
409ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         and so we don't round at all.  Could be improved. */
410ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      if (f64[0] != 0xFF) {
411ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         f64[0]++;
412ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      }
413ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      else
414ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      if (f64[0] == 0xFF && f64[1] != 0xFF) {
415ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         f64[0] = 0;
416ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         f64[1]++;
417ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      }
418ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      else
419ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      if (f64[0] == 0xFF && f64[1] == 0xFF && f64[2] != 0xFF) {
420ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         f64[0] = 0;
421ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         f64[1] = 0;
422ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         f64[2]++;
423ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      }
424ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      /* else we don't round, but we should. */
425ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   }
426ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown}
427ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
428ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
/* CALLED FROM GENERATED CODE: CLEAN HELPER */
/* Extract the signed significand or exponent component as per
   fxtract.  Arg and result are doubles travelling under the guise of
   ULongs.  Returns significand when getExp is zero and exponent
   otherwise.

   Special cases, as coded below:
      +inf  ->  significand +inf,  exponent +inf
      -inf  ->  significand -inf,  exponent +inf
      NaN   ->  quiet NaN carrying the sign of arg (both components)
      +0    ->  significand +0,    exponent -inf
      -0    ->  significand -0,    exponent -inf

   Denormal arguments are normalised first, so for every other input
   the significand returned has magnitude in 1.0 .. 2.0-epsilon and
   the exponent is an integer in the range -1074 .. 1023. */
ULong x86amd64g_calculate_FXTRACT ( ULong arg, HWord getExp )
{
   ULong  uSig, uExp;
   Int    sExp, i;
   UInt   sign, expExp;

   /*
    S  7FF    0------0   infinity
    S  7FF    0X-----X   snan
    S  7FF    1X-----X   qnan
   */
   const ULong posInf  = 0x7FF0000000000000ULL;
   const ULong negInf  = 0xFFF0000000000000ULL;
   const ULong nanMask = 0x7FF0000000000000ULL;
   const ULong qNan    = 0x7FF8000000000000ULL;
   const ULong posZero = 0x0000000000000000ULL;
   const ULong negZero = 0x8000000000000000ULL;
   const ULong bit51   = 1ULL << 51;
   const ULong bit52   = 1ULL << 52;
   const ULong sigMask = bit52 - 1;

   /* Mimic Core i5 behaviour for special cases. */
   if (arg == posInf)
      /* Significand and exponent are both +inf. */
      return posInf;
   if (arg == negInf)
      return getExp ? posInf : negInf;
   /* Both infinities have been dealt with, so anything that still has
      an all-ones exponent field is a NaN. */
   if ((arg & nanMask) == nanMask)
      return qNan | (arg & (1ULL << 63));
   if (arg == posZero)
      return getExp ? negInf : posZero;
   if (arg == negZero)
      return getExp ? negInf : negZero;

   /* Split into sign, exponent and significand. */
   sign = ((UInt)(arg >> 63)) & 1;

   /* Mask off exponent & sign. uSig is in range 0 .. 2^52-1. */
   uSig = arg & sigMask;

   /* Get the (biased) exponent. */
   sExp = ((Int)(arg >> 52)) & 0x7FF;

   /* Deal with denormals: if the exponent is zero, then the
      significand cannot possibly be zero (negZero/posZero are handled
      above).  Shift the significand left until bit 51 of it becomes
      1, and decrease the exponent accordingly.  The final shift moves
      that leading 1 up to bit 52, where the implied 1 of a normalised
      number lives.
   */
   if (sExp == 0) {
      for (i = 0; i < 52; i++) {
         if (uSig & bit51)
            break;
         uSig <<= 1;
         sExp--;
      }
      uSig <<= 1;
   } else {
      /* Add the implied leading-1 in the significand. */
      uSig |= bit52;
   }

   /* Build the significand result directly as a double bit pattern:
      keep the 52 fraction bits, force the biased exponent to 0x3FF
      (that is, scale by 2^0) and roll in the sign. */
   uSig &= sigMask;
   uSig |= 0x3FF0000000000000ULL;
   if (sign)
      uSig ^= negZero;

   /* Convert the unbiased exponent into a double, by hand.  This is
      an exact conversion. */
   sExp -= 1023;
   if (sExp == 0) {
      uExp = 0;
   } else {
      /* Start with |sExp| viewed as a 53-bit integer significand whose
         binary point is 52 places to the right of bit 52; the
         matching biased exponent is 0x3FF+52.  Every left shift that
         moves the leading 1 towards bit 52 is compensated for by
         decrementing that exponent. */
      uExp   = sExp < 0 ? -sExp : sExp;
      expExp = 0x3FF +52;
      /* 1 <= uExp <= 1074 */
      /* Skip first 42 iterations of normalisation loop as we know they
         will always happen */
      uExp <<= 42;
      expExp -= 42;
      for (i = 0; i < 52-42; i++) {
         if (uExp & bit52)
            break;
         uExp <<= 1;
         expExp--;
      }
      /* Drop the now-implicit leading 1, then insert the exponent
         field and the sign. */
      uExp &= sigMask;
      uExp |= ((ULong)expExp) << 52;
      if (sExp < 0) uExp ^= negZero;
   }

   return getExp ? uExp : uSig;
}
536ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
537ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
538ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
539ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*---------------------------------------------------------*/
540ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*--- SSE4.2 PCMP{E,I}STR{I,M} helpers                  ---*/
541ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*---------------------------------------------------------*/
542ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
/* We need the definitions for OSZACP eflags/rflags offsets.
   #including guest_{amd64,x86}_defs.h causes chaos, so just copy the
   required values directly.  They are not going to change in the
   foreseeable future :-)
*/

/* Bit position of each of the O, S, Z, A, C and P flags. */
#define SHIFT_O   11
#define SHIFT_S   7
#define SHIFT_Z   6
#define SHIFT_A   4
#define SHIFT_C   0
#define SHIFT_P   2

/* Single-bit mask for each flag, derived from the positions above. */
#define MASK_O    (1 << SHIFT_O)
#define MASK_S    (1 << SHIFT_S)
#define MASK_Z    (1 << SHIFT_Z)
#define MASK_A    (1 << SHIFT_A)
#define MASK_C    (1 << SHIFT_C)
#define MASK_P    (1 << SHIFT_P)
562ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
563ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
/* Count leading zeroes of a 32-bit value, w/ 0-produces-32 semantics.

   This is a plain binary search: at each step, if the top half of
   the remaining window is empty, add its width to the count and
   shift the lower half up into its place.  Only unsigned arithmetic
   is used, so the result does not depend on how the compiler shifts
   negative signed values. */
static UInt clz32 ( UInt x )
{
   UInt n = 0;

   if (x == 0)
      return 32;

   if ((x & 0xFFFF0000U) == 0) { n += 16; x <<= 16; }
   if ((x & 0xFF000000U) == 0) { n +=  8; x <<=  8; }
   if ((x & 0xF0000000U) == 0) { n +=  4; x <<=  4; }
   if ((x & 0xC0000000U) == 0) { n +=  2; x <<=  2; }
   if ((x & 0x80000000U) == 0) { n +=  1; }

   return n;
}
589ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
590ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic UInt ctz32 ( UInt x )
591ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{
592ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   return 32 - clz32((~x) & (x-1));
593ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown}
594ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
/* Convert a 4-bit value to a 32-bit value by cloning each bit 8
   times: bit i of the argument (i = 0 .. 3) becomes byte i of the
   result, 0xFF if the bit is set and 0x00 if it is clear.  Bits of
   the argument above bit 3 are ignored. */
static UInt bits4_to_bytes4 ( UInt bits4 )
{
   UInt r = 0;
   if (bits4 & 1) r |= 0x000000FF;
   if (bits4 & 2) r |= 0x0000FF00;
   if (bits4 & 4) r |= 0x00FF0000;
   if (bits4 & 8) r |= 0xFF000000;
   return r;
}
607ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
608ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
/* Convert a 2-bit value to a 32-bit value by cloning each bit 16
   times: bit 0 of the argument fills the low 16 bits of the result
   and bit 1 fills the high 16 bits.  Bits of the argument above
   bit 1 are ignored. */
static UInt bits2_to_bytes4 ( UInt bits2 )
{
   UInt r = 0;
   if (bits2 & 1) r |= 0x0000FFFF;
   if (bits2 & 2) r |= 0xFFFF0000;
   return r;
}
619663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
620663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
621ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* Given partial results from a pcmpXstrX operation (intRes1,
622ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   basically), generate an I- or M-format output value, also the new
623ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   OSZACP flags.  */
624ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic
625ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownvoid compute_PCMPxSTRx_gen_output (/*OUT*/V128* resV,
626ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                                   /*OUT*/UInt* resOSZACP,
627ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                                   UInt intRes1,
628ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                                   UInt zmaskL, UInt zmaskR,
629ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                                   UInt validL,
630ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                                   UInt pol, UInt idx,
631ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                                   Bool isxSTRM )
632ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{
633ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   vassert((pol >> 2) == 0);
634ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   vassert((idx >> 1) == 0);
635ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
636ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   UInt intRes2 = 0;
637ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   switch (pol) {
638ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 0: intRes2 = intRes1;          break; // pol +
639ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 1: intRes2 = ~intRes1;         break; // pol -
640ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 2: intRes2 = intRes1;          break; // pol m+
641ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      case 3: intRes2 = intRes1 ^ validL; break; // pol m-
642ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   }
643ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   intRes2 &= 0xFFFF;
644ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
645ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   if (isxSTRM) {
646ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
647ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      // generate M-format output (a bit or byte mask in XMM0)
648ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      if (idx) {
649ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         resV->w32[0] = bits4_to_bytes4( (intRes2 >>  0) & 0xF );
650ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         resV->w32[1] = bits4_to_bytes4( (intRes2 >>  4) & 0xF );
651ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         resV->w32[2] = bits4_to_bytes4( (intRes2 >>  8) & 0xF );
652ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         resV->w32[3] = bits4_to_bytes4( (intRes2 >> 12) & 0xF );
653ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      } else {
654ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         resV->w32[0] = intRes2 & 0xFFFF;
655ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         resV->w32[1] = 0;
656ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         resV->w32[2] = 0;
657ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         resV->w32[3] = 0;
658ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      }
659ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
660ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   } else {
661ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
662ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      // generate I-format output (an index in ECX)
663ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      // generate ecx value
664ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      UInt newECX = 0;
665ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      if (idx) {
666ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         // index of ms-1-bit
667ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         newECX = intRes2 == 0 ? 16 : (31 - clz32(intRes2));
668ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      } else {
669ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         // index of ls-1-bit
670ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         newECX = intRes2 == 0 ? 16 : ctz32(intRes2);
671ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      }
672ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
673ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      resV->w32[0] = newECX;
674ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      resV->w32[1] = 0;
675ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      resV->w32[2] = 0;
676ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      resV->w32[3] = 0;
677ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
678ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   }
679ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
680ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   // generate new flags, common to all ISTRI and ISTRM cases
681ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   *resOSZACP    // A, P are zero
682ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown     = ((intRes2 == 0) ? 0 : MASK_C) // C == 0 iff intRes2 == 0
683ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown     | ((zmaskL == 0)  ? 0 : MASK_Z) // Z == 1 iff any in argL is 0
684ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown     | ((zmaskR == 0)  ? 0 : MASK_S) // S == 1 iff any in argR is 0
685ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown     | ((intRes2 & 1) << SHIFT_O);   // O == IntRes2[0]
686ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown}
687ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
688ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
689663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng/* Given partial results from a 16-bit pcmpXstrX operation (intRes1,
690663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   basically), generate an I- or M-format output value, also the new
691663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   OSZACP flags.  */
692663860b1408516d02ebfcb3a9999a134e6cfb223Ben Chengstatic
693663860b1408516d02ebfcb3a9999a134e6cfb223Ben Chengvoid compute_PCMPxSTRx_gen_output_wide (/*OUT*/V128* resV,
694663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng                                        /*OUT*/UInt* resOSZACP,
695663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng                                        UInt intRes1,
696663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng                                        UInt zmaskL, UInt zmaskR,
697663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng                                        UInt validL,
698663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng                                        UInt pol, UInt idx,
699663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng                                        Bool isxSTRM )
700663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng{
701663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   vassert((pol >> 2) == 0);
702663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   vassert((idx >> 1) == 0);
703663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
704663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   UInt intRes2 = 0;
705663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   switch (pol) {
706663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      case 0: intRes2 = intRes1;          break; // pol +
707663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      case 1: intRes2 = ~intRes1;         break; // pol -
708663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      case 2: intRes2 = intRes1;          break; // pol m+
709663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      case 3: intRes2 = intRes1 ^ validL; break; // pol m-
710663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   }
711663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   intRes2 &= 0xFF;
712663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
713663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   if (isxSTRM) {
714663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
715663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      // generate M-format output (a bit or byte mask in XMM0)
716663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      if (idx) {
717663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         resV->w32[0] = bits2_to_bytes4( (intRes2 >> 0) & 0x3 );
718663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         resV->w32[1] = bits2_to_bytes4( (intRes2 >> 2) & 0x3 );
719663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         resV->w32[2] = bits2_to_bytes4( (intRes2 >> 4) & 0x3 );
720663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         resV->w32[3] = bits2_to_bytes4( (intRes2 >> 6) & 0x3 );
721663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      } else {
722663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         resV->w32[0] = intRes2 & 0xFF;
723663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         resV->w32[1] = 0;
724663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         resV->w32[2] = 0;
725663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         resV->w32[3] = 0;
726663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      }
727663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
728663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   } else {
729663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
730663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      // generate I-format output (an index in ECX)
731663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      // generate ecx value
732663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      UInt newECX = 0;
733663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      if (idx) {
734663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         // index of ms-1-bit
735663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         newECX = intRes2 == 0 ? 8 : (31 - clz32(intRes2));
736663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      } else {
737663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         // index of ls-1-bit
738663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         newECX = intRes2 == 0 ? 8 : ctz32(intRes2);
739663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      }
740663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
741663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      resV->w32[0] = newECX;
742663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      resV->w32[1] = 0;
743663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      resV->w32[2] = 0;
744663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      resV->w32[3] = 0;
745663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
746663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   }
747663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
748663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   // generate new flags, common to all ISTRI and ISTRM cases
749663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   *resOSZACP    // A, P are zero
750663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng     = ((intRes2 == 0) ? 0 : MASK_C) // C == 0 iff intRes2 == 0
751663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng     | ((zmaskL == 0)  ? 0 : MASK_Z) // Z == 1 iff any in argL is 0
752663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng     | ((zmaskR == 0)  ? 0 : MASK_S) // S == 1 iff any in argR is 0
753663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng     | ((intRes2 & 1) << SHIFT_O);   // O == IntRes2[0]
754663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng}
755663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
756663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
757ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* Compute result and new OSZACP flags for all PCMP{E,I}STR{I,M}
758663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   variants on 8-bit data.
759ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
760ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   For xSTRI variants, the new ECX value is placed in the 32 bits
761ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   pointed to by *resV, and the top 96 bits are zeroed.  For xSTRM
762ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   variants, the result is a 128 bit value and is placed at *resV in
763ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   the obvious way.
764ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
765ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   For all variants, the new OSZACP value is placed at *resOSZACP.
766ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
767ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   argLV and argRV are the vector args.  The caller must prepare a
768ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   16-bit mask for each, zmaskL and zmaskR.  For ISTRx variants this
769ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   must be 1 for each zero byte of of the respective arg.  For ESTRx
770ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   variants this is derived from the explicit length indication, and
771ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   must be 0 in all places except at the bit index corresponding to
772ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   the valid length (0 .. 16).  If the valid length is 16 then the
773ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   mask must be all zeroes.  In all cases, bits 31:16 must be zero.
774ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
775ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   imm8 is the original immediate from the instruction.  isSTRM
776ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   indicates whether this is a xSTRM or xSTRI variant, which controls
777ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   how much of *res is written.
778ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
779ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   If the given imm8 case can be handled, the return value is True.
780ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   If not, False is returned, and neither *res not *resOSZACP are
781ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   altered.
782ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown*/
783ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
784ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff BrownBool compute_PCMPxSTRx ( /*OUT*/V128* resV,
785ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                         /*OUT*/UInt* resOSZACP,
786ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                         V128* argLV,  V128* argRV,
787ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                         UInt zmaskL, UInt zmaskR,
788ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                         UInt imm8,   Bool isxSTRM )
789ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{
790ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   vassert(imm8 < 0x80);
791ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   vassert((zmaskL >> 16) == 0);
792ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   vassert((zmaskR >> 16) == 0);
793ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
794ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   /* Explicitly reject any imm8 values that haven't been validated,
795ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      even if they would probably work.  Life is too short to have
796ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      unvalidated cases in the code base. */
797ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   switch (imm8) {
798eb0bae136f4eeaaf29761dddb148b118fb824632Dmitriy Ivanov      case 0x00: case 0x02: case 0x08: case 0x0A: case 0x0C: case 0x0E:
799eb0bae136f4eeaaf29761dddb148b118fb824632Dmitriy Ivanov      case 0x12: case 0x14: case 0x1A:
800eb0bae136f4eeaaf29761dddb148b118fb824632Dmitriy Ivanov      case 0x30: case 0x34: case 0x38: case 0x3A:
801eb0bae136f4eeaaf29761dddb148b118fb824632Dmitriy Ivanov      case 0x40: case 0x44: case 0x46: case 0x4A:
802ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         break;
803ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      default:
804ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         return False;
805ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   }
806ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
807ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   UInt fmt = (imm8 >> 0) & 3; // imm8[1:0]  data format
808ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   UInt agg = (imm8 >> 2) & 3; // imm8[3:2]  aggregation fn
809ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   UInt pol = (imm8 >> 4) & 3; // imm8[5:4]  polarity
810ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   UInt idx = (imm8 >> 6) & 1; // imm8[6]    1==msb/bytemask
811ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
812ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   /*----------------------------------------*/
813ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   /*-- strcmp on byte data                --*/
814ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   /*----------------------------------------*/
815ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
816ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   if (agg == 2/*equal each, aka strcmp*/
817ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown       && (fmt == 0/*ub*/ || fmt == 2/*sb*/)) {
818ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      Int    i;
819ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      UChar* argL = (UChar*)argLV;
820ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      UChar* argR = (UChar*)argRV;
821ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      UInt boolResII = 0;
822ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      for (i = 15; i >= 0; i--) {
823ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         UChar cL  = argL[i];
824ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         UChar cR  = argR[i];
825ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         boolResII = (boolResII << 1) | (cL == cR ? 1 : 0);
826ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      }
827ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      UInt validL = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
828ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      UInt validR = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
829ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
830ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      // do invalidation, common to all equal-each cases
831ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      UInt intRes1
832ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         = (boolResII & validL & validR)  // if both valid, use cmpres
833ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown           | (~ (validL | validR));       // if both invalid, force 1
834ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown                                          // else force 0
835ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      intRes1 &= 0xFFFF;
836ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
837ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      // generate I-format output
838ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      compute_PCMPxSTRx_gen_output(
839ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         resV, resOSZACP,
840ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM
841ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      );
842ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
843ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      return True;
844ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   }
845ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
846ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   /*----------------------------------------*/
847ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   /*-- set membership on byte data        --*/
848ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   /*----------------------------------------*/
849ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
850ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   if (agg == 0/*equal any, aka find chars in a set*/
851ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown       && (fmt == 0/*ub*/ || fmt == 2/*sb*/)) {
852ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      /* argL: the string,  argR: charset */
853ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      UInt   si, ci;
854ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      UChar* argL    = (UChar*)argLV;
855ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      UChar* argR    = (UChar*)argRV;
856ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      UInt   boolRes = 0;
857ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      UInt   validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
858ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      UInt   validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
859ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
860ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      for (si = 0; si < 16; si++) {
861ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         if ((validL & (1 << si)) == 0)
862ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown            // run off the end of the string.
863ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown            break;
864ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         UInt m = 0;
865ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         for (ci = 0; ci < 16; ci++) {
866ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown            if ((validR & (1 << ci)) == 0) break;
867ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown            if (argR[ci] == argL[si]) { m = 1; break; }
868ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         }
869ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         boolRes |= (m << si);
870ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      }
871ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
872ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      // boolRes is "pre-invalidated"
873ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      UInt intRes1 = boolRes & 0xFFFF;
874ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
875ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      // generate I-format output
876ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      compute_PCMPxSTRx_gen_output(
877ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         resV, resOSZACP,
878ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM
879ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      );
880ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
881ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      return True;
882ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   }
883ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
884ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   /*----------------------------------------*/
885ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   /*-- substring search on byte data      --*/
886ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   /*----------------------------------------*/
887ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
888ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   if (agg == 3/*equal ordered, aka substring search*/
889ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown       && (fmt == 0/*ub*/ || fmt == 2/*sb*/)) {
890ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
891ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      /* argL: haystack,  argR: needle */
892ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      UInt   ni, hi;
893ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      UChar* argL    = (UChar*)argLV;
894ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      UChar* argR    = (UChar*)argRV;
895ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      UInt   boolRes = 0;
896ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      UInt   validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
897ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      UInt   validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
898ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      for (hi = 0; hi < 16; hi++) {
899ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         UInt m = 1;
900ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         for (ni = 0; ni < 16; ni++) {
901ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown            if ((validR & (1 << ni)) == 0) break;
902ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown            UInt i = ni + hi;
903ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown            if (i >= 16) break;
904ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown            if (argL[i] != argR[ni]) { m = 0; break; }
905ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         }
906ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         boolRes |= (m << hi);
907663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         if ((validL & (1 << hi)) == 0)
908663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng            // run off the end of the haystack
909663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng            break;
910ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      }
911ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
912ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      // boolRes is "pre-invalidated"
913ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      UInt intRes1 = boolRes & 0xFFFF;
914ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
915ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      // generate I-format output
916ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      compute_PCMPxSTRx_gen_output(
917ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         resV, resOSZACP,
918ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM
919ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      );
920ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
921ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      return True;
922ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   }
923ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
924ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   /*----------------------------------------*/
925ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   /*-- ranges, unsigned byte data         --*/
926ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   /*----------------------------------------*/
927ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
928ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   if (agg == 1/*ranges*/
929ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown       && fmt == 0/*ub*/) {
930ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
931ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      /* argL: string,  argR: range-pairs */
932ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      UInt   ri, si;
933ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      UChar* argL    = (UChar*)argLV;
934ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      UChar* argR    = (UChar*)argRV;
935ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      UInt   boolRes = 0;
936ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      UInt   validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
937ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      UInt   validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
938ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      for (si = 0; si < 16; si++) {
939ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         if ((validL & (1 << si)) == 0)
940ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown            // run off the end of the string
941ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown            break;
942ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         UInt m = 0;
943ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         for (ri = 0; ri < 16; ri += 2) {
944ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown            if ((validR & (3 << ri)) != (3 << ri)) break;
945ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown            if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) {
946ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown               m = 1; break;
947ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown            }
948ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         }
949ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         boolRes |= (m << si);
950ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      }
951ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
952ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      // boolRes is "pre-invalidated"
953ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      UInt intRes1 = boolRes & 0xFFFF;
954ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
955ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      // generate I-format output
956ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      compute_PCMPxSTRx_gen_output(
957ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         resV, resOSZACP,
958ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown         intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM
959ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      );
960ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
961ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown      return True;
962ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   }
963ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
964663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   /*----------------------------------------*/
965663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   /*-- ranges, signed byte data           --*/
966663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   /*----------------------------------------*/
967663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
968663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   if (agg == 1/*ranges*/
969663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng       && fmt == 2/*sb*/) {
970663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
971663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      /* argL: string,  argR: range-pairs */
972663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      UInt   ri, si;
973663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      Char*  argL    = (Char*)argLV;
974663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      Char*  argR    = (Char*)argRV;
975663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      UInt   boolRes = 0;
976663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      UInt   validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
977663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      UInt   validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
978663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      for (si = 0; si < 16; si++) {
979663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         if ((validL & (1 << si)) == 0)
980663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng            // run off the end of the string
981663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng            break;
982663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         UInt m = 0;
983663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         for (ri = 0; ri < 16; ri += 2) {
984663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng            if ((validR & (3 << ri)) != (3 << ri)) break;
985663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng            if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) {
986663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng               m = 1; break;
987663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng            }
988663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         }
989663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         boolRes |= (m << si);
990663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      }
991663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
992663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      // boolRes is "pre-invalidated"
993663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      UInt intRes1 = boolRes & 0xFFFF;
994663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
995663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      // generate I-format output
996663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      compute_PCMPxSTRx_gen_output(
997663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         resV, resOSZACP,
998663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM
999663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      );
1000663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
1001663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      return True;
1002663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   }
1003663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
1004663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   return False;
1005663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng}
1006663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
1007663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
/* Compute result and new OSZACP flags for all PCMP{E,I}STR{I,M}
   variants on 16-bit characters.

   For xSTRI variants, the new ECX value is placed in the 32 bits
   pointed to by *resV, and the top 96 bits are zeroed.  For xSTRM
   variants, the result is a 128 bit value and is placed at *resV in
   the obvious way.

   For all variants, the new OSZACP value is placed at *resOSZACP.

   argLV and argRV are the vector args.  The caller must prepare an
   8-bit mask for each, zmaskL and zmaskR.  For ISTRx variants this
   must be 1 for each zero 16-bit character of the respective arg.
   For ESTRx variants this is derived from the explicit length
   indication, and must be 0 in all places except at the bit index
   corresponding to the valid length (0 .. 8).  If the valid length
   is 8 then the mask must be all zeroes.  In all cases, bits 31:8
   must be zero.

   imm8 is the original immediate from the instruction.  isxSTRM
   indicates whether this is an xSTRM or xSTRI variant, which controls
   how much of *resV is written.

   If the given imm8 case can be handled, the return value is True.
   If not, False is returned, and neither *resV nor *resOSZACP are
   altered.
*/
1034663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
1035663860b1408516d02ebfcb3a9999a134e6cfb223Ben ChengBool compute_PCMPxSTRx_wide ( /*OUT*/V128* resV,
1036663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng                              /*OUT*/UInt* resOSZACP,
1037663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng                              V128* argLV,  V128* argRV,
1038663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng                              UInt zmaskL, UInt zmaskR,
1039663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng                              UInt imm8,   Bool isxSTRM )
1040663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng{
1041663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   vassert(imm8 < 0x80);
1042663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   vassert((zmaskL >> 8) == 0);
1043663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   vassert((zmaskR >> 8) == 0);
1044663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
1045663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   /* Explicitly reject any imm8 values that haven't been validated,
1046663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      even if they would probably work.  Life is too short to have
1047663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      unvalidated cases in the code base. */
1048663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   switch (imm8) {
1049eb0bae136f4eeaaf29761dddb148b118fb824632Dmitriy Ivanov      case 0x01: case 0x03: case 0x09: case 0x0B: case 0x0D:
1050eb0bae136f4eeaaf29761dddb148b118fb824632Dmitriy Ivanov      case 0x13:            case 0x1B:
1051eb0bae136f4eeaaf29761dddb148b118fb824632Dmitriy Ivanov                            case 0x39: case 0x3B:
1052eb0bae136f4eeaaf29761dddb148b118fb824632Dmitriy Ivanov                 case 0x45:            case 0x4B:
1053663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         break;
1054663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      default:
1055663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         return False;
1056663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   }
1057663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
1058663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   UInt fmt = (imm8 >> 0) & 3; // imm8[1:0]  data format
1059663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   UInt agg = (imm8 >> 2) & 3; // imm8[3:2]  aggregation fn
1060663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   UInt pol = (imm8 >> 4) & 3; // imm8[5:4]  polarity
1061663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   UInt idx = (imm8 >> 6) & 1; // imm8[6]    1==msb/bytemask
1062663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
1063663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   /*----------------------------------------*/
1064663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   /*-- strcmp on wide data                --*/
1065663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   /*----------------------------------------*/
1066663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
1067663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   if (agg == 2/*equal each, aka strcmp*/
1068663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng       && (fmt == 1/*uw*/ || fmt == 3/*sw*/)) {
1069663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      Int     i;
1070663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      UShort* argL = (UShort*)argLV;
1071663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      UShort* argR = (UShort*)argRV;
1072663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      UInt boolResII = 0;
1073663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      for (i = 7; i >= 0; i--) {
1074663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         UShort cL  = argL[i];
1075663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         UShort cR  = argR[i];
1076663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         boolResII = (boolResII << 1) | (cL == cR ? 1 : 0);
1077663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      }
1078663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      UInt validL = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
1079663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      UInt validR = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
1080663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
1081663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      // do invalidation, common to all equal-each cases
1082663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      UInt intRes1
1083663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         = (boolResII & validL & validR)  // if both valid, use cmpres
1084663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng           | (~ (validL | validR));       // if both invalid, force 1
1085663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng                                          // else force 0
1086663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      intRes1 &= 0xFF;
1087663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
1088663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      // generate I-format output
1089663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      compute_PCMPxSTRx_gen_output_wide(
1090663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         resV, resOSZACP,
1091663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM
1092663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      );
1093663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
1094663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      return True;
1095663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   }
1096663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
1097663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   /*----------------------------------------*/
1098663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   /*-- set membership on wide data        --*/
1099663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   /*----------------------------------------*/
1100663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
1101663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   if (agg == 0/*equal any, aka find chars in a set*/
1102663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng       && (fmt == 1/*uw*/ || fmt == 3/*sw*/)) {
1103663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      /* argL: the string,  argR: charset */
1104663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      UInt    si, ci;
1105663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      UShort* argL    = (UShort*)argLV;
1106663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      UShort* argR    = (UShort*)argRV;
1107663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      UInt    boolRes = 0;
1108663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      UInt    validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
1109663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      UInt    validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
1110663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
1111663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      for (si = 0; si < 8; si++) {
1112663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         if ((validL & (1 << si)) == 0)
1113663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng            // run off the end of the string.
1114663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng            break;
1115663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         UInt m = 0;
1116663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         for (ci = 0; ci < 8; ci++) {
1117663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng            if ((validR & (1 << ci)) == 0) break;
1118663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng            if (argR[ci] == argL[si]) { m = 1; break; }
1119663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         }
1120663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         boolRes |= (m << si);
1121663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      }
1122663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
1123663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      // boolRes is "pre-invalidated"
1124663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      UInt intRes1 = boolRes & 0xFF;
1125663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
1126663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      // generate I-format output
1127663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      compute_PCMPxSTRx_gen_output_wide(
1128663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         resV, resOSZACP,
1129663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM
1130663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      );
1131663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
1132663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      return True;
1133663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   }
1134663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
1135663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   /*----------------------------------------*/
1136663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   /*-- substring search on wide data      --*/
1137663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   /*----------------------------------------*/
1138663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
1139663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   if (agg == 3/*equal ordered, aka substring search*/
1140663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng       && (fmt == 1/*uw*/ || fmt == 3/*sw*/)) {
1141663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
1142663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      /* argL: haystack,  argR: needle */
1143663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      UInt    ni, hi;
1144663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      UShort* argL    = (UShort*)argLV;
1145663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      UShort* argR    = (UShort*)argRV;
1146663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      UInt    boolRes = 0;
1147663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      UInt    validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
1148663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      UInt    validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
1149663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      for (hi = 0; hi < 8; hi++) {
1150663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         UInt m = 1;
1151663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         for (ni = 0; ni < 8; ni++) {
1152663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng            if ((validR & (1 << ni)) == 0) break;
1153663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng            UInt i = ni + hi;
1154663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng            if (i >= 8) break;
1155663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng            if (argL[i] != argR[ni]) { m = 0; break; }
1156663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         }
1157663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         boolRes |= (m << hi);
1158663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         if ((validL & (1 << hi)) == 0)
1159663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng            // run off the end of the haystack
1160663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng            break;
1161663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      }
1162663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
1163663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      // boolRes is "pre-invalidated"
1164663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      UInt intRes1 = boolRes & 0xFF;
1165663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
1166663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      // generate I-format output
1167663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      compute_PCMPxSTRx_gen_output_wide(
1168663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         resV, resOSZACP,
1169663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM
1170663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      );
1171663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
1172663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      return True;
1173663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   }
1174663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
1175663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   /*----------------------------------------*/
1176663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   /*-- ranges, unsigned wide data         --*/
1177663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   /*----------------------------------------*/
1178663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
1179663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   if (agg == 1/*ranges*/
1180663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng       && fmt == 1/*uw*/) {
1181663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
1182663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      /* argL: string,  argR: range-pairs */
1183663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      UInt    ri, si;
1184663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      UShort* argL    = (UShort*)argLV;
1185663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      UShort* argR    = (UShort*)argRV;
1186663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      UInt    boolRes = 0;
1187663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      UInt    validL  = ~(zmaskL | -zmaskL);  // not(left(zmaskL))
1188663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      UInt    validR  = ~(zmaskR | -zmaskR);  // not(left(zmaskR))
1189663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      for (si = 0; si < 8; si++) {
1190663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         if ((validL & (1 << si)) == 0)
1191663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng            // run off the end of the string
1192663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng            break;
1193663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         UInt m = 0;
1194663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         for (ri = 0; ri < 8; ri += 2) {
1195663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng            if ((validR & (3 << ri)) != (3 << ri)) break;
1196663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng            if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) {
1197663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng               m = 1; break;
1198663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng            }
1199663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         }
1200663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         boolRes |= (m << si);
1201663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      }
1202663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
1203663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      // boolRes is "pre-invalidated"
1204663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      UInt intRes1 = boolRes & 0xFF;
1205663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
1206663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      // generate I-format output
1207663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      compute_PCMPxSTRx_gen_output_wide(
1208663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         resV, resOSZACP,
1209663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng         intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM
1210663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      );
1211663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
1212663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng      return True;
1213663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng   }
1214663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng
1215ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown   return False;
1216ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown}
1217ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
1218ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown
1219ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*---------------------------------------------------------------*/
1220ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*--- end                                 guest_generic_x87.c ---*/
1221ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*---------------------------------------------------------------*/
1222