1ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 2ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*---------------------------------------------------------------*/ 3ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*--- begin guest_generic_x87.c ---*/ 4ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/*---------------------------------------------------------------*/ 5ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 6ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* 7ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown This file is part of Valgrind, a dynamic binary instrumentation 8ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown framework. 9ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 10436e89c602e787e7a27dd6624b09beed41a0da8aDmitriy Ivanov Copyright (C) 2004-2013 OpenWorks LLP 11ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown info@open-works.net 12ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 13ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown This program is free software; you can redistribute it and/or 14ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown modify it under the terms of the GNU General Public License as 15ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown published by the Free Software Foundation; either version 2 of the 16ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown License, or (at your option) any later version. 17ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 18ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown This program is distributed in the hope that it will be useful, but 19ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown WITHOUT ANY WARRANTY; without even the implied warranty of 20ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown General Public License for more details. 
22ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 23ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown You should have received a copy of the GNU General Public License 24ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown along with this program; if not, write to the Free Software 25ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 26ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 02110-1301, USA. 27ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 28ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown The GNU General Public License is contained in the file COPYING. 29ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 30ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown Neither the names of the U.S. Department of Energy nor the 31ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown University of California nor the names of its contributors may be 32ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown used to endorse or promote products derived from this software 33ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown without prior written permission. 34ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown*/ 35ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 36ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* This file contains functions for doing some x87-specific 37ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown operations. Both the amd64 and x86 front ends (guests) indirectly 38ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown call these functions via guest helper calls. By putting them here, 39ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown code duplication is avoided. Some of these functions are tricky 40ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown and hard to verify, so there is much to be said for only having one 41ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown copy thereof. 
42ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown*/ 43ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 44ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#include "libvex_basictypes.h" 45ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 46ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#include "main_util.h" 47ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#include "guest_generic_x87.h" 48ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 49ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 50ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* 80 and 64-bit floating point formats: 51ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 52ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 80-bit: 53ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 54ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown S 0 0-------0 zero 55ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown S 0 0X------X denormals 56ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown S 1-7FFE 1X------X normals (all normals have leading 1) 57ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown S 7FFF 10------0 infinity 58ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown S 7FFF 10X-----X snan 59ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown S 7FFF 11X-----X qnan 60ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 61ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown S is the sign bit. For runs X----X, at least one of the Xs must be 62ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown nonzero. Exponent is 15 bits, fractional part is 63 bits, and 63ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown there is an explicitly represented leading 1, and a sign bit, 64ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown giving 80 in total. 
65ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 66ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 64-bit avoids the confusion of an explicitly represented leading 1 67ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown and so is simpler: 68ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 69ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown S 0 0------0 zero 70ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown S 0 X------X denormals 71ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown S 1-7FE any normals 72ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown S 7FF 0------0 infinity 73ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown S 7FF 0X-----X snan 74ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown S 7FF 1X-----X qnan 75ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 76ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown Exponent is 11 bits, fractional part is 52 bits, and there is a 77ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown sign bit, giving 64 in total. 78ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown*/ 79ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 80ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 81ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic inline UInt read_bit_array ( UChar* arr, UInt n ) 82ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{ 83ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UChar c = arr[n >> 3]; 84ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown c >>= (n&7); 85ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return c & 1; 86ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown} 87ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 88ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic inline void write_bit_array ( UChar* arr, UInt n, UInt b ) 89ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{ 90ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UChar c = arr[n >> 3]; 91ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown c = toUChar( c & ~(1 << (n&7)) ); 
92ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown c = toUChar( c | ((b&1) << (n&7)) ); 93ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown arr[n >> 3] = c; 94ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown} 95ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 96ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* Convert an IEEE754 double (64-bit) into an x87 extended double 97ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown (80-bit), mimicing the hardware fairly closely. Both numbers are 98ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown stored little-endian. Limitations, all of which could be fixed, 99ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown given some level of hassle: 100ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 101ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * Identity of NaNs is not preserved. 102ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 103ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown See comments in the code for more details. 104ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown*/ 105ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownvoid convert_f64le_to_f80le ( /*IN*/UChar* f64, /*OUT*/UChar* f80 ) 106ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{ 107ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown Bool mantissaIsZero; 108ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown Int bexp, i, j, shift; 109ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UChar sign; 110ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 111ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown sign = toUChar( (f64[7] >> 7) & 1 ); 112ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F); 113ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown bexp &= 0x7FF; 114ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 115ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown mantissaIsZero = False; 116ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (bexp == 0 || bexp == 0x7FF) { 
117ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* We'll need to know whether or not the mantissa (bits 51:0) is 118ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown all zeroes in order to handle these cases. So figure it 119ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown out. */ 120ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown mantissaIsZero 121ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown = toBool( 122ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown (f64[6] & 0x0F) == 0 123ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown && f64[5] == 0 && f64[4] == 0 && f64[3] == 0 124ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown && f64[2] == 0 && f64[1] == 0 && f64[0] == 0 125ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown ); 126ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 127ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 128ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* If the exponent is zero, either we have a zero or a denormal. 129ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown Produce a zero. This is a hack in that it forces denormals to 130ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown zero. Could do better. */ 131ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (bexp == 0) { 132ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f80[9] = toUChar( sign << 7 ); 133ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f80[8] = f80[7] = f80[6] = f80[5] = f80[4] 134ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown = f80[3] = f80[2] = f80[1] = f80[0] = 0; 135ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 136ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (mantissaIsZero) 137ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* It really is zero, so that's all we can do. */ 138ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return; 139ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 140ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* There is at least one 1-bit in the mantissa. 
So it's a 141ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown potentially denormalised double -- but we can produce a 142ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown normalised long double. Count the leading zeroes in the 143ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown mantissa so as to decide how much to bump the exponent down 144ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown by. Note, this is SLOW. */ 145ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown shift = 0; 146ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown for (i = 51; i >= 0; i--) { 147ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (read_bit_array(f64, i)) 148ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown break; 149ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown shift++; 150ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 151ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 152ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* and copy into place as many bits as we can get our hands on. */ 153ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown j = 63; 154ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown for (i = 51 - shift; i >= 0; i--) { 155ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown write_bit_array( f80, j, 156ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown read_bit_array( f64, i ) ); 157ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown j--; 158ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 159ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 160ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* Set the exponent appropriately, and we're done. 
*/ 161ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown bexp -= shift; 162ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown bexp += (16383 - 1023); 163ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f80[9] = toUChar( (sign << 7) | ((bexp >> 8) & 0xFF) ); 164ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f80[8] = toUChar( bexp & 0xFF ); 165ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return; 166ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 167ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 168ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* If the exponent is 7FF, this is either an Infinity, a SNaN or 169ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown QNaN, as determined by examining bits 51:0, thus: 170ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 0 ... 0 Inf 171ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 0X ... X SNaN 172ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 1X ... X QNaN 173ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown where at least one of the Xs is not zero. 
174ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown */ 175ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (bexp == 0x7FF) { 176ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (mantissaIsZero) { 177ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* Produce an appropriately signed infinity: 178ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown S 1--1 (15) 1 0--0 (63) 179ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown */ 180ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f80[9] = toUChar( (sign << 7) | 0x7F ); 181ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f80[8] = 0xFF; 182ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f80[7] = 0x80; 183ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f80[6] = f80[5] = f80[4] = f80[3] 184ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown = f80[2] = f80[1] = f80[0] = 0; 185ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return; 186ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 187ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* So it's either a QNaN or SNaN. Distinguish by considering 188ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown bit 51. Note, this destroys all the trailing bits 189ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown (identity?) of the NaN. IEEE754 doesn't require preserving 190ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown these (it only requires that there be one QNaN value and one 191ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown SNaN value), but x87 does seem to have some ability to 192ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown preserve them. Anyway, here, the NaN's identity is 193ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown destroyed. Could be improved. */ 194ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (f64[6] & 8) { 195663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng /* QNaN. 
Make a canonical QNaN: 196663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng S 1--1 (15) 1 1 0--0 (62) 197ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown */ 198ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f80[9] = toUChar( (sign << 7) | 0x7F ); 199ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f80[8] = 0xFF; 200663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng f80[7] = 0xC0; 201ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f80[6] = f80[5] = f80[4] = f80[3] 202663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng = f80[2] = f80[1] = f80[0] = 0x00; 203ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } else { 204ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* SNaN. Make a SNaN: 205663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng S 1--1 (15) 1 0 1--1 (62) 206ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown */ 207ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f80[9] = toUChar( (sign << 7) | 0x7F ); 208ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f80[8] = 0xFF; 209663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng f80[7] = 0xBF; 210ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f80[6] = f80[5] = f80[4] = f80[3] 211ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown = f80[2] = f80[1] = f80[0] = 0xFF; 212ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 213ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return; 214ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 215ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 216ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* It's not a zero, denormal, infinity or nan. So it must be a 217ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown normalised number. Rebias the exponent and build the new 218ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown number. 
*/ 219ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown bexp += (16383 - 1023); 220ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 221ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f80[9] = toUChar( (sign << 7) | ((bexp >> 8) & 0xFF) ); 222ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f80[8] = toUChar( bexp & 0xFF ); 223ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f80[7] = toUChar( (1 << 7) | ((f64[6] << 3) & 0x78) 224ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown | ((f64[5] >> 5) & 7) ); 225ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f80[6] = toUChar( ((f64[5] << 3) & 0xF8) | ((f64[4] >> 5) & 7) ); 226ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f80[5] = toUChar( ((f64[4] << 3) & 0xF8) | ((f64[3] >> 5) & 7) ); 227ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f80[4] = toUChar( ((f64[3] << 3) & 0xF8) | ((f64[2] >> 5) & 7) ); 228ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f80[3] = toUChar( ((f64[2] << 3) & 0xF8) | ((f64[1] >> 5) & 7) ); 229ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f80[2] = toUChar( ((f64[1] << 3) & 0xF8) | ((f64[0] >> 5) & 7) ); 230ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f80[1] = toUChar( ((f64[0] << 3) & 0xF8) ); 231ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f80[0] = toUChar( 0 ); 232ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown} 233ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 234ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 235ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* Convert an x87 extended double (80-bit) into an IEEE 754 double 236ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown (64-bit), mimicking the hardware fairly closely. Both numbers are 237ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown stored little-endian. 
Limitations, both of which could be fixed, 238ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown given some level of hassle: 239ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 240ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * Rounding following truncation could be a bit better. 241ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 242ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown * Identity of NaNs is not preserved. 243ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 244ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown See comments in the code for more details. 245ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown*/ 246ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownvoid convert_f80le_to_f64le ( /*IN*/UChar* f80, /*OUT*/UChar* f64 ) 247ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{ 248ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown Bool isInf; 249ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown Int bexp, i, j; 250ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UChar sign; 251ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 252ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown sign = toUChar((f80[9] >> 7) & 1); 253ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown bexp = (((UInt)f80[9]) << 8) | (UInt)f80[8]; 254ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown bexp &= 0x7FFF; 255ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 256ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* If the exponent is zero, either we have a zero or a denormal. 257ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown But an extended precision denormal becomes a double precision 258ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown zero, so in either case, just produce the appropriately signed 259ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown zero. 
*/ 260ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (bexp == 0) { 261ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f64[7] = toUChar(sign << 7); 262ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f64[6] = f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0; 263ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return; 264ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 265ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 266ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* If the exponent is 7FFF, this is either an Infinity, a SNaN or 267ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown QNaN, as determined by examining bits 62:0, thus: 268663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 10 ... 0 Inf 269663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 10X ... X SNaN 270663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 11X ... X QNaN 271ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown where at least one of the Xs is not zero. 272ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown */ 273ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (bexp == 0x7FFF) { 274ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown isInf = toBool( 275ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown (f80[7] & 0x7F) == 0 276ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown && f80[6] == 0 && f80[5] == 0 && f80[4] == 0 277ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown && f80[3] == 0 && f80[2] == 0 && f80[1] == 0 278ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown && f80[0] == 0 279ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown ); 280ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (isInf) { 281ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (0 == (f80[7] & 0x80)) 282ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown goto wierd_NaN; 283ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* Produce an appropriately signed infinity: 284ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown S 1--1 (11) 0--0 (52) 
285ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown */ 286ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f64[7] = toUChar((sign << 7) | 0x7F); 287ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f64[6] = 0xF0; 288ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0; 289ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return; 290ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 291ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* So it's either a QNaN or SNaN. Distinguish by considering 292663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng bit 61. Note, this destroys all the trailing bits 293ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown (identity?) of the NaN. IEEE754 doesn't require preserving 294ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown these (it only requires that there be one QNaN value and one 295ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown SNaN value), but x87 does seem to have some ability to 296ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown preserve them. Anyway, here, the NaN's identity is 297ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown destroyed. Could be improved. */ 298663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng if (f80[7] & 0x40) { 299663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng /* QNaN. Make a canonical QNaN: 300663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng S 1--1 (11) 1 0--0 (51) 301ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown */ 302ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f64[7] = toUChar((sign << 7) | 0x7F); 303663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng f64[6] = 0xF8; 304663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0x00; 305ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } else { 306ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* SNaN. 
Make a SNaN: 307ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown S 1--1 (11) 0 1--1 (51) 308ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown */ 309ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f64[7] = toUChar((sign << 7) | 0x7F); 310ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f64[6] = 0xF7; 311ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0xFF; 312ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 313ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return; 314ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 315ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 316ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* If it's not a Zero, NaN or Inf, and the integer part (bit 62) is 317ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown zero, the x87 FPU appears to consider the number denormalised 318ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown and converts it to a QNaN. */ 319ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (0 == (f80[7] & 0x80)) { 320ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown wierd_NaN: 321ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* Strange hardware QNaN: 322ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown S 1--1 (11) 1 0--0 (51) 323ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown */ 324ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* On a PIII, these QNaNs always appear with sign==1. I have 325ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown no idea why. 
*/ 326ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f64[7] = (1 /*sign*/ << 7) | 0x7F; 327ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f64[6] = 0xF8; 328ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0; 329ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return; 330ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 331ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 332ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* It's not a zero, denormal, infinity or nan. So it must be a 333ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown normalised number. Rebias the exponent and consider. */ 334ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown bexp -= (16383 - 1023); 335ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (bexp >= 0x7FF) { 336ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* It's too big for a double. Construct an infinity. */ 337ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f64[7] = toUChar((sign << 7) | 0x7F); 338ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f64[6] = 0xF0; 339ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0; 340ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return; 341ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 342ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 343ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (bexp <= 0) { 344ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* It's too small for a normalised double. First construct a 345ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown zero and then see if it can be improved into a denormal. 
*/ 346ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f64[7] = toUChar(sign << 7); 347ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f64[6] = f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0; 348ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 349ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (bexp < -52) 350ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* Too small even for a denormal. */ 351ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return; 352ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 353ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* Ok, let's make a denormal. Note, this is SLOW. */ 354ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* Copy bits 63, 62, 61, etc of the src mantissa into the dst, 355ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown indexes 52+bexp, 51+bexp, etc, until k+bexp < 0. */ 356ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* bexp is in range -52 .. 0 inclusive */ 357ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown for (i = 63; i >= 0; i--) { 358ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown j = i - 12 + bexp; 359ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (j < 0) break; 360ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* We shouldn't really call vassert from generated code. */ 361ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown vassert(j >= 0 && j < 52); 362ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown write_bit_array ( f64, 363ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown j, 364ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown read_bit_array ( f80, i ) ); 365ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 366ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* and now we might have to round ... 
*/ 367ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (read_bit_array(f80, 10+1 - bexp) == 1) 368ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown goto do_rounding; 369ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 370ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return; 371ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 372ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 373ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* Ok, it's a normalised number which is representable as a double. 374ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown Copy the exponent and mantissa into place. */ 375ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* 376ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown for (i = 0; i < 52; i++) 377ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown write_bit_array ( f64, 378ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown i, 379ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown read_bit_array ( f80, i+11 ) ); 380ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown */ 381ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f64[0] = toUChar( (f80[1] >> 3) | (f80[2] << 5) ); 382ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f64[1] = toUChar( (f80[2] >> 3) | (f80[3] << 5) ); 383ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f64[2] = toUChar( (f80[3] >> 3) | (f80[4] << 5) ); 384ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f64[3] = toUChar( (f80[4] >> 3) | (f80[5] << 5) ); 385ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f64[4] = toUChar( (f80[5] >> 3) | (f80[6] << 5) ); 386ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f64[5] = toUChar( (f80[6] >> 3) | (f80[7] << 5) ); 387ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 388ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f64[6] = toUChar( ((bexp << 4) & 0xF0) | ((f80[7] >> 3) & 0x0F) ); 389ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 390ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f64[7] = toUChar( (sign << 7) | ((bexp >> 4) & 0x7F) 
); 391ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 392ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* Now consider any rounding that needs to happen as a result of 393ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown truncating the mantissa. */ 394ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (f80[1] & 4) /* read_bit_array(f80, 10) == 1) */ { 395ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 396ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* If the bottom bits of f80 are "100 0000 0000", then the 397ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown infinitely precise value is deemed to be mid-way between the 398ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown two closest representable values. Since we're doing 399ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown round-to-nearest (the default mode), in that case it is the 400ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown bit immediately above which indicates whether we should round 401ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown upwards or not -- if 0, we don't. All that is encapsulated 402ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown in the following simple test. */ 403ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if ((f80[1] & 0xF) == 4/*0100b*/ && f80[0] == 0) 404ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return; 405ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 406ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown do_rounding: 407ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* Round upwards. This is a kludge. Once in every 2^24 408ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown roundings (statistically) the bottom three bytes are all 0xFF 409ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown and so we don't round at all. Could be improved. 
*/ 410ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (f64[0] != 0xFF) { 411ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f64[0]++; 412ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 413ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown else 414ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (f64[0] == 0xFF && f64[1] != 0xFF) { 415ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f64[0] = 0; 416ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f64[1]++; 417ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 418ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown else 419ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (f64[0] == 0xFF && f64[1] == 0xFF && f64[2] != 0xFF) { 420ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f64[0] = 0; 421ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f64[1] = 0; 422ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown f64[2]++; 423ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 424ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* else we don't round, but we should. */ 425ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 426ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown} 427ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 428ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 429ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* CALLED FROM GENERATED CODE: CLEAN HELPER */ 430ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* Extract the signed significand or exponent component as per 431ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown fxtract. Arg and result are doubles travelling under the guise of 432ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown ULongs. Returns significand when getExp is zero and exponent 433ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown otherwise. 
*/
/* Special inputs are answered directly, before any decomposition:
      +infinity : significand +infinity, exponent +infinity
      -infinity : significand -infinity, exponent +infinity
      any NaN   : a quiet NaN carrying the sign of arg (both results)
      +0 / -0   : significand is arg itself, exponent -infinity
   Everything else is taken apart and rebuilt using integer bit
   operations on the IEEE754 double encoding only. */
ULong x86amd64g_calculate_FXTRACT ( ULong arg, HWord getExp )
{
   /* Layout reminders for the exponent-all-ones encodings:
         S 7FF 0------0   infinity
         S 7FF 0X-----X   snan
         S 7FF 1X-----X   qnan
   */
   const ULong signBit    = 1ULL << 63;
   const ULong hiddenBit  = 1ULL << 52;   /* implied leading 1 */
   const ULong topFracBit = 1ULL << 51;   /* msb of the stored fraction */
   const ULong fracMask   = hiddenBit - 1;
   const ULong expField   = 0x7FF0000000000000ULL;
   const ULong plusInf    = expField;
   const ULong minusInf   = signBit | expField;
   const ULong quietNan   = 0x7FF8000000000000ULL;
   const ULong expOfOne   = 0x3FF0000000000000ULL; /* biased exponent of 1.0 */

   ULong frac, sigBits, expBits;
   Int   e;
   UInt  isNeg, outExp;

   /* Mimic Core i5 behaviour for special cases. */
   if (arg == plusInf)
      return plusInf;
   if (arg == minusInf)
      return getExp ? plusInf : minusInf;
   /* Infinities are gone, so exponent-all-ones now means a NaN. */
   if ((arg & expField) == expField)
      return quietNan | (arg & signBit);
   /* Everything except the sign bit is clear: +0 or -0. */
   if ((arg & ~signBit) == 0)
      return getExp ? minusInf : arg;

   /* Split into sign, fraction (0 .. 2^52-1) and biased exponent. */
   isNeg = ((UInt)(arg >> 63)) & 1;
   frac  = arg & fracMask;
   e     = ((Int)(arg >> 52)) & 0x7FF;

   if (e != 0) {
      /* Normal number: add the implied leading 1. */
      frac |= hiddenBit;
   } else {
      /* Denormal.  The fraction cannot be zero here (zeroes were
         handled above).  Shift left until bit 51 is set, lowering the
         exponent once per shift (at most 52 times), then shift once
         more so the leading 1 sits at bit 52. */
      while (e > -52 && (frac & topFracBit) == 0) {
         frac <<= 1;
         e--;
      }
      frac <<= 1;
   }

   /* Significand result: the fraction bits under the exponent of 1.0,
      i.e. a magnitude in [1.0, 2.0), with the sign of arg. */
   sigBits = expOfOne | (frac & fracMask);
   if (isNeg)
      sigBits |= signBit;

   /* Exponent result: the unbiased exponent, encoded as a double by
      normalising its magnitude by hand. */
   e -= 1023;
   if (e == 0) {
      expBits = 0;
   } else {
      ULong mag = (ULong)(e < 0 ? -e : e);
      /* 1 <= mag <= 1074.  The first 42 normalisation shifts always
         happen, so do them in one go; at most 10 more can follow. */
      mag <<= 42;
      outExp = 0x3FF + 52 - 42;
      while (outExp > 0x3FF && (mag & hiddenBit) == 0) {
         mag <<= 1;
         outExp--;
      }
      expBits = (mag & fracMask) | (((ULong)outExp) << 52);
      if (e < 0)
         expBits |= signBit;
   }

   return getExp ? expBits : sigBits;
}



/*---------------------------------------------------------*/
/*--- SSE4.2 PCMP{E,I}STR{I,M} helpers                  ---*/
/*---------------------------------------------------------*/

/* We need the definitions for OSZACP eflags/rflags offsets.
   #including guest_{amd64,x86}_defs.h causes chaos, so just copy the
   required values directly.  They are not going to change in the
   foreseeable future :-)
*/

#define SHIFT_O 11
#define SHIFT_S 7
#define SHIFT_Z 6
#define SHIFT_A 4
#define SHIFT_C 0
#define SHIFT_P 2

#define MASK_O (1 << SHIFT_O)
#define MASK_S (1 << SHIFT_S)
#define MASK_Z (1 << SHIFT_Z)
#define MASK_A (1 << SHIFT_A)
560ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#define MASK_C (1 << SHIFT_C) 561ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown#define MASK_P (1 << SHIFT_P) 562ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 563ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 564ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* Count leading zeroes, w/ 0-produces-32 semantics, a la Hacker's 565ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown Delight. */ 566ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic UInt clz32 ( UInt x ) 567ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{ 568ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown Int y, m, n; 569ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown y = -(x >> 16); 570ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown m = (y >> 16) & 16; 571ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown n = 16 - m; 572ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown x = x >> m; 573ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown y = x - 0x100; 574ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown m = (y >> 16) & 8; 575ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown n = n + m; 576ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown x = x << m; 577ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown y = x - 0x1000; 578ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown m = (y >> 16) & 4; 579ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown n = n + m; 580ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown x = x << m; 581ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown y = x - 0x4000; 582ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown m = (y >> 16) & 2; 583ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown n = n + m; 584ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown x = x << m; 585ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown y = x >> 14; 586ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown m = y & ~(y >> 1); 587ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return n + 2 - m; 
588ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown} 589ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 590ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic UInt ctz32 ( UInt x ) 591ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{ 592ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return 32 - clz32((~x) & (x-1)); 593ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown} 594ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 595ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* Convert a 4-bit value to a 32-bit value by cloning each bit 8 596ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown times. There's surely a better way to do this, but I don't know 597ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown what it is. */ 598ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic UInt bits4_to_bytes4 ( UInt bits4 ) 599ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{ 600ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UInt r = 0; 601ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown r |= (bits4 & 1) ? 0x000000FF : 0; 602ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown r |= (bits4 & 2) ? 0x0000FF00 : 0; 603ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown r |= (bits4 & 4) ? 0x00FF0000 : 0; 604ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown r |= (bits4 & 8) ? 0xFF000000 : 0; 605ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return r; 606ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown} 607ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 608ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 609663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng/* Convert a 2-bit value to a 32-bit value by cloning each bit 16 610663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng times. There's surely a better way to do this, but I don't know 611663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng what it is. 
*/ 612663860b1408516d02ebfcb3a9999a134e6cfb223Ben Chengstatic UInt bits2_to_bytes4 ( UInt bits2 ) 613663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng{ 614663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng UInt r = 0; 615663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng r |= (bits2 & 1) ? 0x0000FFFF : 0; 616663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng r |= (bits2 & 2) ? 0xFFFF0000 : 0; 617663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng return r; 618663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng} 619663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 620663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 621ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* Given partial results from a pcmpXstrX operation (intRes1, 622ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown basically), generate an I- or M-format output value, also the new 623ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown OSZACP flags. */ 624ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownstatic 625ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brownvoid compute_PCMPxSTRx_gen_output (/*OUT*/V128* resV, 626ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /*OUT*/UInt* resOSZACP, 627ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UInt intRes1, 628ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UInt zmaskL, UInt zmaskR, 629ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UInt validL, 630ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UInt pol, UInt idx, 631ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown Bool isxSTRM ) 632ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{ 633ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown vassert((pol >> 2) == 0); 634ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown vassert((idx >> 1) == 0); 635ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 636ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UInt intRes2 = 0; 637ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown switch (pol) { 638ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 0: 
intRes2 = intRes1; break; // pol + 639ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 1: intRes2 = ~intRes1; break; // pol - 640ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 2: intRes2 = intRes1; break; // pol m+ 641ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown case 3: intRes2 = intRes1 ^ validL; break; // pol m- 642ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 643ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown intRes2 &= 0xFFFF; 644ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 645ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (isxSTRM) { 646ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 647ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown // generate M-format output (a bit or byte mask in XMM0) 648ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (idx) { 649ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown resV->w32[0] = bits4_to_bytes4( (intRes2 >> 0) & 0xF ); 650ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown resV->w32[1] = bits4_to_bytes4( (intRes2 >> 4) & 0xF ); 651ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown resV->w32[2] = bits4_to_bytes4( (intRes2 >> 8) & 0xF ); 652ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown resV->w32[3] = bits4_to_bytes4( (intRes2 >> 12) & 0xF ); 653ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } else { 654ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown resV->w32[0] = intRes2 & 0xFFFF; 655ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown resV->w32[1] = 0; 656ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown resV->w32[2] = 0; 657ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown resV->w32[3] = 0; 658ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 659ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 660ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } else { 661ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 662ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown // generate I-format output (an index in ECX) 
663ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown // generate ecx value 664ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UInt newECX = 0; 665ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (idx) { 666ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown // index of ms-1-bit 667ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown newECX = intRes2 == 0 ? 16 : (31 - clz32(intRes2)); 668ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } else { 669ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown // index of ls-1-bit 670ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown newECX = intRes2 == 0 ? 16 : ctz32(intRes2); 671ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 672ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 673ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown resV->w32[0] = newECX; 674ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown resV->w32[1] = 0; 675ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown resV->w32[2] = 0; 676ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown resV->w32[3] = 0; 677ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 678ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 679ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 680ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown // generate new flags, common to all ISTRI and ISTRM cases 681ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown *resOSZACP // A, P are zero 682ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown = ((intRes2 == 0) ? 0 : MASK_C) // C == 0 iff intRes2 == 0 683ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown | ((zmaskL == 0) ? 0 : MASK_Z) // Z == 1 iff any in argL is 0 684ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown | ((zmaskR == 0) ? 
0 : MASK_S) // S == 1 iff any in argR is 0 685ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown | ((intRes2 & 1) << SHIFT_O); // O == IntRes2[0] 686ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown} 687ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 688ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 689663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng/* Given partial results from a 16-bit pcmpXstrX operation (intRes1, 690663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng basically), generate an I- or M-format output value, also the new 691663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng OSZACP flags. */ 692663860b1408516d02ebfcb3a9999a134e6cfb223Ben Chengstatic 693663860b1408516d02ebfcb3a9999a134e6cfb223Ben Chengvoid compute_PCMPxSTRx_gen_output_wide (/*OUT*/V128* resV, 694663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng /*OUT*/UInt* resOSZACP, 695663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng UInt intRes1, 696663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng UInt zmaskL, UInt zmaskR, 697663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng UInt validL, 698663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng UInt pol, UInt idx, 699663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng Bool isxSTRM ) 700663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng{ 701663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng vassert((pol >> 2) == 0); 702663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng vassert((idx >> 1) == 0); 703663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 704663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng UInt intRes2 = 0; 705663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng switch (pol) { 706663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng case 0: intRes2 = intRes1; break; // pol + 707663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng case 1: intRes2 = ~intRes1; break; // pol - 708663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng case 2: intRes2 = intRes1; break; // pol m+ 709663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng case 3: intRes2 = intRes1 ^ 
validL; break; // pol m- 710663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng } 711663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng intRes2 &= 0xFF; 712663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 713663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng if (isxSTRM) { 714663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 715663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng // generate M-format output (a bit or byte mask in XMM0) 716663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng if (idx) { 717663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng resV->w32[0] = bits2_to_bytes4( (intRes2 >> 0) & 0x3 ); 718663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng resV->w32[1] = bits2_to_bytes4( (intRes2 >> 2) & 0x3 ); 719663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng resV->w32[2] = bits2_to_bytes4( (intRes2 >> 4) & 0x3 ); 720663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng resV->w32[3] = bits2_to_bytes4( (intRes2 >> 6) & 0x3 ); 721663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng } else { 722663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng resV->w32[0] = intRes2 & 0xFF; 723663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng resV->w32[1] = 0; 724663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng resV->w32[2] = 0; 725663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng resV->w32[3] = 0; 726663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng } 727663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 728663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng } else { 729663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 730663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng // generate I-format output (an index in ECX) 731663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng // generate ecx value 732663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng UInt newECX = 0; 733663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng if (idx) { 734663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng // index of ms-1-bit 735663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng newECX = intRes2 == 0 ? 
8 : (31 - clz32(intRes2)); 736663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng } else { 737663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng // index of ls-1-bit 738663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng newECX = intRes2 == 0 ? 8 : ctz32(intRes2); 739663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng } 740663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 741663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng resV->w32[0] = newECX; 742663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng resV->w32[1] = 0; 743663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng resV->w32[2] = 0; 744663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng resV->w32[3] = 0; 745663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 746663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng } 747663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 748663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng // generate new flags, common to all ISTRI and ISTRM cases 749663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng *resOSZACP // A, P are zero 750663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng = ((intRes2 == 0) ? 0 : MASK_C) // C == 0 iff intRes2 == 0 751663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng | ((zmaskL == 0) ? 0 : MASK_Z) // Z == 1 iff any in argL is 0 752663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng | ((zmaskR == 0) ? 0 : MASK_S) // S == 1 iff any in argR is 0 753663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng | ((intRes2 & 1) << SHIFT_O); // O == IntRes2[0] 754663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng} 755663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 756663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 757ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown/* Compute result and new OSZACP flags for all PCMP{E,I}STR{I,M} 758663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng variants on 8-bit data. 

   For xSTRI variants, the new ECX value is placed in the 32 bits
   pointed to by *resV, and the top 96 bits are zeroed.  For xSTRM
   variants, the result is a 128 bit value and is placed at *resV in
   the obvious way.

   For all variants, the new OSZACP value is placed at *resOSZACP.

   argLV and argRV are the vector args.  The caller must prepare a
   16-bit mask for each, zmaskL and zmaskR.  For ISTRx variants this
   must be 1 for each zero byte of the respective arg.  For ESTRx
   variants this is derived from the explicit length indication, and
   must be 0 in all places except at the bit index corresponding to
   the valid length (0 .. 16).  If the valid length is 16 then the
   mask must be all zeroes.  In all cases, bits 31:16 must be zero.

   imm8 is the original immediate from the instruction.  isxSTRM
   indicates whether this is a xSTRM or xSTRI variant, which controls
   how much of *resV is written.
778ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 779ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown If the given imm8 case can be handled, the return value is True. 780ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown If not, False is returned, and neither *res not *resOSZACP are 781ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown altered. 782ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown*/ 783ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 784ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff BrownBool compute_PCMPxSTRx ( /*OUT*/V128* resV, 785ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /*OUT*/UInt* resOSZACP, 786ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown V128* argLV, V128* argRV, 787ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UInt zmaskL, UInt zmaskR, 788ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UInt imm8, Bool isxSTRM ) 789ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown{ 790ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown vassert(imm8 < 0x80); 791ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown vassert((zmaskL >> 16) == 0); 792ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown vassert((zmaskR >> 16) == 0); 793ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 794ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* Explicitly reject any imm8 values that haven't been validated, 795ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown even if they would probably work. Life is too short to have 796ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown unvalidated cases in the code base. 
*/ 797ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown switch (imm8) { 798eb0bae136f4eeaaf29761dddb148b118fb824632Dmitriy Ivanov case 0x00: case 0x02: case 0x08: case 0x0A: case 0x0C: case 0x0E: 799eb0bae136f4eeaaf29761dddb148b118fb824632Dmitriy Ivanov case 0x12: case 0x14: case 0x1A: 800eb0bae136f4eeaaf29761dddb148b118fb824632Dmitriy Ivanov case 0x30: case 0x34: case 0x38: case 0x3A: 801eb0bae136f4eeaaf29761dddb148b118fb824632Dmitriy Ivanov case 0x40: case 0x44: case 0x46: case 0x4A: 802ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown break; 803ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown default: 804ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return False; 805ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 806ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 807ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UInt fmt = (imm8 >> 0) & 3; // imm8[1:0] data format 808ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UInt agg = (imm8 >> 2) & 3; // imm8[3:2] aggregation fn 809ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UInt pol = (imm8 >> 4) & 3; // imm8[5:4] polarity 810ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UInt idx = (imm8 >> 6) & 1; // imm8[6] 1==msb/bytemask 811ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 812ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /*----------------------------------------*/ 813ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /*-- strcmp on byte data --*/ 814ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /*----------------------------------------*/ 815ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 816ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (agg == 2/*equal each, aka strcmp*/ 817ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown && (fmt == 0/*ub*/ || fmt == 2/*sb*/)) { 818ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown Int i; 819ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UChar* argL = (UChar*)argLV; 820ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 
UChar* argR = (UChar*)argRV; 821ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UInt boolResII = 0; 822ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown for (i = 15; i >= 0; i--) { 823ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UChar cL = argL[i]; 824ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UChar cR = argR[i]; 825ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown boolResII = (boolResII << 1) | (cL == cR ? 1 : 0); 826ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 827ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL)) 828ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR)) 829ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 830ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown // do invalidation, common to all equal-each cases 831ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UInt intRes1 832ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown = (boolResII & validL & validR) // if both valid, use cmpres 833ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown | (~ (validL | validR)); // if both invalid, force 1 834ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown // else force 0 835ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown intRes1 &= 0xFFFF; 836ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 837ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown // generate I-format output 838ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown compute_PCMPxSTRx_gen_output( 839ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown resV, resOSZACP, 840ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM 841ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown ); 842ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 843ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return True; 844ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 845ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 
846ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /*----------------------------------------*/ 847ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /*-- set membership on byte data --*/ 848ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /*----------------------------------------*/ 849ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 850ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (agg == 0/*equal any, aka find chars in a set*/ 851ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown && (fmt == 0/*ub*/ || fmt == 2/*sb*/)) { 852ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* argL: the string, argR: charset */ 853ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UInt si, ci; 854ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UChar* argL = (UChar*)argLV; 855ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UChar* argR = (UChar*)argRV; 856ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UInt boolRes = 0; 857ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL)) 858ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR)) 859ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 860ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown for (si = 0; si < 16; si++) { 861ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if ((validL & (1 << si)) == 0) 862ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown // run off the end of the string. 
863ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown break; 864ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UInt m = 0; 865ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown for (ci = 0; ci < 16; ci++) { 866ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if ((validR & (1 << ci)) == 0) break; 867ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (argR[ci] == argL[si]) { m = 1; break; } 868ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 869ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown boolRes |= (m << si); 870ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 871ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 872ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown // boolRes is "pre-invalidated" 873ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UInt intRes1 = boolRes & 0xFFFF; 874ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 875ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown // generate I-format output 876ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown compute_PCMPxSTRx_gen_output( 877ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown resV, resOSZACP, 878ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM 879ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown ); 880ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 881ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return True; 882ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 883ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 884ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /*----------------------------------------*/ 885ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /*-- substring search on byte data --*/ 886ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /*----------------------------------------*/ 887ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 888ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (agg == 3/*equal ordered, aka substring search*/ 889ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff 
Brown && (fmt == 0/*ub*/ || fmt == 2/*sb*/)) { 890ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 891ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* argL: haystack, argR: needle */ 892ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UInt ni, hi; 893ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UChar* argL = (UChar*)argLV; 894ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UChar* argR = (UChar*)argRV; 895ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UInt boolRes = 0; 896ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL)) 897ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR)) 898ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown for (hi = 0; hi < 16; hi++) { 899ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UInt m = 1; 900ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown for (ni = 0; ni < 16; ni++) { 901ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if ((validR & (1 << ni)) == 0) break; 902ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UInt i = ni + hi; 903ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (i >= 16) break; 904ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (argL[i] != argR[ni]) { m = 0; break; } 905ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 906ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown boolRes |= (m << hi); 907663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng if ((validL & (1 << hi)) == 0) 908663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng // run off the end of the haystack 909663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng break; 910ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 911ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 912ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown // boolRes is "pre-invalidated" 913ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UInt intRes1 = boolRes & 0xFFFF; 914ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 
915ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown // generate I-format output 916ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown compute_PCMPxSTRx_gen_output( 917ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown resV, resOSZACP, 918ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM 919ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown ); 920ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 921ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return True; 922ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 923ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 924ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /*----------------------------------------*/ 925ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /*-- ranges, unsigned byte data --*/ 926ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /*----------------------------------------*/ 927ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 928ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (agg == 1/*ranges*/ 929ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown && fmt == 0/*ub*/) { 930ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 931ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown /* argL: string, argR: range-pairs */ 932ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UInt ri, si; 933ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UChar* argL = (UChar*)argLV; 934ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UChar* argR = (UChar*)argRV; 935ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UInt boolRes = 0; 936ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL)) 937ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR)) 938ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown for (si = 0; si < 16; si++) { 939ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if ((validL & (1 << si)) == 0) 
940ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown // run off the end of the string 941ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown break; 942ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UInt m = 0; 943ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown for (ri = 0; ri < 16; ri += 2) { 944ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if ((validR & (3 << ri)) != (3 << ri)) break; 945ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) { 946ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown m = 1; break; 947ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 948ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 949ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown boolRes |= (m << si); 950ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 951ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 952ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown // boolRes is "pre-invalidated" 953ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown UInt intRes1 = boolRes & 0xFFFF; 954ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 955ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown // generate I-format output 956ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown compute_PCMPxSTRx_gen_output( 957ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown resV, resOSZACP, 958ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM 959ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown ); 960ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 961ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return True; 962ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown } 963ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 964663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng /*----------------------------------------*/ 965663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng /*-- ranges, signed byte data --*/ 966663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 
/*----------------------------------------*/ 967663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 968663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng if (agg == 1/*ranges*/ 969663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng && fmt == 2/*sb*/) { 970663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 971663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng /* argL: string, argR: range-pairs */ 972663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng UInt ri, si; 973663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng Char* argL = (Char*)argLV; 974663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng Char* argR = (Char*)argRV; 975663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng UInt boolRes = 0; 976663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL)) 977663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR)) 978663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng for (si = 0; si < 16; si++) { 979663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng if ((validL & (1 << si)) == 0) 980663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng // run off the end of the string 981663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng break; 982663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng UInt m = 0; 983663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng for (ri = 0; ri < 16; ri += 2) { 984663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng if ((validR & (3 << ri)) != (3 << ri)) break; 985663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) { 986663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng m = 1; break; 987663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng } 988663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng } 989663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng boolRes |= (m << si); 990663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng } 991663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 992663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng // 
boolRes is "pre-invalidated" 993663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng UInt intRes1 = boolRes & 0xFFFF; 994663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 995663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng // generate I-format output 996663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng compute_PCMPxSTRx_gen_output( 997663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng resV, resOSZACP, 998663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM 999663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng ); 1000663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 1001663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng return True; 1002663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng } 1003663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 1004663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng return False; 1005663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng} 1006663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 1007663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 1008663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng/* Compute result and new OSZACP flags for all PCMP{E,I}STR{I,M} 1009663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng variants on 16-bit characters. 1010663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 1011663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng For xSTRI variants, the new ECX value is placed in the 32 bits 1012663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng pointed to by *resV, and the top 96 bits are zeroed. For xSTRM 1013663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng variants, the result is a 128 bit value and is placed at *resV in 1014663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng the obvious way. 1015663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 1016663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng For all variants, the new OSZACP value is placed at *resOSZACP. 
1017663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 1018663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng argLV and argRV are the vector args. The caller must prepare a 1019663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 8-bit mask for each, zmaskL and zmaskR. For ISTRx variants this 1020663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng must be 1 for each zero byte of of the respective arg. For ESTRx 1021663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng variants this is derived from the explicit length indication, and 1022663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng must be 0 in all places except at the bit index corresponding to 1023663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng the valid length (0 .. 8). If the valid length is 8 then the 1024663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng mask must be all zeroes. In all cases, bits 31:8 must be zero. 1025663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 1026663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng imm8 is the original immediate from the instruction. isSTRM 1027663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng indicates whether this is a xSTRM or xSTRI variant, which controls 1028663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng how much of *res is written. 1029663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 1030663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng If the given imm8 case can be handled, the return value is True. 1031663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng If not, False is returned, and neither *res not *resOSZACP are 1032663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng altered. 
1033663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng*/ 1034663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 1035663860b1408516d02ebfcb3a9999a134e6cfb223Ben ChengBool compute_PCMPxSTRx_wide ( /*OUT*/V128* resV, 1036663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng /*OUT*/UInt* resOSZACP, 1037663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng V128* argLV, V128* argRV, 1038663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng UInt zmaskL, UInt zmaskR, 1039663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng UInt imm8, Bool isxSTRM ) 1040663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng{ 1041663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng vassert(imm8 < 0x80); 1042663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng vassert((zmaskL >> 8) == 0); 1043663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng vassert((zmaskR >> 8) == 0); 1044663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 1045663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng /* Explicitly reject any imm8 values that haven't been validated, 1046663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng even if they would probably work. Life is too short to have 1047663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng unvalidated cases in the code base. 
*/ 1048663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng switch (imm8) { 1049eb0bae136f4eeaaf29761dddb148b118fb824632Dmitriy Ivanov case 0x01: case 0x03: case 0x09: case 0x0B: case 0x0D: 1050eb0bae136f4eeaaf29761dddb148b118fb824632Dmitriy Ivanov case 0x13: case 0x1B: 1051eb0bae136f4eeaaf29761dddb148b118fb824632Dmitriy Ivanov case 0x39: case 0x3B: 1052eb0bae136f4eeaaf29761dddb148b118fb824632Dmitriy Ivanov case 0x45: case 0x4B: 1053663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng break; 1054663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng default: 1055663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng return False; 1056663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng } 1057663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 1058663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng UInt fmt = (imm8 >> 0) & 3; // imm8[1:0] data format 1059663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng UInt agg = (imm8 >> 2) & 3; // imm8[3:2] aggregation fn 1060663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng UInt pol = (imm8 >> 4) & 3; // imm8[5:4] polarity 1061663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng UInt idx = (imm8 >> 6) & 1; // imm8[6] 1==msb/bytemask 1062663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 1063663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng /*----------------------------------------*/ 1064663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng /*-- strcmp on wide data --*/ 1065663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng /*----------------------------------------*/ 1066663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 1067663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng if (agg == 2/*equal each, aka strcmp*/ 1068663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng && (fmt == 1/*uw*/ || fmt == 3/*sw*/)) { 1069663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng Int i; 1070663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng UShort* argL = (UShort*)argLV; 1071663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng UShort* argR = (UShort*)argRV; 
1072663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng UInt boolResII = 0; 1073663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng for (i = 7; i >= 0; i--) { 1074663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng UShort cL = argL[i]; 1075663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng UShort cR = argR[i]; 1076663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng boolResII = (boolResII << 1) | (cL == cR ? 1 : 0); 1077663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng } 1078663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL)) 1079663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR)) 1080663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 1081663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng // do invalidation, common to all equal-each cases 1082663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng UInt intRes1 1083663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng = (boolResII & validL & validR) // if both valid, use cmpres 1084663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng | (~ (validL | validR)); // if both invalid, force 1 1085663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng // else force 0 1086663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng intRes1 &= 0xFF; 1087663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 1088663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng // generate I-format output 1089663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng compute_PCMPxSTRx_gen_output_wide( 1090663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng resV, resOSZACP, 1091663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM 1092663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng ); 1093663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 1094663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng return True; 1095663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng } 1096663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 
1097663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng /*----------------------------------------*/ 1098663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng /*-- set membership on wide data --*/ 1099663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng /*----------------------------------------*/ 1100663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 1101663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng if (agg == 0/*equal any, aka find chars in a set*/ 1102663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng && (fmt == 1/*uw*/ || fmt == 3/*sw*/)) { 1103663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng /* argL: the string, argR: charset */ 1104663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng UInt si, ci; 1105663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng UShort* argL = (UShort*)argLV; 1106663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng UShort* argR = (UShort*)argRV; 1107663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng UInt boolRes = 0; 1108663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL)) 1109663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR)) 1110663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 1111663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng for (si = 0; si < 8; si++) { 1112663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng if ((validL & (1 << si)) == 0) 1113663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng // run off the end of the string. 
1114663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng break; 1115663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng UInt m = 0; 1116663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng for (ci = 0; ci < 8; ci++) { 1117663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng if ((validR & (1 << ci)) == 0) break; 1118663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng if (argR[ci] == argL[si]) { m = 1; break; } 1119663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng } 1120663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng boolRes |= (m << si); 1121663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng } 1122663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 1123663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng // boolRes is "pre-invalidated" 1124663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng UInt intRes1 = boolRes & 0xFF; 1125663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 1126663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng // generate I-format output 1127663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng compute_PCMPxSTRx_gen_output_wide( 1128663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng resV, resOSZACP, 1129663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM 1130663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng ); 1131663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 1132663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng return True; 1133663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng } 1134663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 1135663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng /*----------------------------------------*/ 1136663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng /*-- substring search on wide data --*/ 1137663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng /*----------------------------------------*/ 1138663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 1139663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng if (agg == 3/*equal ordered, aka substring search*/ 
1140663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng && (fmt == 1/*uw*/ || fmt == 3/*sw*/)) { 1141663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 1142663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng /* argL: haystack, argR: needle */ 1143663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng UInt ni, hi; 1144663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng UShort* argL = (UShort*)argLV; 1145663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng UShort* argR = (UShort*)argRV; 1146663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng UInt boolRes = 0; 1147663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL)) 1148663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR)) 1149663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng for (hi = 0; hi < 8; hi++) { 1150663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng UInt m = 1; 1151663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng for (ni = 0; ni < 8; ni++) { 1152663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng if ((validR & (1 << ni)) == 0) break; 1153663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng UInt i = ni + hi; 1154663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng if (i >= 8) break; 1155663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng if (argL[i] != argR[ni]) { m = 0; break; } 1156663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng } 1157663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng boolRes |= (m << hi); 1158663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng if ((validL & (1 << hi)) == 0) 1159663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng // run off the end of the haystack 1160663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng break; 1161663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng } 1162663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 1163663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng // boolRes is "pre-invalidated" 1164663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng UInt intRes1 = boolRes & 0xFF; 
1165663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 1166663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng // generate I-format output 1167663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng compute_PCMPxSTRx_gen_output_wide( 1168663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng resV, resOSZACP, 1169663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM 1170663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng ); 1171663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 1172663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng return True; 1173663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng } 1174663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 1175663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng /*----------------------------------------*/ 1176663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng /*-- ranges, unsigned wide data --*/ 1177663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng /*----------------------------------------*/ 1178663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 1179663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng if (agg == 1/*ranges*/ 1180663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng && fmt == 1/*uw*/) { 1181663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 1182663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng /* argL: string, argR: range-pairs */ 1183663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng UInt ri, si; 1184663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng UShort* argL = (UShort*)argLV; 1185663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng UShort* argR = (UShort*)argRV; 1186663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng UInt boolRes = 0; 1187663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL)) 1188663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR)) 1189663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng for (si = 0; si < 8; si++) { 1190663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng if 
((validL & (1 << si)) == 0) 1191663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng // run off the end of the string 1192663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng break; 1193663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng UInt m = 0; 1194663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng for (ri = 0; ri < 8; ri += 2) { 1195663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng if ((validR & (3 << ri)) != (3 << ri)) break; 1196663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) { 1197663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng m = 1; break; 1198663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng } 1199663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng } 1200663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng boolRes |= (m << si); 1201663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng } 1202663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 1203663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng // boolRes is "pre-invalidated" 1204663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng UInt intRes1 = boolRes & 0xFF; 1205663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 1206663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng // generate I-format output 1207663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng compute_PCMPxSTRx_gen_output_wide( 1208663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng resV, resOSZACP, 1209663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM 1210663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng ); 1211663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 1212663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng return True; 1213663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng } 1214663860b1408516d02ebfcb3a9999a134e6cfb223Ben Cheng 1215ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown return False; 1216ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown} 1217ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 1218ed07e00d438c74b7a23c01bfffde77e3968305e4Jeff Brown 
/*---------------------------------------------------------------*/
/*--- end guest_generic_x87.c ---*/
/*---------------------------------------------------------------*/