libvex_ir.h revision 74142b8c8d5d3b3db17d744f5d5fb80f548bcf74
1 2/*---------------------------------------------------------------*/ 3/*--- begin libvex_ir.h ---*/ 4/*---------------------------------------------------------------*/ 5 6/* 7 This file is part of Valgrind, a dynamic binary instrumentation 8 framework. 9 10 Copyright (C) 2004-2012 OpenWorks LLP 11 info@open-works.net 12 13 This program is free software; you can redistribute it and/or 14 modify it under the terms of the GNU General Public License as 15 published by the Free Software Foundation; either version 2 of the 16 License, or (at your option) any later version. 17 18 This program is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of 20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 General Public License for more details. 22 23 You should have received a copy of the GNU General Public License 24 along with this program; if not, write to the Free Software 25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 26 02110-1301, USA. 27 28 The GNU General Public License is contained in the file COPYING. 29 30 Neither the names of the U.S. Department of Energy nor the 31 University of California nor the names of its contributors may be 32 used to endorse or promote products derived from this software 33 without prior written permission. 34*/ 35 36#ifndef __LIBVEX_IR_H 37#define __LIBVEX_IR_H 38 39#include "libvex_basictypes.h" 40 41 42/*---------------------------------------------------------------*/ 43/*--- High-level IR description ---*/ 44/*---------------------------------------------------------------*/ 45 46/* Vex IR is an architecture-neutral intermediate representation. 47 Unlike some IRs in systems similar to Vex, it is not like assembly 48 language (ie. a list of instructions). Rather, it is more like the 49 IR that might be used in a compiler. 50 51 Code blocks 52 ~~~~~~~~~~~ 53 The code is broken into small code blocks ("superblocks", type: 54 'IRSB'). 
Each code block typically represents from 1 to perhaps 50 55 instructions. IRSBs are single-entry, multiple-exit code blocks. 56 Each IRSB contains three things: 57 - a type environment, which indicates the type of each temporary 58 value present in the IRSB 59 - a list of statements, which represent code 60 - a jump that exits from the end of the IRSB 61 Because the blocks are multiple-exit, there can be additional 62 conditional exit statements that cause control to leave the IRSB 63 before the final exit. Also because of this, IRSBs can cover 64 multiple non-consecutive sequences of code (up to 3). These are 65 recorded in the type VexGuestExtents (see libvex.h). 66 67 Statements and expressions 68 ~~~~~~~~~~~~~~~~~~~~~~~~~~ 69 Statements (type 'IRStmt') represent operations with side-effects, 70 eg. guest register writes, stores, and assignments to temporaries. 71 Expressions (type 'IRExpr') represent operations without 72 side-effects, eg. arithmetic operations, loads, constants. 73 Expressions can contain sub-expressions, forming expression trees, 74 eg. (3 + (4 * load(addr1))). 75 76 Storage of guest state 77 ~~~~~~~~~~~~~~~~~~~~~~ 78 The "guest state" contains the guest registers of the guest machine 79 (ie. the machine that we are simulating). It is stored by default 80 in a block of memory supplied by the user of the VEX library, 81 generally referred to as the guest state (area). To operate on 82 these registers, one must first read ("Get") them from the guest 83 state into a temporary value. Afterwards, one can write ("Put") 84 them back into the guest state. 85 86 Get and Put are characterised by a byte offset into the guest 87 state, a small integer which effectively gives the identity of the 88 referenced guest register, and a type, which indicates the size of 89 the value to be transferred. 90 91 The basic "Get" and "Put" operations are sufficient to model normal 92 fixed registers on the guest.
Selected areas of the guest state 93 can be treated as a circular array of registers (type: 94 'IRRegArray'), which can be indexed at run-time. This is done with 95 the "GetI" and "PutI" primitives. This is necessary to describe 96 rotating register files, for example the x87 FPU stack, SPARC 97 register windows, and the Itanium register files. 98 99 Examples, and flattened vs. unflattened code 100 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 101 For example, consider this x86 instruction: 102 103 addl %eax, %ebx 104 105 One Vex IR translation for this code would be this: 106 107 ------ IMark(0x24F275, 7, 0) ------ 108 t3 = GET:I32(0) # get %eax, a 32-bit integer 109 t2 = GET:I32(12) # get %ebx, a 32-bit integer 110 t1 = Add32(t3,t2) # addl 111 PUT(12) = t1 # put %ebx 112 113 (For simplicity, this ignores the effects on the condition codes, and 114 the update of the instruction pointer.) 115 116 The "IMark" is an IR statement that doesn't represent actual code. 117 Instead it indicates the address and length of the original 118 instruction. The numbers 0 and 12 are offsets into the guest state 119 for %eax and %ebx. The full list of offsets for an architecture 120 <ARCH> can be found in the type VexGuest<ARCH>State in the file 121 VEX/pub/libvex_guest_<ARCH>.h. 122 123 The five statements in this example are: 124 - the IMark 125 - three assignments to temporaries 126 - one register write (put) 127 128 The six expressions in this example are: 129 - two register reads (gets) 130 - one arithmetic (add) operation 131 - three temporaries (two nested within the Add32, one in the PUT) 132 133 The above IR is "flattened", ie. all sub-expressions are "atoms", 134 either constants or temporaries. An equivalent, unflattened version 135 would be: 136 137 PUT(12) = Add32(GET:I32(0), GET:I32(12)) 138 139 IR is guaranteed to be flattened at instrumentation-time. This makes 140 instrumentation easier.
Equivalent flattened and unflattened IR 141 typically results in the same generated code. 142 143 Another example, this one showing loads and stores: 144 145 addl %edx,4(%eax) 146 147 This becomes (again ignoring condition code and instruction pointer 148 updates): 149 150 ------ IMark(0x4000ABA, 3, 0) ------ 151 t3 = Add32(GET:I32(0),0x4:I32) 152 t2 = LDle:I32(t3) 153 t1 = GET:I32(8) 154 t0 = Add32(t2,t1) 155 STle(t3) = t0 156 157 The "le" in "LDle" and "STle" is short for "little-endian". 158 159 No need for deallocations 160 ~~~~~~~~~~~~~~~~~~~~~~~~~ 161 Although there are allocation functions for various data structures 162 in this file, there are no deallocation functions. This is because 163 Vex uses a memory allocation scheme that automatically reclaims the 164 memory used by allocated structures once translation is completed. 165 This makes things easier for tools that instrument/transform code 166 blocks. 167 168 SSAness and typing 169 ~~~~~~~~~~~~~~~~~~ 170 The IR is fully typed. For every IRSB (IR block) it is possible to 171 say unambiguously whether or not it is correctly typed. 172 Incorrectly typed IR has no meaning and VEX will refuse to 173 process it. At various points during processing VEX typechecks the 174 IR and aborts if any violations are found. This seems overkill but 175 makes it a great deal easier to build a reliable JIT. 176 177 IR also has the SSA property. SSA stands for Static Single 178 Assignment, and what it means is that each IR temporary may be 179 assigned to only once. This idea became widely used in compiler 180 construction in the mid to late 90s. It makes many IR-level 181 transformations/code improvements easier, simpler and faster. 182 Whenever it typechecks an IR block, VEX also checks the SSA 183 property holds, and will abort if not so. So SSAness is 184 mechanically and rigidly enforced.
185*/ 186 187/*---------------------------------------------------------------*/ 188/*--- Type definitions for the IR ---*/ 189/*---------------------------------------------------------------*/ 190 191/* General comments about naming schemes: 192 193 All publicly visible functions contain the name of the primary 194 type on which they operate (IRFoo, IRBar, etc). Hence you should 195 be able to identify these functions by grepping for "IR[A-Z]". 196 197 For some type 'IRFoo': 198 199 - ppIRFoo is the printing method for IRFoo, printing it to the 200 output channel specified in the LibVEX_Initialise call. 201 202 - eqIRFoo is a structural equality predicate for IRFoos. 203 204 - deepCopyIRFoo is a deep copy constructor for IRFoos. 205 It recursively traverses the entire argument tree and 206 produces a complete new tree. All types have a deep copy 207 constructor. 208 209 - shallowCopyIRFoo is the shallow copy constructor for IRFoos. 210 It creates a new top-level copy of the supplied object, 211 but does not copy any sub-objects. Only some types have a 212 shallow copy constructor. 213*/ 214 215/* ------------------ Types ------------------ */ 216 217/* A type indicates the size of a value, and whether it's an integer, a 218 float, or a vector (SIMD) value.
*/ 219typedef 220 enum { 221 Ity_INVALID=0x1100, /* explicit base value; the enumerators below take consecutive values */ 222 Ity_I1, 223 Ity_I8, 224 Ity_I16, 225 Ity_I32, 226 Ity_I64, /* NOTE(review): I1..I64 are presumably 1- to 64-bit integers, going by the names -- confirm */ 227 Ity_I128, /* 128-bit scalar */ 228 Ity_F32, /* IEEE 754 float */ 229 Ity_F64, /* IEEE 754 double */ 230 Ity_D32, /* 32-bit Decimal floating point */ 231 Ity_D64, /* 64-bit Decimal floating point */ 232 Ity_D128, /* 128-bit Decimal floating point */ 233 Ity_F128, /* 128-bit floating point; implementation defined */ 234 Ity_V128, /* 128-bit SIMD */ 235 Ity_V256 /* 256-bit SIMD */ 236 } 237 IRType; 238 239/* Pretty-print an IRType (to the output channel specified in the LibVEX_Initialise call) */ 240extern void ppIRType ( IRType ); 241 242/* Get the size (in bytes) of an IRType */ 243extern Int sizeofIRType ( IRType ); 244 245 246/* ------------------ Endianness ------------------ */ 247 248/* IREndness is used in load IRExprs and store IRStmts. */ 249typedef 250 enum { 251 Iend_LE=0x1200, /* little endian */ 252 Iend_BE /* big endian */ 253 } 254 IREndness; 255 256 257/* ------------------ Constants ------------------ */ 258 259/* IRConsts are used within 'Const' IRExprs and 'Exit' statements (conditional exits are statements, not expressions). */ 260 261/* The various kinds of constant. */ 262typedef 263 enum { 264 Ico_U1=0x1300, 265 Ico_U8, 266 Ico_U16, 267 Ico_U32, 268 Ico_U64, 269 Ico_F32, /* 32-bit IEEE754 floating */ 270 Ico_F32i, /* 32-bit unsigned int to be interpreted literally 271 as an IEEE754 single value. */ 272 Ico_F64, /* 64-bit IEEE754 floating */ 273 Ico_F64i, /* 64-bit unsigned int to be interpreted literally 274 as an IEEE754 double value. */ 275 Ico_V128, /* 128-bit restricted vector constant, with 1 bit 276 (repeated 8 times) for each of the 16 x 1-byte lanes */ 277 Ico_V256 /* 256-bit restricted vector constant, with 1 bit 278 (repeated 8 times) for each of the 32 x 1-byte lanes */ 279 } 280 IRConstTag; 281 282/* A constant. Stored as a tagged union. 'tag' indicates what kind of 283 constant this is. 'Ico' is the union that holds the fields.
If an 284 IRConst 'c' has c.tag equal to Ico_U32, then it's a 32-bit constant, 285 and its value can be accessed with 'c.Ico.U32'. */ 286typedef 287 struct _IRConst { 288 IRConstTag tag; /* selects which member of Ico holds the value */ 289 union { 290 Bool U1; 291 UChar U8; 292 UShort U16; 293 UInt U32; 294 ULong U64; 295 Float F32; 296 UInt F32i; /* 32-bit value interpreted literally as an IEEE754 single; see Ico_F32i comment above */ 297 Double F64; 298 ULong F64i; /* 64-bit value interpreted literally as an IEEE754 double; see Ico_F64i comment above */ 299 UShort V128; /* 16-bit value; see Ico_V128 comment above */ 300 UInt V256; /* 32-bit value; see Ico_V256 comment above */ 301 } Ico; 302 } 303 IRConst; 304 305/* IRConst constructors: one per IRConstTag, each taking a value of the matching Ico member type. NOTE(review): presumably each returns a new IRConst with the corresponding tag set -- confirm in the implementation. */ 306extern IRConst* IRConst_U1 ( Bool ); 307extern IRConst* IRConst_U8 ( UChar ); 308extern IRConst* IRConst_U16 ( UShort ); 309extern IRConst* IRConst_U32 ( UInt ); 310extern IRConst* IRConst_U64 ( ULong ); 311extern IRConst* IRConst_F32 ( Float ); 312extern IRConst* IRConst_F32i ( UInt ); 313extern IRConst* IRConst_F64 ( Double ); 314extern IRConst* IRConst_F64i ( ULong ); 315extern IRConst* IRConst_V128 ( UShort ); 316extern IRConst* IRConst_V256 ( UInt ); 317 318/* Deep-copy an IRConst */ 319extern IRConst* deepCopyIRConst ( IRConst* ); 320 321/* Pretty-print an IRConst */ 322extern void ppIRConst ( IRConst* ); 323 324/* Compare two IRConsts for (structural) equality */ 325extern Bool eqIRConst ( IRConst*, IRConst* ); 326 327 328/* ------------------ Call targets ------------------ */ 329 330/* Describes a helper function to call. The name part is purely for 331 pretty printing and not actually used. regparms=n tells the back 332 end that the callee has been declared 333 "__attribute__((regparm(n)))", although indirectly using the 334 VEX_REGPARM(n) macro. On some targets (x86) the back end will need 335 to construct a non-standard sequence to call a function declared 336 like this. 337 338 mcx_mask is a sop to Memcheck. It indicates which args should be 339 considered 'always defined' when lazily computing definedness of 340 the result. Bit 0 of mcx_mask corresponds to args[0], bit 1 to 341 args[1], etc.
If a bit is set, the corresponding arg is excluded 342 (hence "x" in "mcx") from definedness checking. 343*/ 344 345typedef 346 struct { 347 Int regparms; /* the n of regparm(n) the callee was declared with */ 348 const HChar* name; /* used for pretty printing only */ 349 void* addr; /* NOTE(review): presumably the address of the helper function -- confirm */ 350 UInt mcx_mask; /* bit i set => args[i] is excluded from definedness checking */ 351 } 352 IRCallee; 353 354/* Create an IRCallee. Note that mcx_mask is not a constructor argument. */ 355extern IRCallee* mkIRCallee ( Int regparms, const HChar* name, void* addr ); 356 357/* Deep-copy an IRCallee. */ 358extern IRCallee* deepCopyIRCallee ( IRCallee* ); 359 360/* Pretty-print an IRCallee. */ 361extern void ppIRCallee ( IRCallee* ); 362 363 364/* ------------------ Guest state arrays ------------------ */ 365 366/* This describes a section of the guest state that we want to 367 be able to index at run time, so as to be able to describe 368 indexed or rotating register files on the guest. */ 369typedef 370 struct { 371 Int base; /* guest state offset of start of indexed area */ 372 IRType elemTy; /* type of each element in the indexed area */ 373 Int nElems; /* number of elements in the indexed area */ 374 } 375 IRRegArray; 376 /* Create an IRRegArray. NOTE(review): the unnamed arguments presumably correspond to base, elemTy and nElems, in that order -- confirm. */ 377extern IRRegArray* mkIRRegArray ( Int, IRType, Int ); 378 /* Deep-copy an IRRegArray. */ 379extern IRRegArray* deepCopyIRRegArray ( IRRegArray* ); 380 /* Pretty-print an IRRegArray. */ 381extern void ppIRRegArray ( IRRegArray* ); /* Structural equality predicate for IRRegArrays. */ 382extern Bool eqIRRegArray ( IRRegArray*, IRRegArray* ); 383 384 385/* ------------------ Temporaries ------------------ */ 386 387/* This represents a temporary, eg. t1. The IR optimiser relies on the 388 fact that IRTemps are 32-bit ints. Do not change them to be ints of 389 any other size. */ 390typedef UInt IRTemp; 391 392/* Pretty-print an IRTemp. */ 393extern void ppIRTemp ( IRTemp ); 394 /* Distinguished IRTemp value with all 32 bits set. NOTE(review): presumably marks an absent or invalid temporary -- confirm against its uses. */ 395#define IRTemp_INVALID ((IRTemp)0xFFFFFFFF) 396 397 398/* --------------- Primops (arity 1,2,3 and 4) --------------- */ 399 400/* Primitive operations that are used in Unop, Binop, Triop and Qop 401 IRExprs. Once we take into account integer, floating point and SIMD 402 operations of all the different sizes, there are quite a lot of them.
403 Most instructions supported by the architectures that Vex supports 404 (x86, PPC, etc) are represented. Some more obscure ones (eg. cpuid) 405 are not; they are instead handled with dirty helpers that emulate 406 their functionality. Such obscure ones are thus not directly visible 407 in the IR, but their effects on guest state (memory and registers) 408 are made visible via the annotations in IRDirty structures. 409*/ 410typedef 411 enum { 412 /* -- Do not change this ordering. The IR generators rely on 413 (eg) Iop_Add64 == Iop_Add8 + 3. -- */ 414 415 Iop_INVALID=0x1400, 416 Iop_Add8, Iop_Add16, Iop_Add32, Iop_Add64, 417 Iop_Sub8, Iop_Sub16, Iop_Sub32, Iop_Sub64, 418 /* Signless mul. MullS/MullU is elsewhere. */ 419 Iop_Mul8, Iop_Mul16, Iop_Mul32, Iop_Mul64, 420 Iop_Or8, Iop_Or16, Iop_Or32, Iop_Or64, 421 Iop_And8, Iop_And16, Iop_And32, Iop_And64, 422 Iop_Xor8, Iop_Xor16, Iop_Xor32, Iop_Xor64, 423 Iop_Shl8, Iop_Shl16, Iop_Shl32, Iop_Shl64, 424 Iop_Shr8, Iop_Shr16, Iop_Shr32, Iop_Shr64, 425 Iop_Sar8, Iop_Sar16, Iop_Sar32, Iop_Sar64, 426 /* Integer comparisons. */ 427 Iop_CmpEQ8, Iop_CmpEQ16, Iop_CmpEQ32, Iop_CmpEQ64, 428 Iop_CmpNE8, Iop_CmpNE16, Iop_CmpNE32, Iop_CmpNE64, 429 /* Tags for unary ops */ 430 Iop_Not8, Iop_Not16, Iop_Not32, Iop_Not64, 431 432 /* Exactly like CmpEQ8/16/32/64, but carrying the additional 433 hint that these compute the success/failure of a CAS 434 operation, and hence are almost certainly applied to two 435 copies of the same value, which in turn has implications for 436 Memcheck's instrumentation. */ 437 Iop_CasCmpEQ8, Iop_CasCmpEQ16, Iop_CasCmpEQ32, Iop_CasCmpEQ64, 438 Iop_CasCmpNE8, Iop_CasCmpNE16, Iop_CasCmpNE32, Iop_CasCmpNE64, 439 440 /* Exactly like CmpNE8/16/32/64, but carrying the additional 441 hint that these need expensive definedness tracking. */ 442 Iop_ExpCmpNE8, Iop_ExpCmpNE16, Iop_ExpCmpNE32, Iop_ExpCmpNE64, 443 444 /* -- Ordering not important after here.
-- */ 445 446 /* Widening multiplies */ 447 Iop_MullS8, Iop_MullS16, Iop_MullS32, Iop_MullS64, 448 Iop_MullU8, Iop_MullU16, Iop_MullU32, Iop_MullU64, 449 450 /* Weirdo integer stuff */ 451 Iop_Clz64, Iop_Clz32, /* count leading zeroes */ 452 Iop_Ctz64, Iop_Ctz32, /* count trailing zeroes */ 453 /* Ctz64/Ctz32/Clz64/Clz32 are UNDEFINED when given arguments of 454 zero. You must ensure they are never given a zero argument. 455 */ 456 457 /* Standard integer comparisons */ 458 Iop_CmpLT32S, Iop_CmpLT64S, 459 Iop_CmpLE32S, Iop_CmpLE64S, 460 Iop_CmpLT32U, Iop_CmpLT64U, 461 Iop_CmpLE32U, Iop_CmpLE64U, 462 463 /* As a sop to Valgrind-Memcheck, the following are useful. */ 464 Iop_CmpNEZ8, Iop_CmpNEZ16, Iop_CmpNEZ32, Iop_CmpNEZ64, 465 Iop_CmpwNEZ32, Iop_CmpwNEZ64, /* all-0s -> all-0s; other -> all-1s */ 466 Iop_Left8, Iop_Left16, Iop_Left32, Iop_Left64, /* \x -> x | -x */ 467 Iop_Max32U, /* unsigned max */ 468 469 /* PowerPC-style 3-way integer comparisons. Without them it is 470 difficult to simulate PPC efficiently.
471 op(x,y) | x < y = 0x8 else 472 | x > y = 0x4 else 473 | x == y = 0x2 474 */ 475 Iop_CmpORD32U, Iop_CmpORD64U, 476 Iop_CmpORD32S, Iop_CmpORD64S, 477 478 /* Division */ 479 /* TODO: clarify semantics wrt rounding, negative values, whatever */ 480 Iop_DivU32, // :: I32,I32 -> I32 (simple div, no mod) 481 Iop_DivS32, // ditto, signed 482 Iop_DivU64, // :: I64,I64 -> I64 (simple div, no mod) 483 Iop_DivS64, // ditto, signed 484 Iop_DivU64E, // :: I64,I64 -> I64 (dividend is 64-bit arg (hi) concat with 64 0's (low)) 485 Iop_DivS64E, // ditto, signed 486 Iop_DivU32E, // :: I32,I32 -> I32 (dividend is 32-bit arg (hi) concat with 32 0's (low)) 487 Iop_DivS32E, // ditto, signed 488 489 Iop_DivModU64to32, // :: I64,I32 -> I64 490 // of which lo half is div and hi half is mod 491 Iop_DivModS64to32, // ditto, signed 492 493 Iop_DivModU128to64, // :: V128,I64 -> V128 494 // of which lo half is div and hi half is mod 495 Iop_DivModS128to64, // ditto, signed 496 497 Iop_DivModS64to64, // :: I64,I64 -> I128 498 // of which lo half is div and hi half is mod 499 500 /* Integer conversions. Some of these are redundant (eg 501 Iop_64to8 is the same as Iop_64to32 and then Iop_32to8), but 502 having a complete set reduces the typical dynamic size of IR 503 and makes the instruction selectors easier to write. 
*/ 504 505 /* Widening conversions */ 506 Iop_8Uto16, Iop_8Uto32, Iop_8Uto64, 507 Iop_16Uto32, Iop_16Uto64, 508 Iop_32Uto64, 509 Iop_8Sto16, Iop_8Sto32, Iop_8Sto64, 510 Iop_16Sto32, Iop_16Sto64, 511 Iop_32Sto64, 512 513 /* Narrowing conversions */ 514 Iop_64to8, Iop_32to8, Iop_64to16, 515 /* 8 <-> 16 bit conversions */ 516 Iop_16to8, // :: I16 -> I8, low half 517 Iop_16HIto8, // :: I16 -> I8, high half 518 Iop_8HLto16, // :: (I8,I8) -> I16 519 /* 16 <-> 32 bit conversions */ 520 Iop_32to16, // :: I32 -> I16, low half 521 Iop_32HIto16, // :: I32 -> I16, high half 522 Iop_16HLto32, // :: (I16,I16) -> I32 523 /* 32 <-> 64 bit conversions */ 524 Iop_64to32, // :: I64 -> I32, low half 525 Iop_64HIto32, // :: I64 -> I32, high half 526 Iop_32HLto64, // :: (I32,I32) -> I64 527 /* 64 <-> 128 bit conversions */ 528 Iop_128to64, // :: I128 -> I64, low half 529 Iop_128HIto64, // :: I128 -> I64, high half 530 Iop_64HLto128, // :: (I64,I64) -> I128 531 /* 1-bit stuff */ 532 Iop_Not1, /* :: Ity_Bit -> Ity_Bit */ 533 Iop_32to1, /* :: Ity_I32 -> Ity_Bit, just select bit[0] */ 534 Iop_64to1, /* :: Ity_I64 -> Ity_Bit, just select bit[0] */ 535 Iop_1Uto8, /* :: Ity_Bit -> Ity_I8, unsigned widen */ 536 Iop_1Uto32, /* :: Ity_Bit -> Ity_I32, unsigned widen */ 537 Iop_1Uto64, /* :: Ity_Bit -> Ity_I64, unsigned widen */ 538 Iop_1Sto8, /* :: Ity_Bit -> Ity_I8, signed widen */ 539 Iop_1Sto16, /* :: Ity_Bit -> Ity_I16, signed widen */ 540 Iop_1Sto32, /* :: Ity_Bit -> Ity_I32, signed widen */ 541 Iop_1Sto64, /* :: Ity_Bit -> Ity_I64, signed widen */ 542 543 /* ------ Floating point. We try to be IEEE754 compliant. ------ */ 544 545 /* --- Simple stuff as mandated by 754. --- */ 546 547 /* Binary operations, with rounding. 
*/ 548 /* :: IRRoundingMode(I32) x F64 x F64 -> F64 */ 549 Iop_AddF64, Iop_SubF64, Iop_MulF64, Iop_DivF64, 550 551 /* :: IRRoundingMode(I32) x F32 x F32 -> F32 */ 552 Iop_AddF32, Iop_SubF32, Iop_MulF32, Iop_DivF32, 553 554 /* Variants of the above which produce a 64-bit result but which 555 round their result to a IEEE float range first. */ 556 /* :: IRRoundingMode(I32) x F64 x F64 -> F64 */ 557 Iop_AddF64r32, Iop_SubF64r32, Iop_MulF64r32, Iop_DivF64r32, 558 559 /* Unary operations, without rounding. */ 560 /* :: F64 -> F64 */ 561 Iop_NegF64, Iop_AbsF64, 562 563 /* :: F32 -> F32 */ 564 Iop_NegF32, Iop_AbsF32, 565 566 /* Unary operations, with rounding. */ 567 /* :: IRRoundingMode(I32) x F64 -> F64 */ 568 Iop_SqrtF64, 569 570 /* :: IRRoundingMode(I32) x F32 -> F32 */ 571 Iop_SqrtF32, 572 573 /* Comparison, yielding GT/LT/EQ/UN(ordered), as per the following: 574 0x45 Unordered 575 0x01 LT 576 0x00 GT 577 0x40 EQ 578 This just happens to be the Intel encoding. The values 579 are recorded in the type IRCmpF64Result. 580 */ 581 /* :: F64 x F64 -> IRCmpF64Result(I32) */ 582 Iop_CmpF64, 583 Iop_CmpF32, 584 Iop_CmpF128, 585 586 /* --- Int to/from FP conversions. --- */ 587 588 /* For the most part, these take a first argument :: Ity_I32 (as 589 IRRoundingMode) which is an indication of the rounding mode 590 to use, as per the following encoding ("the standard 591 encoding"): 592 00b to nearest (the default) 593 01b to -infinity 594 10b to +infinity 595 11b to zero 596 This just happens to be the Intel encoding. For reference only, 597 the PPC encoding is: 598 00b to nearest (the default) 599 01b to zero 600 10b to +infinity 601 11b to -infinity 602 Any PPC -> IR front end will have to translate these PPC 603 encodings, as encoded in the guest state, to the standard 604 encodings, to pass to the primops. 
605 For reference only, the ARM VFP encoding is: 606 00b to nearest 607 01b to +infinity 608 10b to -infinity 609 11b to zero 610 Again, this will have to be converted to the standard encoding 611 to pass to primops. 612 613 If one of these conversions gets an out-of-range condition, 614 or a NaN, as an argument, the result is host-defined. On x86 615 the "integer indefinite" value 0x80..00 is produced. On PPC 616 it is either 0x80..00 or 0x7F..FF depending on the sign of 617 the argument. 618 619 On ARMvfp, when converting to a signed integer result, the 620 overflow result is 0x80..00 for negative args and 0x7F..FF 621 for positive args. For unsigned integer results it is 622 0x00..00 and 0xFF..FF respectively. 623 624 Rounding is required whenever the destination type cannot 625 represent exactly all values of the source type. 626 */ 627 Iop_F64toI16S, /* IRRoundingMode(I32) x F64 -> signed I16 */ 628 Iop_F64toI32S, /* IRRoundingMode(I32) x F64 -> signed I32 */ 629 Iop_F64toI64S, /* IRRoundingMode(I32) x F64 -> signed I64 */ 630 Iop_F64toI64U, /* IRRoundingMode(I32) x F64 -> unsigned I64 */ 631 632 Iop_F64toI32U, /* IRRoundingMode(I32) x F64 -> unsigned I32 */ 633 634 Iop_I32StoF64, /* signed I32 -> F64 */ 635 Iop_I64StoF64, /* IRRoundingMode(I32) x signed I64 -> F64 */ 636 Iop_I64UtoF64, /* IRRoundingMode(I32) x unsigned I64 -> F64 */ 637 Iop_I64UtoF32, /* IRRoundingMode(I32) x unsigned I64 -> F32 */ 638 639 Iop_I32UtoF32, /* IRRoundingMode(I32) x unsigned I32 -> F32 */ 640 Iop_I32UtoF64, /* unsigned I32 -> F64 */ 641 642 Iop_F32toI32S, /* IRRoundingMode(I32) x F32 -> signed I32 */ 643 Iop_F32toI64S, /* IRRoundingMode(I32) x F32 -> signed I64 */ 644 Iop_F32toI32U, /* IRRoundingMode(I32) x F32 -> unsigned I32 */ 645 Iop_F32toI64U, /* IRRoundingMode(I32) x F32 -> unsigned I64 */ 646 647 Iop_I32StoF32, /* IRRoundingMode(I32) x signed I32 -> F32 */ 648 Iop_I64StoF32, /* IRRoundingMode(I32) x signed I64 -> F32 */ 649 650 /* Conversion between floating point formats 
*/ 651 Iop_F32toF64, /* F32 -> F64 */ 652 Iop_F64toF32, /* IRRoundingMode(I32) x F64 -> F32 */ 653 654 /* Reinterpretation. Take an F64 and produce an I64 with 655 the same bit pattern, or vice versa. */ 656 Iop_ReinterpF64asI64, Iop_ReinterpI64asF64, 657 Iop_ReinterpF32asI32, Iop_ReinterpI32asF32, 658 659 /* Support for 128-bit floating point */ 660 Iop_F64HLtoF128,/* (high half of F128,low half of F128) -> F128 */ 661 Iop_F128HItoF64,/* F128 -> high half of F128 into a F64 register */ 662 Iop_F128LOtoF64,/* F128 -> low half of F128 into a F64 register */ 663 664 /* :: IRRoundingMode(I32) x F128 x F128 -> F128 */ 665 Iop_AddF128, Iop_SubF128, Iop_MulF128, Iop_DivF128, 666 667 /* :: F128 -> F128 */ 668 Iop_NegF128, Iop_AbsF128, 669 670 /* :: IRRoundingMode(I32) x F128 -> F128 */ 671 Iop_SqrtF128, 672 673 Iop_I32StoF128, /* signed I32 -> F128 */ 674 Iop_I64StoF128, /* signed I64 -> F128 */ 675 Iop_I32UtoF128, /* unsigned I32 -> F128 */ 676 Iop_I64UtoF128, /* unsigned I64 -> F128 */ 677 Iop_F32toF128, /* F32 -> F128 */ 678 Iop_F64toF128, /* F64 -> F128 */ 679 680 Iop_F128toI32S, /* IRRoundingMode(I32) x F128 -> signed I32 */ 681 Iop_F128toI64S, /* IRRoundingMode(I32) x F128 -> signed I64 */ 682 Iop_F128toI32U, /* IRRoundingMode(I32) x F128 -> unsigned I32 */ 683 Iop_F128toI64U, /* IRRoundingMode(I32) x F128 -> unsigned I64 */ 684 Iop_F128toF64, /* IRRoundingMode(I32) x F128 -> F64 */ 685 Iop_F128toF32, /* IRRoundingMode(I32) x F128 -> F32 */ 686 687 /* --- guest x86/amd64 specifics, not mandated by 754. --- */ 688 689 /* Binary ops, with rounding. 
*/ 690 /* :: IRRoundingMode(I32) x F64 x F64 -> F64 */ 691 Iop_AtanF64, /* FPATAN, arctan(arg1/arg2) */ 692 Iop_Yl2xF64, /* FYL2X, arg1 * log2(arg2) */ 693 Iop_Yl2xp1F64, /* FYL2XP1, arg1 * log2(arg2+1.0) */ 694 Iop_PRemF64, /* FPREM, non-IEEE remainder(arg1/arg2) */ 695 Iop_PRemC3210F64, /* C3210 flags resulting from FPREM, :: I32 */ 696 Iop_PRem1F64, /* FPREM1, IEEE remainder(arg1/arg2) */ 697 Iop_PRem1C3210F64, /* C3210 flags resulting from FPREM1, :: I32 */ 698 Iop_ScaleF64, /* FSCALE, arg1 * (2^RoundTowardsZero(arg2)) */ 699 /* Note that on x86 guest, PRem1{C3210} has the same behaviour 700 as the IEEE mandated RemF64, except it is limited in the 701 range of its operand. Hence the partialness. */ 702 703 /* Unary ops, with rounding. */ 704 /* :: IRRoundingMode(I32) x F64 -> F64 */ 705 Iop_SinF64, /* FSIN */ 706 Iop_CosF64, /* FCOS */ 707 Iop_TanF64, /* FTAN */ 708 Iop_2xm1F64, /* (2^arg - 1.0) */ 709 Iop_RoundF64toInt, /* F64 value to nearest integral value (still 710 as F64) */ 711 Iop_RoundF32toInt, /* F32 value to nearest integral value (still 712 as F32) */ 713 714 /* --- guest s390 specifics, not mandated by 754. --- */ 715 716 /* Fused multiply-add/sub */ 717 /* :: IRRoundingMode(I32) x F32 x F32 x F32 -> F32 718 (computes arg2 * arg3 +/- arg4) */ 719 Iop_MAddF32, Iop_MSubF32, 720 721 /* --- guest ppc32/64 specifics, not mandated by 754. --- */ 722 723 /* Ternary operations, with rounding. */ 724 /* Fused multiply-add/sub, with 112-bit intermediate 725 precision for ppc. 726 Also used to implement fused multiply-add/sub for s390. */ 727 /* :: IRRoundingMode(I32) x F64 x F64 x F64 -> F64 728 (computes arg2 * arg3 +/- arg4) */ 729 Iop_MAddF64, Iop_MSubF64, 730 731 /* Variants of the above which produce a 64-bit result but which 732 round their result to a IEEE float range first. 
*/ 733 /* :: IRRoundingMode(I32) x F64 x F64 x F64 -> F64 */ 734 Iop_MAddF64r32, Iop_MSubF64r32, 735 736 /* :: F64 -> F64 */ 737 Iop_Est5FRSqrt, /* reciprocal square root estimate, 5 good bits */ 738 Iop_RoundF64toF64_NEAREST, /* frin */ 739 Iop_RoundF64toF64_NegINF, /* frim */ 740 Iop_RoundF64toF64_PosINF, /* frip */ 741 Iop_RoundF64toF64_ZERO, /* friz */ 742 743 /* :: F64 -> F32 */ 744 Iop_TruncF64asF32, /* do F64->F32 truncation as per 'fsts' */ 745 746 /* :: IRRoundingMode(I32) x F64 -> F64 */ 747 Iop_RoundF64toF32, /* round F64 to nearest F32 value (still as F64) */ 748 /* NB: pretty much the same as Iop_F64toF32, except no change 749 of type. */ 750 751 /* ------------------ 32-bit SIMD Integer ------------------ */ 752 753 /* 32x1 saturating add/sub (ok, well, not really SIMD :) */ 754 Iop_QAdd32S, 755 Iop_QSub32S, 756 757 /* 16x2 add/sub, also signed/unsigned saturating variants */ 758 Iop_Add16x2, Iop_Sub16x2, 759 Iop_QAdd16Sx2, Iop_QAdd16Ux2, 760 Iop_QSub16Sx2, Iop_QSub16Ux2, 761 762 /* 16x2 signed/unsigned halving add/sub. For each lane, these 763 compute bits 16:1 of (eg) sx(argL) + sx(argR), 764 or zx(argL) - zx(argR) etc. */ 765 Iop_HAdd16Ux2, Iop_HAdd16Sx2, 766 Iop_HSub16Ux2, Iop_HSub16Sx2, 767 768 /* 8x4 add/sub, also signed/unsigned saturating variants */ 769 Iop_Add8x4, Iop_Sub8x4, 770 Iop_QAdd8Sx4, Iop_QAdd8Ux4, 771 Iop_QSub8Sx4, Iop_QSub8Ux4, 772 773 /* 8x4 signed/unsigned halving add/sub. For each lane, these 774 compute bits 8:1 of (eg) sx(argL) + sx(argR), 775 or zx(argL) - zx(argR) etc. */ 776 Iop_HAdd8Ux4, Iop_HAdd8Sx4, 777 Iop_HSub8Ux4, Iop_HSub8Sx4, 778 779 /* 8x4 sum of absolute unsigned differences. 
*/ 780 Iop_Sad8Ux4, 781 782 /* MISC (vector integer cmp != 0) */ 783 Iop_CmpNEZ16x2, Iop_CmpNEZ8x4, 784 785 /* ------------------ 64-bit SIMD FP ------------------------ */ 786 787 /* Conversion to/from int */ 788 Iop_I32UtoFx2, Iop_I32StoFx2, /* I32x2 -> F32x2 */ 789 Iop_FtoI32Ux2_RZ, Iop_FtoI32Sx2_RZ, /* F32x2 -> I32x2 */ 790 /* Fixed32 format is floating-point number with fixed number of fraction 791 bits. The number of fraction bits is passed as a second argument of 792 type I8. */ 793 Iop_F32ToFixed32Ux2_RZ, Iop_F32ToFixed32Sx2_RZ, /* fp -> fixed-point */ 794 Iop_Fixed32UToF32x2_RN, Iop_Fixed32SToF32x2_RN, /* fixed-point -> fp */ 795 796 /* Binary operations */ 797 Iop_Max32Fx2, Iop_Min32Fx2, 798 /* Pairwise Min and Max. See integer pairwise operations for more 799 details. */ 800 Iop_PwMax32Fx2, Iop_PwMin32Fx2, 801 /* Note: For the following compares, the arm front-end assumes a 802 nan in a lane of either argument returns zero for that lane. */ 803 Iop_CmpEQ32Fx2, Iop_CmpGT32Fx2, Iop_CmpGE32Fx2, 804 805 /* Vector Reciprocal Estimate finds an approximate reciprocal of each 806 element in the operand vector, and places the results in the destination 807 vector. */ 808 Iop_Recip32Fx2, 809 810 /* Vector Reciprocal Step computes (2.0 - arg1 * arg2). 811 Note, that if one of the arguments is zero and another one is infinity 812 of arbitrary sign the result of the operation is 2.0. */ 813 Iop_Recps32Fx2, 814 815 /* Vector Reciprocal Square Root Estimate finds an approximate reciprocal 816 square root of each element in the operand vector. */ 817 Iop_Rsqrte32Fx2, 818 819 /* Vector Reciprocal Square Root Step computes (3.0 - arg1 * arg2) / 2.0. 820 Note, that if one of the arguments is zero and another one is infinity 821 of arbitrary sign the result of the operation is 1.5. */ 822 Iop_Rsqrts32Fx2, 823 824 /* Unary */ 825 Iop_Neg32Fx2, Iop_Abs32Fx2, 826 827 /* ------------------ 64-bit SIMD Integer.
------------------ */ 828 829 /* MISC (vector integer cmp != 0) */ 830 Iop_CmpNEZ8x8, Iop_CmpNEZ16x4, Iop_CmpNEZ32x2, 831 832 /* ADDITION (normal / unsigned sat / signed sat) */ 833 Iop_Add8x8, Iop_Add16x4, Iop_Add32x2, 834 Iop_QAdd8Ux8, Iop_QAdd16Ux4, Iop_QAdd32Ux2, Iop_QAdd64Ux1, 835 Iop_QAdd8Sx8, Iop_QAdd16Sx4, Iop_QAdd32Sx2, Iop_QAdd64Sx1, 836 837 /* PAIRWISE operations */ 838 /* Iop_PwFoo16x4( [a,b,c,d], [e,f,g,h] ) = 839 [Foo16(a,b), Foo16(c,d), Foo16(e,f), Foo16(g,h)] */ 840 Iop_PwAdd8x8, Iop_PwAdd16x4, Iop_PwAdd32x2, 841 Iop_PwMax8Sx8, Iop_PwMax16Sx4, Iop_PwMax32Sx2, 842 Iop_PwMax8Ux8, Iop_PwMax16Ux4, Iop_PwMax32Ux2, 843 Iop_PwMin8Sx8, Iop_PwMin16Sx4, Iop_PwMin32Sx2, 844 Iop_PwMin8Ux8, Iop_PwMin16Ux4, Iop_PwMin32Ux2, 845 /* Longening variant is unary. The resulting vector contains half as 846 many elements as the operand, but each is twice as wide. 847 Example: 848 Iop_PwAddL16Ux4( [a,b,c,d] ) = [a+b,c+d] 849 where a+b and c+d are unsigned 32-bit values. */ 850 Iop_PwAddL8Ux8, Iop_PwAddL16Ux4, Iop_PwAddL32Ux2, 851 Iop_PwAddL8Sx8, Iop_PwAddL16Sx4, Iop_PwAddL32Sx2, 852 853 /* SUBTRACTION (normal / unsigned sat / signed sat) */ 854 Iop_Sub8x8, Iop_Sub16x4, Iop_Sub32x2, 855 Iop_QSub8Ux8, Iop_QSub16Ux4, Iop_QSub32Ux2, Iop_QSub64Ux1, 856 Iop_QSub8Sx8, Iop_QSub16Sx4, Iop_QSub32Sx2, Iop_QSub64Sx1, 857 858 /* ABSOLUTE VALUE */ 859 Iop_Abs8x8, Iop_Abs16x4, Iop_Abs32x2, 860 861 /* MULTIPLICATION (normal / high half of signed/unsigned / polynomial) */ 862 Iop_Mul8x8, Iop_Mul16x4, Iop_Mul32x2, 863 Iop_Mul32Fx2, 864 Iop_MulHi16Ux4, 865 Iop_MulHi16Sx4, 866 /* Polynomial multiplication treats its arguments as coefficients of 867 polynomials over {0, 1}.
*/ 868 Iop_PolynomialMul8x8, 869 870 /* Vector Saturating Doubling Multiply Returning High Half and 871 Vector Saturating Rounding Doubling Multiply Returning High Half */ 872 /* These IROp's multiply corresponding elements in two vectors, double 873 the results, and place the most significant half of the final results 874 in the destination vector. The results are truncated or rounded. If 875 any of the results overflow, they are saturated. */ 876 Iop_QDMulHi16Sx4, Iop_QDMulHi32Sx2, 877 Iop_QRDMulHi16Sx4, Iop_QRDMulHi32Sx2, 878 879 /* AVERAGING: note: (arg1 + arg2 + 1) >>u 1 */ 880 Iop_Avg8Ux8, 881 Iop_Avg16Ux4, 882 883 /* MIN/MAX */ 884 Iop_Max8Sx8, Iop_Max16Sx4, Iop_Max32Sx2, 885 Iop_Max8Ux8, Iop_Max16Ux4, Iop_Max32Ux2, 886 Iop_Min8Sx8, Iop_Min16Sx4, Iop_Min32Sx2, 887 Iop_Min8Ux8, Iop_Min16Ux4, Iop_Min32Ux2, 888 889 /* COMPARISON */ 890 Iop_CmpEQ8x8, Iop_CmpEQ16x4, Iop_CmpEQ32x2, 891 Iop_CmpGT8Ux8, Iop_CmpGT16Ux4, Iop_CmpGT32Ux2, 892 Iop_CmpGT8Sx8, Iop_CmpGT16Sx4, Iop_CmpGT32Sx2, 893 894 /* COUNT ones / leading zeroes / leading sign bits (not including topmost 895 bit) */ 896 Iop_Cnt8x8, 897 Iop_Clz8Sx8, Iop_Clz16Sx4, Iop_Clz32Sx2, 898 Iop_Cls8Sx8, Iop_Cls16Sx4, Iop_Cls32Sx2, 899 900 /* VECTOR x VECTOR SHIFT / ROTATE */ 901 Iop_Shl8x8, Iop_Shl16x4, Iop_Shl32x2, 902 Iop_Shr8x8, Iop_Shr16x4, Iop_Shr32x2, 903 Iop_Sar8x8, Iop_Sar16x4, Iop_Sar32x2, 904 Iop_Sal8x8, Iop_Sal16x4, Iop_Sal32x2, Iop_Sal64x1, 905 906 /* VECTOR x SCALAR SHIFT (shift amt :: Ity_I8) */ 907 Iop_ShlN8x8, Iop_ShlN16x4, Iop_ShlN32x2, 908 Iop_ShrN8x8, Iop_ShrN16x4, Iop_ShrN32x2, 909 Iop_SarN8x8, Iop_SarN16x4, Iop_SarN32x2, 910 911 /* VECTOR x VECTOR SATURATING SHIFT */ 912 Iop_QShl8x8, Iop_QShl16x4, Iop_QShl32x2, Iop_QShl64x1, 913 Iop_QSal8x8, Iop_QSal16x4, Iop_QSal32x2, Iop_QSal64x1, 914 /* VECTOR x INTEGER SATURATING SHIFT */ 915 Iop_QShlN8Sx8, Iop_QShlN16Sx4, Iop_QShlN32Sx2, Iop_QShlN64Sx1, 916 Iop_QShlN8x8, Iop_QShlN16x4, Iop_QShlN32x2, Iop_QShlN64x1, 917 Iop_QSalN8x8, Iop_QSalN16x4, 
Iop_QSalN32x2, Iop_QSalN64x1, 918 919 /* NARROWING (binary) 920 -- narrow 2xI64 into 1xI64, hi half from left arg */ 921 /* For saturated narrowing, I believe there are 4 variants of 922 the basic arithmetic operation, depending on the signedness 923 of argument and result. Here are examples that exemplify 924 what I mean: 925 926 QNarrow16Uto8U ( UShort x ) if (x >u 255) x = 255; 927 return x[7:0]; 928 929 QNarrow16Sto8S ( Short x ) if (x <s -128) x = -128; 930 if (x >s 127) x = 127; 931 return x[7:0]; 932 933 QNarrow16Uto8S ( UShort x ) if (x >u 127) x = 127; 934 return x[7:0]; 935 936 QNarrow16Sto8U ( Short x ) if (x <s 0) x = 0; 937 if (x >s 255) x = 255; 938 return x[7:0]; 939 */ 940 Iop_QNarrowBin16Sto8Ux8, 941 Iop_QNarrowBin16Sto8Sx8, Iop_QNarrowBin32Sto16Sx4, 942 Iop_NarrowBin16to8x8, Iop_NarrowBin32to16x4, 943 944 /* INTERLEAVING */ 945 /* Interleave lanes from low or high halves of 946 operands. Most-significant result lane is from the left 947 arg. */ 948 Iop_InterleaveHI8x8, Iop_InterleaveHI16x4, Iop_InterleaveHI32x2, 949 Iop_InterleaveLO8x8, Iop_InterleaveLO16x4, Iop_InterleaveLO32x2, 950 /* Interleave odd/even lanes of operands. Most-significant result lane 951 is from the left arg. Note that Interleave{Odd,Even}Lanes32x2 are 952 identical to Interleave{HI,LO}32x2 and so are omitted.*/ 953 Iop_InterleaveOddLanes8x8, Iop_InterleaveEvenLanes8x8, 954 Iop_InterleaveOddLanes16x4, Iop_InterleaveEvenLanes16x4, 955 956 /* CONCATENATION -- build a new value by concatenating either 957 the even or odd lanes of both operands. Note that 958 Cat{Odd,Even}Lanes32x2 are identical to Interleave{HI,LO}32x2 959 and so are omitted. 
*/ 960 Iop_CatOddLanes8x8, Iop_CatOddLanes16x4, 961 Iop_CatEvenLanes8x8, Iop_CatEvenLanes16x4, 962 963 /* GET / SET elements of VECTOR 964 GET is binop (I64, I8) -> I<elem_size> 965 SET is triop (I64, I8, I<elem_size>) -> I64 */ 966 /* Note: the arm back-end handles only constant second argument */ 967 Iop_GetElem8x8, Iop_GetElem16x4, Iop_GetElem32x2, 968 Iop_SetElem8x8, Iop_SetElem16x4, Iop_SetElem32x2, 969 970 /* DUPLICATING -- copy value to all lanes */ 971 Iop_Dup8x8, Iop_Dup16x4, Iop_Dup32x2, 972 973 /* EXTRACT -- copy 8-arg3 highest bytes from arg1 to 8-arg3 lowest bytes 974 of result and arg3 lowest bytes of arg2 to arg3 highest bytes of 975 result. 976 It is a triop: (I64, I64, I8) -> I64 */ 977 /* Note: the arm back-end handles only constant third argumnet. */ 978 Iop_Extract64, 979 980 /* REVERSE the order of elements in each Half-words, Words, 981 Double-words */ 982 /* Examples: 983 Reverse16_8x8([a,b,c,d,e,f,g,h]) = [b,a,d,c,f,e,h,g] 984 Reverse32_8x8([a,b,c,d,e,f,g,h]) = [d,c,b,a,h,g,f,e] 985 Reverse64_8x8([a,b,c,d,e,f,g,h]) = [h,g,f,e,d,c,b,a] */ 986 Iop_Reverse16_8x8, 987 Iop_Reverse32_8x8, Iop_Reverse32_16x4, 988 Iop_Reverse64_8x8, Iop_Reverse64_16x4, Iop_Reverse64_32x2, 989 990 /* PERMUTING -- copy src bytes to dst, 991 as indexed by control vector bytes: 992 for i in 0 .. 7 . result[i] = argL[ argR[i] ] 993 argR[i] values may only be in the range 0 .. 7, else behaviour 994 is undefined. */ 995 Iop_Perm8x8, 996 997 /* MISC CONVERSION -- get high bits of each byte lane, a la 998 x86/amd64 pmovmskb */ 999 Iop_GetMSBs8x8, /* I64 -> I8 */ 1000 1001 /* Vector Reciprocal Estimate and Vector Reciprocal Square Root Estimate 1002 See floating-point equiwalents for details. 
*/ 1003 Iop_Recip32x2, Iop_Rsqrte32x2, 1004 1005 /* ------------------ Decimal Floating Point ------------------ */ 1006 1007 /* ARITHMETIC INSTRUCTIONS 64-bit 1008 ---------------------------------- 1009 IRRoundingMode(I32) X D64 X D64 -> D64 1010 */ 1011 Iop_AddD64, Iop_SubD64, Iop_MulD64, Iop_DivD64, 1012 1013 /* ARITHMETIC INSTRUCTIONS 128-bit 1014 ---------------------------------- 1015 IRRoundingMode(I32) X D128 X D128 -> D128 1016 */ 1017 Iop_AddD128, Iop_SubD128, Iop_MulD128, Iop_DivD128, 1018 1019 /* SHIFT SIGNIFICAND INSTRUCTIONS 1020 * The DFP significand is shifted by the number of digits specified 1021 * by the U8 operand. Digits shifted out of the leftmost digit are 1022 * lost. Zeros are supplied to the vacated positions on the right. 1023 * The sign of the result is the same as the sign of the original 1024 * operand. 1025 * 1026 * D64 x U8 -> D64 left shift and right shift respectively */ 1027 Iop_ShlD64, Iop_ShrD64, 1028 1029 /* D128 x U8 -> D128 left shift and right shift respectively */ 1030 Iop_ShlD128, Iop_ShrD128, 1031 1032 1033 /* FORMAT CONVERSION INSTRUCTIONS 1034 * D32 -> D64 1035 */ 1036 Iop_D32toD64, 1037 1038 /* D64 -> D128 */ 1039 Iop_D64toD128, 1040 1041 /* I32S -> D128 */ 1042 Iop_I32StoD128, 1043 1044 /* I32U -> D128 */ 1045 Iop_I32UtoD128, 1046 1047 /* I64S -> D128 */ 1048 Iop_I64StoD128, 1049 1050 /* I64U -> D128 */ 1051 Iop_I64UtoD128, 1052 1053 /* IRRoundingMode(I32) x D64 -> D32 */ 1054 Iop_D64toD32, 1055 1056 /* IRRoundingMode(I32) x D128 -> D64 */ 1057 Iop_D128toD64, 1058 1059 /* I32S -> D64 */ 1060 Iop_I32StoD64, 1061 1062 /* I32U -> D64 */ 1063 Iop_I32UtoD64, 1064 1065 /* IRRoundingMode(I32) x I64 -> D64 */ 1066 Iop_I64StoD64, 1067 1068 /* IRRoundingMode(I32) x I64 -> D64 */ 1069 Iop_I64UtoD64, 1070 1071 /* IRRoundingMode(I32) x D64 -> I32 */ 1072 Iop_D64toI32S, 1073 1074 /* IRRoundingMode(I32) x D64 -> I32 */ 1075 Iop_D64toI32U, 1076 1077 /* IRRoundingMode(I32) x D64 -> I64 */ 1078 Iop_D64toI64S, 1079 1080 /* 
IRRoundingMode(I32) x D64 -> I64 */ 1081 Iop_D64toI64U, 1082 1083 /* IRRoundingMode(I32) x D128 -> I32 */ 1084 Iop_D128toI32S, 1085 1086 /* IRRoundingMode(I32) x D128 -> I32 */ 1087 Iop_D128toI32U, 1088 1089 /* IRRoundingMode(I32) x D128 -> I64 */ 1090 Iop_D128toI64S, 1091 1092 /* IRRoundingMode(I32) x D128 -> I64 */ 1093 Iop_D128toI64U, 1094 1095 /* IRRoundingMode(I32) x F32 -> D32 */ 1096 Iop_F32toD32, 1097 1098 /* IRRoundingMode(I32) x F32 -> D64 */ 1099 Iop_F32toD64, 1100 1101 /* IRRoundingMode(I32) x F32 -> D128 */ 1102 Iop_F32toD128, 1103 1104 /* IRRoundingMode(I32) x F64 -> D32 */ 1105 Iop_F64toD32, 1106 1107 /* IRRoundingMode(I32) x F64 -> D64 */ 1108 Iop_F64toD64, 1109 1110 /* IRRoundingMode(I32) x F64 -> D128 */ 1111 Iop_F64toD128, 1112 1113 /* IRRoundingMode(I32) x F128 -> D32 */ 1114 Iop_F128toD32, 1115 1116 /* IRRoundingMode(I32) x F128 -> D64 */ 1117 Iop_F128toD64, 1118 1119 /* IRRoundingMode(I32) x F128 -> D128 */ 1120 Iop_F128toD128, 1121 1122 /* IRRoundingMode(I32) x D32 -> F32 */ 1123 Iop_D32toF32, 1124 1125 /* IRRoundingMode(I32) x D32 -> F64 */ 1126 Iop_D32toF64, 1127 1128 /* IRRoundingMode(I32) x D32 -> F128 */ 1129 Iop_D32toF128, 1130 1131 /* IRRoundingMode(I32) x D64 -> F32 */ 1132 Iop_D64toF32, 1133 1134 /* IRRoundingMode(I32) x D64 -> F64 */ 1135 Iop_D64toF64, 1136 1137 /* IRRoundingMode(I32) x D64 -> F128 */ 1138 Iop_D64toF128, 1139 1140 /* IRRoundingMode(I32) x D128 -> F32 */ 1141 Iop_D128toF32, 1142 1143 /* IRRoundingMode(I32) x D128 -> F64 */ 1144 Iop_D128toF64, 1145 1146 /* IRRoundingMode(I32) x D128 -> F128 */ 1147 Iop_D128toF128, 1148 1149 /* ROUNDING INSTRUCTIONS 1150 * IRRoundingMode(I32) x D64 -> D64 1151 * The D64 operand, if a finite number, it is rounded to a 1152 * floating point integer value, i.e. no fractional part. 
1153 */ 1154 Iop_RoundD64toInt, 1155 1156 /* IRRoundingMode(I32) x D128 -> D128 */ 1157 Iop_RoundD128toInt, 1158 1159 /* COMPARE INSTRUCTIONS 1160 * D64 x D64 -> IRCmpD64Result(I32) */ 1161 Iop_CmpD64, 1162 1163 /* D128 x D128 -> IRCmpD128Result(I32) */ 1164 Iop_CmpD128, 1165 1166 /* COMPARE BIASED EXPONENET INSTRUCTIONS 1167 * D64 x D64 -> IRCmpD64Result(I32) */ 1168 Iop_CmpExpD64, 1169 1170 /* D128 x D128 -> IRCmpD128Result(I32) */ 1171 Iop_CmpExpD128, 1172 1173 /* QUANTIZE AND ROUND INSTRUCTIONS 1174 * The source operand is converted and rounded to the form with the 1175 * immediate exponent specified by the rounding and exponent parameter. 1176 * 1177 * The second operand is converted and rounded to the form 1178 * of the first operand's exponent and the rounded based on the specified 1179 * rounding mode parameter. 1180 * 1181 * IRRoundingMode(I32) x D64 x D64-> D64 */ 1182 Iop_QuantizeD64, 1183 1184 /* IRRoundingMode(I32) x D128 x D128 -> D128 */ 1185 Iop_QuantizeD128, 1186 1187 /* IRRoundingMode(I32) x I8 x D64 -> D64 1188 * The Decimal Floating point operand is rounded to the requested 1189 * significance given by the I8 operand as specified by the rounding 1190 * mode. 1191 */ 1192 Iop_SignificanceRoundD64, 1193 1194 /* IRRoundingMode(I32) x I8 x D128 -> D128 */ 1195 Iop_SignificanceRoundD128, 1196 1197 /* EXTRACT AND INSERT INSTRUCTIONS 1198 * D64 -> I64 1199 * The exponent of the D32 or D64 operand is extracted. The 1200 * extracted exponent is converted to a 64-bit signed binary integer. 1201 */ 1202 Iop_ExtractExpD64, 1203 1204 /* D128 -> I64 */ 1205 Iop_ExtractExpD128, 1206 1207 /* D64 -> I64 1208 * The number of significand digits of the D64 operand is extracted. 1209 * The number is stored as a 64-bit signed binary integer. 
1210 */ 1211 Iop_ExtractSigD64, 1212 1213 /* D128 -> I64 */ 1214 Iop_ExtractSigD128, 1215 1216 /* I64 x D64 -> D64 1217 * The exponent is specified by the first I64 operand the signed 1218 * significand is given by the second I64 value. The result is a D64 1219 * value consisting of the specified significand and exponent whose 1220 * sign is that of the specified significand. 1221 */ 1222 Iop_InsertExpD64, 1223 1224 /* I64 x D128 -> D128 */ 1225 Iop_InsertExpD128, 1226 1227 /* Support for 128-bit DFP type */ 1228 Iop_D64HLtoD128, Iop_D128HItoD64, Iop_D128LOtoD64, 1229 1230 /* I64 -> I64 1231 * Convert 50-bit densely packed BCD string to 60 bit BCD string 1232 */ 1233 Iop_DPBtoBCD, 1234 1235 /* I64 -> I64 1236 * Convert 60 bit BCD string to 50-bit densely packed BCD string 1237 */ 1238 Iop_BCDtoDPB, 1239 1240 /* Conversion I64 -> D64 */ 1241 Iop_ReinterpI64asD64, 1242 1243 /* Conversion D64 -> I64 */ 1244 Iop_ReinterpD64asI64, 1245 1246 /* ------------------ 128-bit SIMD FP. ------------------ */ 1247 1248 /* --- 32x4 vector FP --- */ 1249 1250 /* binary */ 1251 Iop_Add32Fx4, Iop_Sub32Fx4, Iop_Mul32Fx4, Iop_Div32Fx4, 1252 Iop_Max32Fx4, Iop_Min32Fx4, 1253 Iop_Add32Fx2, Iop_Sub32Fx2, 1254 /* Note: For the following compares, the ppc and arm front-ends assume a 1255 nan in a lane of either argument returns zero for that lane. */ 1256 Iop_CmpEQ32Fx4, Iop_CmpLT32Fx4, Iop_CmpLE32Fx4, Iop_CmpUN32Fx4, 1257 Iop_CmpGT32Fx4, Iop_CmpGE32Fx4, 1258 1259 /* Vector Absolute */ 1260 Iop_Abs32Fx4, 1261 1262 /* Pairwise Max and Min. See integer pairwise operations for details. */ 1263 Iop_PwMax32Fx4, Iop_PwMin32Fx4, 1264 1265 /* unary */ 1266 Iop_Sqrt32Fx4, Iop_RSqrt32Fx4, 1267 Iop_Neg32Fx4, 1268 1269 /* Vector Reciprocal Estimate finds an approximate reciprocal of each 1270 element in the operand vector, and places the results in the destination 1271 vector. */ 1272 Iop_Recip32Fx4, 1273 1274 /* Vector Reciprocal Step computes (2.0 - arg1 * arg2). 
1275 Note, that if one of the arguments is zero and another one is infinity 1276 of arbitrary sign the result of the operation is 2.0. */ 1277 Iop_Recps32Fx4, 1278 1279 /* Vector Reciprocal Square Root Estimate finds an approximate reciprocal 1280 square root of each element in the operand vector. */ 1281 Iop_Rsqrte32Fx4, 1282 1283 /* Vector Reciprocal Square Root Step computes (3.0 - arg1 * arg2) / 2.0. 1284 Note, that of one of the arguments is zero and another one is infiinty 1285 of arbitrary sign the result of the operation is 1.5. */ 1286 Iop_Rsqrts32Fx4, 1287 1288 /* --- Int to/from FP conversion --- */ 1289 /* Unlike the standard fp conversions, these irops take no 1290 rounding mode argument. Instead the irop trailers _R{M,P,N,Z} 1291 indicate the mode: {-inf, +inf, nearest, zero} respectively. */ 1292 Iop_I32UtoFx4, Iop_I32StoFx4, /* I32x4 -> F32x4 */ 1293 Iop_FtoI32Ux4_RZ, Iop_FtoI32Sx4_RZ, /* F32x4 -> I32x4 */ 1294 Iop_QFtoI32Ux4_RZ, Iop_QFtoI32Sx4_RZ, /* F32x4 -> I32x4 (with saturation) */ 1295 Iop_RoundF32x4_RM, Iop_RoundF32x4_RP, /* round to fp integer */ 1296 Iop_RoundF32x4_RN, Iop_RoundF32x4_RZ, /* round to fp integer */ 1297 /* Fixed32 format is floating-point number with fixed number of fraction 1298 bits. The number of fraction bits is passed as a second argument of 1299 type I8. */ 1300 Iop_F32ToFixed32Ux4_RZ, Iop_F32ToFixed32Sx4_RZ, /* fp -> fixed-point */ 1301 Iop_Fixed32UToF32x4_RN, Iop_Fixed32SToF32x4_RN, /* fixed-point -> fp */ 1302 1303 /* --- Single to/from half conversion --- */ 1304 /* FIXME: what kind of rounding in F32x4 -> F16x4 case? */ 1305 Iop_F32toF16x4, Iop_F16toF32x4, /* F32x4 <-> F16x4 */ 1306 1307 /* --- 32x4 lowest-lane-only scalar FP --- */ 1308 1309 /* In binary cases, upper 3/4 is copied from first operand. In 1310 unary cases, upper 3/4 is copied from the operand. 
*/ 1311 1312 /* binary */ 1313 Iop_Add32F0x4, Iop_Sub32F0x4, Iop_Mul32F0x4, Iop_Div32F0x4, 1314 Iop_Max32F0x4, Iop_Min32F0x4, 1315 Iop_CmpEQ32F0x4, Iop_CmpLT32F0x4, Iop_CmpLE32F0x4, Iop_CmpUN32F0x4, 1316 1317 /* unary */ 1318 Iop_Recip32F0x4, Iop_Sqrt32F0x4, Iop_RSqrt32F0x4, 1319 1320 /* --- 64x2 vector FP --- */ 1321 1322 /* binary */ 1323 Iop_Add64Fx2, Iop_Sub64Fx2, Iop_Mul64Fx2, Iop_Div64Fx2, 1324 Iop_Max64Fx2, Iop_Min64Fx2, 1325 Iop_CmpEQ64Fx2, Iop_CmpLT64Fx2, Iop_CmpLE64Fx2, Iop_CmpUN64Fx2, 1326 1327 /* unary */ 1328 Iop_Recip64Fx2, Iop_Sqrt64Fx2, Iop_RSqrt64Fx2, 1329 1330 /* --- 64x2 lowest-lane-only scalar FP --- */ 1331 1332 /* In binary cases, upper half is copied from first operand. In 1333 unary cases, upper half is copied from the operand. */ 1334 1335 /* binary */ 1336 Iop_Add64F0x2, Iop_Sub64F0x2, Iop_Mul64F0x2, Iop_Div64F0x2, 1337 Iop_Max64F0x2, Iop_Min64F0x2, 1338 Iop_CmpEQ64F0x2, Iop_CmpLT64F0x2, Iop_CmpLE64F0x2, Iop_CmpUN64F0x2, 1339 1340 /* unary */ 1341 Iop_Recip64F0x2, Iop_Sqrt64F0x2, Iop_RSqrt64F0x2, 1342 1343 /* --- pack / unpack --- */ 1344 1345 /* 64 <-> 128 bit vector */ 1346 Iop_V128to64, // :: V128 -> I64, low half 1347 Iop_V128HIto64, // :: V128 -> I64, high half 1348 Iop_64HLtoV128, // :: (I64,I64) -> V128 1349 1350 Iop_64UtoV128, 1351 Iop_SetV128lo64, 1352 1353 /* 32 <-> 128 bit vector */ 1354 Iop_32UtoV128, 1355 Iop_V128to32, // :: V128 -> I32, lowest lane 1356 Iop_SetV128lo32, // :: (V128,I32) -> V128 1357 1358 /* ------------------ 128-bit SIMD Integer. 
------------------ */ 1359 1360 /* BITWISE OPS */ 1361 Iop_NotV128, 1362 Iop_AndV128, Iop_OrV128, Iop_XorV128, 1363 1364 /* VECTOR SHIFT (shift amt :: Ity_I8) */ 1365 Iop_ShlV128, Iop_ShrV128, 1366 1367 /* MISC (vector integer cmp != 0) */ 1368 Iop_CmpNEZ8x16, Iop_CmpNEZ16x8, Iop_CmpNEZ32x4, Iop_CmpNEZ64x2, 1369 1370 /* ADDITION (normal / unsigned sat / signed sat) */ 1371 Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_Add64x2, 1372 Iop_QAdd8Ux16, Iop_QAdd16Ux8, Iop_QAdd32Ux4, Iop_QAdd64Ux2, 1373 Iop_QAdd8Sx16, Iop_QAdd16Sx8, Iop_QAdd32Sx4, Iop_QAdd64Sx2, 1374 1375 /* SUBTRACTION (normal / unsigned sat / signed sat) */ 1376 Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2, 1377 Iop_QSub8Ux16, Iop_QSub16Ux8, Iop_QSub32Ux4, Iop_QSub64Ux2, 1378 Iop_QSub8Sx16, Iop_QSub16Sx8, Iop_QSub32Sx4, Iop_QSub64Sx2, 1379 1380 /* MULTIPLICATION (normal / high half of signed/unsigned) */ 1381 Iop_Mul8x16, Iop_Mul16x8, Iop_Mul32x4, 1382 Iop_MulHi16Ux8, Iop_MulHi32Ux4, 1383 Iop_MulHi16Sx8, Iop_MulHi32Sx4, 1384 /* (widening signed/unsigned of even lanes, with lowest lane=zero) */ 1385 Iop_MullEven8Ux16, Iop_MullEven16Ux8, 1386 Iop_MullEven8Sx16, Iop_MullEven16Sx8, 1387 /* FIXME: document these */ 1388 Iop_Mull8Ux8, Iop_Mull8Sx8, 1389 Iop_Mull16Ux4, Iop_Mull16Sx4, 1390 Iop_Mull32Ux2, Iop_Mull32Sx2, 1391 /* Vector Saturating Doubling Multiply Returning High Half and 1392 Vector Saturating Rounding Doubling Multiply Returning High Half */ 1393 /* These IROp's multiply corresponding elements in two vectors, double 1394 the results, and place the most significant half of the final results 1395 in the destination vector. The results are truncated or rounded. If 1396 any of the results overflow, they are saturated. 
*/ 1397 Iop_QDMulHi16Sx8, Iop_QDMulHi32Sx4, 1398 Iop_QRDMulHi16Sx8, Iop_QRDMulHi32Sx4, 1399 /* Doubling saturating multiplication (long) (I64, I64) -> V128 */ 1400 Iop_QDMulLong16Sx4, Iop_QDMulLong32Sx2, 1401 /* Plynomial multiplication treats it's arguments as coefficients of 1402 polynoms over {0, 1}. */ 1403 Iop_PolynomialMul8x16, /* (V128, V128) -> V128 */ 1404 Iop_PolynomialMull8x8, /* (I64, I64) -> V128 */ 1405 1406 /* PAIRWISE operations */ 1407 /* Iop_PwFoo16x4( [a,b,c,d], [e,f,g,h] ) = 1408 [Foo16(a,b), Foo16(c,d), Foo16(e,f), Foo16(g,h)] */ 1409 Iop_PwAdd8x16, Iop_PwAdd16x8, Iop_PwAdd32x4, 1410 Iop_PwAdd32Fx2, 1411 /* Longening variant is unary. The resulting vector contains two times 1412 less elements than operand, but they are two times wider. 1413 Example: 1414 Iop_PwAddL16Ux4( [a,b,c,d] ) = [a+b,c+d] 1415 where a+b and c+d are unsigned 32-bit values. */ 1416 Iop_PwAddL8Ux16, Iop_PwAddL16Ux8, Iop_PwAddL32Ux4, 1417 Iop_PwAddL8Sx16, Iop_PwAddL16Sx8, Iop_PwAddL32Sx4, 1418 1419 /* ABSOLUTE VALUE */ 1420 Iop_Abs8x16, Iop_Abs16x8, Iop_Abs32x4, 1421 1422 /* AVERAGING: note: (arg1 + arg2 + 1) >>u 1 */ 1423 Iop_Avg8Ux16, Iop_Avg16Ux8, Iop_Avg32Ux4, 1424 Iop_Avg8Sx16, Iop_Avg16Sx8, Iop_Avg32Sx4, 1425 1426 /* MIN/MAX */ 1427 Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4, 1428 Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4, 1429 Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4, 1430 Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4, 1431 1432 /* COMPARISON */ 1433 Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4, Iop_CmpEQ64x2, 1434 Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2, 1435 Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4, 1436 1437 /* COUNT ones / leading zeroes / leading sign bits (not including topmost 1438 bit) */ 1439 Iop_Cnt8x16, 1440 Iop_Clz8Sx16, Iop_Clz16Sx8, Iop_Clz32Sx4, 1441 Iop_Cls8Sx16, Iop_Cls16Sx8, Iop_Cls32Sx4, 1442 1443 /* VECTOR x SCALAR SHIFT (shift amt :: Ity_I8) */ 1444 Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2, 1445 Iop_ShrN8x16, 
Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2, 1446 Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2, 1447 1448 /* VECTOR x VECTOR SHIFT / ROTATE */ 1449 Iop_Shl8x16, Iop_Shl16x8, Iop_Shl32x4, Iop_Shl64x2, 1450 Iop_Shr8x16, Iop_Shr16x8, Iop_Shr32x4, Iop_Shr64x2, 1451 Iop_Sar8x16, Iop_Sar16x8, Iop_Sar32x4, Iop_Sar64x2, 1452 Iop_Sal8x16, Iop_Sal16x8, Iop_Sal32x4, Iop_Sal64x2, 1453 Iop_Rol8x16, Iop_Rol16x8, Iop_Rol32x4, 1454 1455 /* VECTOR x VECTOR SATURATING SHIFT */ 1456 Iop_QShl8x16, Iop_QShl16x8, Iop_QShl32x4, Iop_QShl64x2, 1457 Iop_QSal8x16, Iop_QSal16x8, Iop_QSal32x4, Iop_QSal64x2, 1458 /* VECTOR x INTEGER SATURATING SHIFT */ 1459 Iop_QShlN8Sx16, Iop_QShlN16Sx8, Iop_QShlN32Sx4, Iop_QShlN64Sx2, 1460 Iop_QShlN8x16, Iop_QShlN16x8, Iop_QShlN32x4, Iop_QShlN64x2, 1461 Iop_QSalN8x16, Iop_QSalN16x8, Iop_QSalN32x4, Iop_QSalN64x2, 1462 1463 /* NARROWING (binary) 1464 -- narrow 2xV128 into 1xV128, hi half from left arg */ 1465 /* See comments above w.r.t. U vs S issues in saturated narrowing. */ 1466 Iop_QNarrowBin16Sto8Ux16, Iop_QNarrowBin32Sto16Ux8, 1467 Iop_QNarrowBin16Sto8Sx16, Iop_QNarrowBin32Sto16Sx8, 1468 Iop_QNarrowBin16Uto8Ux16, Iop_QNarrowBin32Uto16Ux8, 1469 Iop_NarrowBin16to8x16, Iop_NarrowBin32to16x8, 1470 1471 /* NARROWING (unary) -- narrow V128 into I64 */ 1472 Iop_NarrowUn16to8x8, Iop_NarrowUn32to16x4, Iop_NarrowUn64to32x2, 1473 /* Saturating narrowing from signed source to signed/unsigned destination */ 1474 Iop_QNarrowUn16Sto8Sx8, Iop_QNarrowUn32Sto16Sx4, Iop_QNarrowUn64Sto32Sx2, 1475 Iop_QNarrowUn16Sto8Ux8, Iop_QNarrowUn32Sto16Ux4, Iop_QNarrowUn64Sto32Ux2, 1476 /* Saturating narrowing from unsigned source to unsigned destination */ 1477 Iop_QNarrowUn16Uto8Ux8, Iop_QNarrowUn32Uto16Ux4, Iop_QNarrowUn64Uto32Ux2, 1478 1479 /* WIDENING -- sign or zero extend each element of the argument 1480 vector to the twice original size. The resulting vector consists of 1481 the same number of elements but each element and the vector itself 1482 are twice as wide. 
1483 All operations are I64->V128. 1484 Example 1485 Iop_Widen32Sto64x2( [a, b] ) = [c, d] 1486 where c = Iop_32Sto64(a) and d = Iop_32Sto64(b) */ 1487 Iop_Widen8Uto16x8, Iop_Widen16Uto32x4, Iop_Widen32Uto64x2, 1488 Iop_Widen8Sto16x8, Iop_Widen16Sto32x4, Iop_Widen32Sto64x2, 1489 1490 /* INTERLEAVING */ 1491 /* Interleave lanes from low or high halves of 1492 operands. Most-significant result lane is from the left 1493 arg. */ 1494 Iop_InterleaveHI8x16, Iop_InterleaveHI16x8, 1495 Iop_InterleaveHI32x4, Iop_InterleaveHI64x2, 1496 Iop_InterleaveLO8x16, Iop_InterleaveLO16x8, 1497 Iop_InterleaveLO32x4, Iop_InterleaveLO64x2, 1498 /* Interleave odd/even lanes of operands. Most-significant result lane 1499 is from the left arg. */ 1500 Iop_InterleaveOddLanes8x16, Iop_InterleaveEvenLanes8x16, 1501 Iop_InterleaveOddLanes16x8, Iop_InterleaveEvenLanes16x8, 1502 Iop_InterleaveOddLanes32x4, Iop_InterleaveEvenLanes32x4, 1503 1504 /* CONCATENATION -- build a new value by concatenating either 1505 the even or odd lanes of both operands. */ 1506 Iop_CatOddLanes8x16, Iop_CatOddLanes16x8, Iop_CatOddLanes32x4, 1507 Iop_CatEvenLanes8x16, Iop_CatEvenLanes16x8, Iop_CatEvenLanes32x4, 1508 1509 /* GET elements of VECTOR 1510 GET is binop (V128, I8) -> I<elem_size> */ 1511 /* Note: the arm back-end handles only constant second argument. */ 1512 Iop_GetElem8x16, Iop_GetElem16x8, Iop_GetElem32x4, Iop_GetElem64x2, 1513 1514 /* DUPLICATING -- copy value to all lanes */ 1515 Iop_Dup8x16, Iop_Dup16x8, Iop_Dup32x4, 1516 1517 /* EXTRACT -- copy 16-arg3 highest bytes from arg1 to 16-arg3 lowest bytes 1518 of result and arg3 lowest bytes of arg2 to arg3 highest bytes of 1519 result. 1520 It is a triop: (V128, V128, I8) -> V128 */ 1521 /* Note: the ARM back end handles only constant arg3 in this operation. 
*/ 1522 Iop_ExtractV128, 1523 1524 /* REVERSE the order of elements in each Half-words, Words, 1525 Double-words */ 1526 /* Examples: 1527 Reverse32_16x8([a,b,c,d,e,f,g,h]) = [b,a,d,c,f,e,h,g] 1528 Reverse64_16x8([a,b,c,d,e,f,g,h]) = [d,c,b,a,h,g,f,e] */ 1529 Iop_Reverse16_8x16, 1530 Iop_Reverse32_8x16, Iop_Reverse32_16x8, 1531 Iop_Reverse64_8x16, Iop_Reverse64_16x8, Iop_Reverse64_32x4, 1532 1533 /* PERMUTING -- copy src bytes to dst, 1534 as indexed by control vector bytes: 1535 for i in 0 .. 15 . result[i] = argL[ argR[i] ] 1536 argR[i] values may only be in the range 0 .. 15, else behaviour 1537 is undefined. */ 1538 Iop_Perm8x16, 1539 Iop_Perm32x4, /* ditto, except argR values are restricted to 0 .. 3 */ 1540 1541 /* MISC CONVERSION -- get high bits of each byte lane, a la 1542 x86/amd64 pmovmskb */ 1543 Iop_GetMSBs8x16, /* V128 -> I16 */ 1544 1545 /* Vector Reciprocal Estimate and Vector Reciprocal Square Root Estimate 1546 See floating-point equiwalents for details. */ 1547 Iop_Recip32x4, Iop_Rsqrte32x4, 1548 1549 /* ------------------ 256-bit SIMD Integer. 
------------------ */ 1550 1551 /* Pack/unpack */ 1552 Iop_V256to64_0, // V256 -> I64, extract least significant lane 1553 Iop_V256to64_1, 1554 Iop_V256to64_2, 1555 Iop_V256to64_3, // V256 -> I64, extract most significant lane 1556 1557 Iop_64x4toV256, // (I64,I64,I64,I64)->V256 1558 // first arg is most significant lane 1559 1560 Iop_V256toV128_0, // V256 -> V128, less significant lane 1561 Iop_V256toV128_1, // V256 -> V128, more significant lane 1562 Iop_V128HLtoV256, // (V128,V128)->V256, first arg is most signif 1563 1564 Iop_AndV256, 1565 Iop_OrV256, 1566 Iop_XorV256, 1567 Iop_NotV256, 1568 1569 /* MISC (vector integer cmp != 0) */ 1570 Iop_CmpNEZ8x32, Iop_CmpNEZ16x16, Iop_CmpNEZ32x8, Iop_CmpNEZ64x4, 1571 1572 Iop_Add8x32, Iop_Add16x16, Iop_Add32x8, Iop_Add64x4, 1573 Iop_Sub8x32, Iop_Sub16x16, Iop_Sub32x8, Iop_Sub64x4, 1574 1575 Iop_CmpEQ8x32, Iop_CmpEQ16x16, Iop_CmpEQ32x8, Iop_CmpEQ64x4, 1576 Iop_CmpGT8Sx32, Iop_CmpGT16Sx16, Iop_CmpGT32Sx8, Iop_CmpGT64Sx4, 1577 1578 Iop_ShlN16x16, Iop_ShlN32x8, Iop_ShlN64x4, 1579 Iop_ShrN16x16, Iop_ShrN32x8, Iop_ShrN64x4, 1580 Iop_SarN16x16, Iop_SarN32x8, 1581 1582 Iop_Max8Sx32, Iop_Max16Sx16, Iop_Max32Sx8, 1583 Iop_Max8Ux32, Iop_Max16Ux16, Iop_Max32Ux8, 1584 Iop_Min8Sx32, Iop_Min16Sx16, Iop_Min32Sx8, 1585 Iop_Min8Ux32, Iop_Min16Ux16, Iop_Min32Ux8, 1586 1587 Iop_Mul16x16, Iop_Mul32x8, 1588 Iop_MulHi16Ux16, Iop_MulHi16Sx16, 1589 1590 Iop_QAdd8Ux32, Iop_QAdd16Ux16, 1591 Iop_QAdd8Sx32, Iop_QAdd16Sx16, 1592 Iop_QSub8Ux32, Iop_QSub16Ux16, 1593 Iop_QSub8Sx32, Iop_QSub16Sx16, 1594 1595 Iop_Avg8Ux32, Iop_Avg16Ux16, 1596 1597 Iop_Perm32x8, 1598 1599 /* ------------------ 256-bit SIMD FP. 
------------------ */
      Iop_Add64Fx4,
      Iop_Sub64Fx4,
      Iop_Mul64Fx4,
      Iop_Div64Fx4,
      Iop_Add32Fx8,
      Iop_Sub32Fx8,
      Iop_Mul32Fx8,
      Iop_Div32Fx8,

      Iop_Sqrt32Fx8,
      Iop_Sqrt64Fx4,
      Iop_RSqrt32Fx8,
      Iop_Recip32Fx8,

      Iop_Max32Fx8, Iop_Min32Fx8,
      Iop_Max64Fx4, Iop_Min64Fx4,
      Iop_LAST      /* must be the last enumerator */
   }
   IROp;

/* Pretty-print an op. */
extern void ppIROp ( IROp );


/* Encoding of IEEE754-specified rounding modes.
   Note, various front and back ends rely on the actual numerical
   values of these, so do not change them. */
typedef
   enum {
      Irrm_NEAREST              = 0,  // Round to nearest, ties to even
      Irrm_NegINF               = 1,  // Round to negative infinity
      Irrm_PosINF               = 2,  // Round to positive infinity
      Irrm_ZERO                 = 3,  // Round toward zero
      Irrm_NEAREST_TIE_AWAY_0   = 4,  // Round to nearest, ties away from 0
      Irrm_PREPARE_SHORTER      = 5,  // Round to prepare for shorter
                                      // precision
      Irrm_AWAY_FROM_ZERO       = 6,  // Round away from 0
      Irrm_NEAREST_TIE_TOWARD_0 = 7   // Round to nearest, ties towards 0
   }
   IRRoundingMode;

/* Binary floating point comparison result values.
   This is also derived from what IA32 does.
   NOTE(review): the names suggest unordered / less-than /
   greater-than / equal, and the values look like combinations of the
   0x01, 0x04 and 0x40 flag bits that an IA32 FP compare sets --
   confirm against the users of this type before relying on that. */
typedef
   enum {
      Ircr_UN = 0x45,
      Ircr_LT = 0x01,
      Ircr_GT = 0x00,
      Ircr_EQ = 0x40
   }
   IRCmpFResult;

/* Width-specific names for the result type; all three are plain
   aliases of IRCmpFResult. */
typedef IRCmpFResult IRCmpF32Result;
typedef IRCmpFResult IRCmpF64Result;
typedef IRCmpFResult IRCmpF128Result;

/* Decimal floating point result values.  These are aliases of
   IRCmpFResult too, so they share its encoding. */
typedef IRCmpFResult IRCmpDResult;
typedef IRCmpDResult IRCmpD64Result;
typedef IRCmpDResult IRCmpD128Result;

/* ------------------ Expressions ------------------ */

typedef struct _IRQop   IRQop;   /* forward declaration */
typedef struct _IRTriop IRTriop; /* forward declaration */


/* The different kinds of expressions.
Their meaning is explained below 1668 in the comments for IRExpr. */ 1669typedef 1670 enum { 1671 Iex_Binder=0x1900, 1672 Iex_Get, 1673 Iex_GetI, 1674 Iex_RdTmp, 1675 Iex_Qop, 1676 Iex_Triop, 1677 Iex_Binop, 1678 Iex_Unop, 1679 Iex_Load, 1680 Iex_Const, 1681 Iex_ITE, 1682 Iex_CCall 1683 } 1684 IRExprTag; 1685 1686/* An expression. Stored as a tagged union. 'tag' indicates what kind 1687 of expression this is. 'Iex' is the union that holds the fields. If 1688 an IRExpr 'e' has e.tag equal to Iex_Load, then it's a load 1689 expression, and the fields can be accessed with 1690 'e.Iex.Load.<fieldname>'. 1691 1692 For each kind of expression, we show what it looks like when 1693 pretty-printed with ppIRExpr(). 1694*/ 1695typedef 1696 struct _IRExpr 1697 IRExpr; 1698 1699struct _IRExpr { 1700 IRExprTag tag; 1701 union { 1702 /* Used only in pattern matching within Vex. Should not be seen 1703 outside of Vex. */ 1704 struct { 1705 Int binder; 1706 } Binder; 1707 1708 /* Read a guest register, at a fixed offset in the guest state. 1709 ppIRExpr output: GET:<ty>(<offset>), eg. GET:I32(0) 1710 */ 1711 struct { 1712 Int offset; /* Offset into the guest state */ 1713 IRType ty; /* Type of the value being read */ 1714 } Get; 1715 1716 /* Read a guest register at a non-fixed offset in the guest 1717 state. This allows circular indexing into parts of the guest 1718 state, which is essential for modelling situations where the 1719 identity of guest registers is not known until run time. One 1720 example is the x87 FP register stack. 1721 1722 The part of the guest state to be treated as a circular array 1723 is described in the IRRegArray 'descr' field. It holds the 1724 offset of the first element in the array, the type of each 1725 element, and the number of elements. 1726 1727 The array index is indicated rather indirectly, in a way 1728 which makes optimisation easy: as the sum of variable part 1729 (the 'ix' field) and a constant offset (the 'bias' field). 
1730 1731 Since the indexing is circular, the actual array index to use 1732 is computed as (ix + bias) % num-of-elems-in-the-array. 1733 1734 Here's an example. The description 1735 1736 (96:8xF64)[t39,-7] 1737 1738 describes an array of 8 F64-typed values, the 1739 guest-state-offset of the first being 96. This array is 1740 being indexed at (t39 - 7) % 8. 1741 1742 It is important to get the array size/type exactly correct 1743 since IR optimisation looks closely at such info in order to 1744 establish aliasing/non-aliasing between seperate GetI and 1745 PutI events, which is used to establish when they can be 1746 reordered, etc. Putting incorrect info in will lead to 1747 obscure IR optimisation bugs. 1748 1749 ppIRExpr output: GETI<descr>[<ix>,<bias] 1750 eg. GETI(128:8xI8)[t1,0] 1751 */ 1752 struct { 1753 IRRegArray* descr; /* Part of guest state treated as circular */ 1754 IRExpr* ix; /* Variable part of index into array */ 1755 Int bias; /* Constant offset part of index into array */ 1756 } GetI; 1757 1758 /* The value held by a temporary. 1759 ppIRExpr output: t<tmp>, eg. t1 1760 */ 1761 struct { 1762 IRTemp tmp; /* The temporary number */ 1763 } RdTmp; 1764 1765 /* A quaternary operation. 1766 ppIRExpr output: <op>(<arg1>, <arg2>, <arg3>, <arg4>), 1767 eg. MAddF64r32(t1, t2, t3, t4) 1768 */ 1769 struct { 1770 IRQop* details; 1771 } Qop; 1772 1773 /* A ternary operation. 1774 ppIRExpr output: <op>(<arg1>, <arg2>, <arg3>), 1775 eg. MulF64(1, 2.0, 3.0) 1776 */ 1777 struct { 1778 IRTriop* details; 1779 } Triop; 1780 1781 /* A binary operation. 1782 ppIRExpr output: <op>(<arg1>, <arg2>), eg. Add32(t1,t2) 1783 */ 1784 struct { 1785 IROp op; /* op-code */ 1786 IRExpr* arg1; /* operand 1 */ 1787 IRExpr* arg2; /* operand 2 */ 1788 } Binop; 1789 1790 /* A unary operation. 1791 ppIRExpr output: <op>(<arg>), eg. 
Neg8(t1) 1792 */ 1793 struct { 1794 IROp op; /* op-code */ 1795 IRExpr* arg; /* operand */ 1796 } Unop; 1797 1798 /* A load from memory -- a normal load, not a load-linked. 1799 Load-Linkeds (and Store-Conditionals) are instead represented 1800 by IRStmt.LLSC since Load-Linkeds have side effects and so 1801 are not semantically valid IRExpr's. 1802 ppIRExpr output: LD<end>:<ty>(<addr>), eg. LDle:I32(t1) 1803 */ 1804 struct { 1805 IREndness end; /* Endian-ness of the load */ 1806 IRType ty; /* Type of the loaded value */ 1807 IRExpr* addr; /* Address being loaded from */ 1808 } Load; 1809 1810 /* A constant-valued expression. 1811 ppIRExpr output: <con>, eg. 0x4:I32 1812 */ 1813 struct { 1814 IRConst* con; /* The constant itself */ 1815 } Const; 1816 1817 /* A call to a pure (no side-effects) helper C function. 1818 1819 Within the 'cee' field, 'name' is the function's name. It is 1820 only used for pretty-printing purposes. The address to call 1821 (host address, of course) is stored in the 'addr' field 1822 inside 'cee'. 1823 1824 The 'args' field is a NULL-terminated array of arguments. 1825 The stated return IRType, and the implied argument types, 1826 must match that of the function being called well enough so 1827 that the back end can actually generate correct code for the 1828 call. 1829 1830 The called function **must** satisfy the following: 1831 1832 * no side effects -- must be a pure function, the result of 1833 which depends only on the passed parameters. 1834 1835 * it may not look at, nor modify, any of the guest state 1836 since that would hide guest state transitions from 1837 instrumenters 1838 1839 * it may not access guest memory, since that would hide 1840 guest memory transactions from the instrumenters 1841 1842 * it must not assume that arguments are being evaluated in a 1843 particular order. The order of evaluation is unspecified. 1844 1845 This is restrictive, but makes the semantics clean, and does 1846 not interfere with IR optimisation.
1847 1848 If you want to call a helper which can mess with guest state 1849 and/or memory, instead use Ist_Dirty. This is a lot more 1850 flexible, but you have to give a bunch of details about what 1851 the helper does (and you better be telling the truth, 1852 otherwise any derived instrumentation will be wrong). Also 1853 Ist_Dirty inhibits various IR optimisations and so can cause 1854 quite poor code to be generated. Try to avoid it. 1855 1856 In principle it would be allowable to have the arg vector 1857 contain the special value IRExprP__VECRET, although not 1858 IRExprP__BBPTR. However, at the moment there is no 1859 requirement for clean helper calls to be able to return V128 1860 or V256 values. Hence this is not allowed. 1861 1862 ppIRExpr output: <cee>(<args>):<retty> 1863 eg. foo{0x80489304}(t1, t2):I32 1864 */ 1865 struct { 1866 IRCallee* cee; /* Function to call. */ 1867 IRType retty; /* Type of return value. */ 1868 IRExpr** args; /* Vector of argument expressions. */ 1869 } CCall; 1870 1871 /* A ternary if-then-else operator. It returns iftrue if cond is 1872 nonzero, iffalse otherwise. Note that it is STRICT, ie. both 1873 iftrue and iffalse are evaluated in all cases. 1874 1875 ppIRExpr output: ITE(<cond>,<iftrue>,<iffalse>), 1876 eg. ITE(t6,t7,t8) 1877 */ 1878 struct { 1879 IRExpr* cond; /* Condition */ 1880 IRExpr* iftrue; /* True expression */ 1881 IRExpr* iffalse; /* False expression */ 1882 } ITE; 1883 } Iex; 1884}; 1885 1886/* Expression auxiliaries: a ternary expression. */ 1887struct _IRTriop { 1888 IROp op; /* op-code */ 1889 IRExpr* arg1; /* operand 1 */ 1890 IRExpr* arg2; /* operand 2 */ 1891 IRExpr* arg3; /* operand 3 */ 1892}; 1893 1894/* Expression auxiliaries: a quaternary expression.
*/ 1895struct _IRQop { 1896 IROp op; /* op-code */ 1897 IRExpr* arg1; /* operand 1 */ 1898 IRExpr* arg2; /* operand 2 */ 1899 IRExpr* arg3; /* operand 3 */ 1900 IRExpr* arg4; /* operand 4 */ 1901}; 1902 1903 1904/* Two special constants of type IRExpr*, which can ONLY be used in 1905 argument lists for dirty helper calls (IRDirty.args) and in NO 1906 OTHER PLACES. And then only in very limited ways. These constants 1907 are not pointer-aligned and hence can't be confused with real 1908 IRExpr*s nor with NULL. */ 1909 1910/* Denotes an argument which (in the helper) takes a pointer to a 1911 (naturally aligned) V128 or V256, into which the helper is expected 1912 to write its result. Use of IRExprP__VECRET is strictly 1913 controlled. If the helper returns a V128 or V256 value then 1914 IRExprP__VECRET must appear exactly once in the arg list, although 1915 it can appear anywhere, and the helper must have a C 'void' return 1916 type. If the helper returns any other type, IRExprP__VECRET may 1917 not appear in the argument list. */ 1918#define IRExprP__VECRET ((IRExpr*)9) 1919 1920/* Denotes a void* argument which is passed to the helper, which at 1921 run time will point to the thread's guest state area. This can 1922 only appear at most once in an argument list, and it may not appear 1923 at all in argument lists for clean helper calls. */ 1924#define IRExprP__BBPTR ((IRExpr*)17) 1925 /* Returns nonzero iff 'e' is exactly one of the two special marker values IRExprP__VECRET or IRExprP__BBPTR. Only the pointer value is compared; 'e' is never dereferenced. */ 1926static inline Bool is_IRExprP__VECRET_or_BBPTR ( IRExpr* e ) { 1927 return e == IRExprP__VECRET || e == IRExprP__BBPTR; 1928} 1929 1930 1931/* Expression constructors.
*/ 1932extern IRExpr* IRExpr_Binder ( Int binder ); 1933extern IRExpr* IRExpr_Get ( Int off, IRType ty ); 1934extern IRExpr* IRExpr_GetI ( IRRegArray* descr, IRExpr* ix, Int bias ); 1935extern IRExpr* IRExpr_RdTmp ( IRTemp tmp ); 1936extern IRExpr* IRExpr_Qop ( IROp op, IRExpr* arg1, IRExpr* arg2, 1937 IRExpr* arg3, IRExpr* arg4 ); 1938extern IRExpr* IRExpr_Triop ( IROp op, IRExpr* arg1, 1939 IRExpr* arg2, IRExpr* arg3 ); 1940extern IRExpr* IRExpr_Binop ( IROp op, IRExpr* arg1, IRExpr* arg2 ); 1941extern IRExpr* IRExpr_Unop ( IROp op, IRExpr* arg ); 1942extern IRExpr* IRExpr_Load ( IREndness end, IRType ty, IRExpr* addr ); 1943extern IRExpr* IRExpr_Const ( IRConst* con ); 1944extern IRExpr* IRExpr_CCall ( IRCallee* cee, IRType retty, IRExpr** args ); 1945extern IRExpr* IRExpr_ITE ( IRExpr* cond, IRExpr* iftrue, IRExpr* iffalse ); 1946 1947/* Deep-copy an IRExpr. */ 1948extern IRExpr* deepCopyIRExpr ( IRExpr* ); 1949 1950/* Pretty-print an IRExpr. */ 1951extern void ppIRExpr ( IRExpr* ); 1952 1953/* NULL-terminated IRExpr vector constructors, suitable for 1954 use as arg lists in clean/dirty helper calls. */ 1955extern IRExpr** mkIRExprVec_0 ( void ); 1956extern IRExpr** mkIRExprVec_1 ( IRExpr* ); 1957extern IRExpr** mkIRExprVec_2 ( IRExpr*, IRExpr* ); 1958extern IRExpr** mkIRExprVec_3 ( IRExpr*, IRExpr*, IRExpr* ); 1959extern IRExpr** mkIRExprVec_4 ( IRExpr*, IRExpr*, IRExpr*, IRExpr* ); 1960extern IRExpr** mkIRExprVec_5 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*, 1961 IRExpr* ); 1962extern IRExpr** mkIRExprVec_6 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*, 1963 IRExpr*, IRExpr* ); 1964extern IRExpr** mkIRExprVec_7 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*, 1965 IRExpr*, IRExpr*, IRExpr* ); 1966extern IRExpr** mkIRExprVec_8 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*, 1967 IRExpr*, IRExpr*, IRExpr*, IRExpr*); 1968 1969/* IRExpr copiers: 1970 - shallowCopy: shallow-copy (ie. create a new vector that shares the 1971 elements with the original). 1972 - deepCopy: deep-copy (ie. 
create a completely new vector). */ 1973extern IRExpr** shallowCopyIRExprVec ( IRExpr** ); 1974extern IRExpr** deepCopyIRExprVec ( IRExpr** ); 1975 1976/* Make a constant expression from the given host word taking into 1977 account (of course) the host word size. */ 1978extern IRExpr* mkIRExpr_HWord ( HWord ); 1979 1980/* Convenience function for constructing clean helper calls. */ 1981extern 1982IRExpr* mkIRExprCCall ( IRType retty, 1983 Int regparms, const HChar* name, void* addr, 1984 IRExpr** args ); 1985 1986 1987/* Convenience functions for atoms (IRExprs which are either Iex_RdTmp or 1988 * Iex_Const). isIRAtom reads e->tag unconditionally, so 'e' must point to a real IRExpr. */ 1989static inline Bool isIRAtom ( IRExpr* e ) { 1990 return toBool(e->tag == Iex_RdTmp || e->tag == Iex_Const); 1991} 1992 1993/* Are these two IR atoms identical? Causes an assertion 1994 failure if they are passed non-atoms. */ 1995extern Bool eqIRAtom ( IRExpr*, IRExpr* ); 1996 1997 1998/* ------------------ Jump kinds ------------------ */ 1999 2000/* This describes hints which can be passed to the dispatcher at guest 2001 control-flow transfer points. 2002 2003 Re Ijk_TInval: the guest state _must_ have two pseudo-registers, 2004 guest_TISTART and guest_TILEN, which specify the start and length 2005 of the region to be invalidated. These are both the size of a 2006 guest word. It is the responsibility of the relevant toIR.c to 2007 ensure that these are filled in with suitable values before issuing 2008 a jump of kind Ijk_TInval. 2009 2010 Re Ijk_EmWarn and Ijk_EmFail: the guest state must have a 2011 pseudo-register guest_EMNOTE, which is 32-bits regardless of the 2012 host or guest word size. That register should be made to hold a 2013 VexEmNote value to indicate the reason for the exit. 2014 2015 In the case of Ijk_EmFail, the exit is fatal (Vex-generated code 2016 cannot continue) and so the jump destination can be anything.
2017 2018 Re Ijk_Sys_ (syscall jumps): the guest state must have a 2019 pseudo-register guest_IP_AT_SYSCALL, which is the size of a guest 2020 word. Front ends should set this to be the IP at the most recently 2021 executed kernel-entering (system call) instruction. This makes it 2022 very much easier (viz, actually possible at all) to back up the 2023 guest to restart a syscall that has been interrupted by a signal. 2024*/ 2025typedef 2026 enum { 2027 Ijk_INVALID=0x1A00, 2028 Ijk_Boring, /* not interesting; just goto next */ 2029 Ijk_Call, /* guest is doing a call */ 2030 Ijk_Ret, /* guest is doing a return */ 2031 Ijk_ClientReq, /* do guest client req before continuing */ 2032 Ijk_Yield, /* client is yielding to thread scheduler */ 2033 Ijk_EmWarn, /* report emulation warning before continuing */ 2034 Ijk_EmFail, /* emulation critical (FATAL) error; give up */ 2035 Ijk_NoDecode, /* current instruction cannot be decoded */ 2036 Ijk_MapFail, /* Vex-provided address translation failed */ 2037 Ijk_TInval, /* Invalidate translations before continuing. */ 2038 Ijk_NoRedir, /* Jump to un-redirected guest addr */ 2039 Ijk_SigTRAP, /* current instruction synths SIGTRAP */ 2040 Ijk_SigSEGV, /* current instruction synths SIGSEGV */ 2041 Ijk_SigBUS, /* current instruction synths SIGBUS */ 2042 Ijk_SigFPE_IntDiv, /* current instruction synths SIGFPE - IntDiv */ 2043 Ijk_SigFPE_IntOvf, /* current instruction synths SIGFPE - IntOvf */ 2044 /* Unfortunately, various guest-dependent syscall kinds. They 2045 all mean: do a syscall before continuing. */ 2046 Ijk_Sys_syscall, /* amd64 'syscall', ppc 'sc', arm 'svc #0' */ 2047 Ijk_Sys_int32, /* amd64/x86 'int $0x20' */ 2048 Ijk_Sys_int128, /* amd64/x86 'int $0x80' */ 2049 Ijk_Sys_int129, /* amd64/x86 'int $0x81' */ 2050 Ijk_Sys_int130, /* amd64/x86 'int $0x82' */ 2051 Ijk_Sys_sysenter /* x86 'sysenter'. guest_EIP becomes 2052 invalid at the point this happens. 
*/ 2053 } 2054 IRJumpKind; 2055 2056extern void ppIRJumpKind ( IRJumpKind ); 2057 2058 2059/* ------------------ Dirty helper calls ------------------ */ 2060 2061/* A dirty call is a flexible mechanism for calling (possibly 2062 conditionally) a helper function or procedure. The helper function 2063 may read, write or modify client memory, and may read, write or 2064 modify client state. It can take arguments and optionally return a 2065 value. It may return different results and/or do different things 2066 when called repeatedly with the same arguments, by means of storing 2067 private state. 2068 2069 If a value is returned, it is assigned to the nominated return 2070 temporary. 2071 2072 Dirty calls are statements rather than expressions for obvious 2073 reasons. If a dirty call is marked as writing guest state, any 2074 pre-existing values derived from the written parts of the guest 2075 state are invalid. Similarly, if the dirty call is stated as 2076 writing memory, any pre-existing loaded values are invalidated by 2077 it. 2078 2079 In order that instrumentation is possible, the call must state, and 2080 state correctly: 2081 2082 * Whether it reads, writes or modifies memory, and if so where. 2083 2084 * Whether it reads, writes or modifies guest state, and if so which 2085 pieces. Several pieces may be stated, and their extents must be 2086 known at translation-time. Each piece is allowed to repeat some 2087 number of times at a fixed interval, if required. 2088 2089 Normally, code is generated to pass just the args to the helper. 2090 However, if IRExprP__BBPTR is present in the argument list (at most 2091 one instance is allowed), then the baseblock pointer is passed for 2092 that arg, so that the callee can access the guest state. It is 2093 invalid for .nFxState to be zero but IRExprP__BBPTR to be present, 2094 since .nFxState==0 is a claim that the call does not access guest 2095 state. 
2096 2097 IMPORTANT NOTE re GUARDS: Dirty calls are strict, very strict. The 2098 arguments and 'mFx' are evaluated REGARDLESS of the guard value. 2099 The order of argument evaluation is unspecified. The guard 2100 expression is evaluated AFTER the arguments and 'mFx' have been 2101 evaluated. 'mFx' is expected (by Memcheck) to be a defined value 2102 even if the guard evaluates to false. 2103*/ 2104 2105#define VEX_N_FXSTATE 7 /* enough for FXSAVE/FXRSTOR on x86 */ 2106 2107/* Effects on resources (eg. registers, memory locations) */ 2108typedef 2109 enum { 2110 Ifx_None=0x1B00, /* no effect */ 2111 Ifx_Read, /* reads the resource */ 2112 Ifx_Write, /* writes the resource */ 2113 Ifx_Modify, /* modifies the resource */ 2114 } 2115 IREffect; 2116 2117/* Pretty-print an IREffect */ 2118extern void ppIREffect ( IREffect ); 2119 2120typedef 2121 struct _IRDirty { 2122 /* What to call, and details of args/results. .guard must be 2123 non-NULL. If .tmp is not IRTemp_INVALID, then the call 2124 returns a result which is placed in .tmp. If at runtime the 2125 guard evaluates to false, .tmp has a 0x555..555 bit pattern 2126 written to it. Hence conditional calls that assign .tmp are 2127 allowed. */ 2128 IRCallee* cee; /* where to call */ 2129 IRExpr* guard; /* :: Ity_I1. Controls whether call happens */ 2130 /* The args vector may contain IRExprP__BBPTR and/or 2131 IRExprP__VECRET, in both cases, at most once. */ 2132 IRExpr** args; /* arg vector, ends in NULL. */ 2133 IRTemp tmp; /* to assign result to, or IRTemp_INVALID if none */ 2134 2135 /* Mem effects; we allow only one R/W/M region to be stated */ 2136 IREffect mFx; /* indicates memory effects, if any */ 2137 IRExpr* mAddr; /* of access, or NULL if mFx==Ifx_None */ 2138 Int mSize; /* of access, or zero if mFx==Ifx_None */ 2139 2140 /* Guest state effects; up to N allowed */ 2141 Int nFxState; /* must be 0 .. VEX_N_FXSTATE */ 2142 struct { 2143 IREffect fx:16; /* read, write or modify? Ifx_None is invalid.
*/ 2144 UShort offset; 2145 UShort size; 2146 UChar nRepeats; 2147 UChar repeatLen; 2148 } fxState[VEX_N_FXSTATE]; 2149 /* The access can be repeated, as specified by nRepeats and 2150 repeatLen. To describe only a single access, nRepeats and 2151 repeatLen should be zero. Otherwise, repeatLen must be a 2152 multiple of size and greater than size. */ 2153 /* Overall, the parts of the guest state denoted by (offset, 2154 size, nRepeats, repeatLen) is 2155 [offset, +size) 2156 and, if nRepeats > 0, 2157 for (i = 1; i <= nRepeats; i++) 2158 [offset + i * repeatLen, +size) 2159 A convenient way to enumerate all segments is therefore 2160 for (i = 0; i < 1 + nRepeats; i++) 2161 [offset + i * repeatLen, +size) 2162 */ 2163 } 2164 IRDirty; 2165 2166/* Pretty-print a dirty call */ 2167extern void ppIRDirty ( IRDirty* ); 2168 2169/* Allocate an uninitialised dirty call */ 2170extern IRDirty* emptyIRDirty ( void ); 2171 2172/* Deep-copy a dirty call */ 2173extern IRDirty* deepCopyIRDirty ( IRDirty* ); 2174 2175/* A handy function which takes some of the tedium out of constructing 2176 dirty helper calls. The called function impliedly does not return 2177 any value and has a constant-True guard. The call is marked as 2178 accessing neither guest state nor memory (hence the "unsafe" 2179 designation) -- you can change this marking later if need be. A 2180 suitable IRCallee is constructed from the supplied bits. */ 2181extern 2182IRDirty* unsafeIRDirty_0_N ( Int regparms, const HChar* name, void* addr, 2183 IRExpr** args ); 2184 2185/* Similarly, make a zero-annotation dirty call which returns a value, 2186 and assign that to the given temp. */ 2187extern 2188IRDirty* unsafeIRDirty_1_N ( IRTemp dst, 2189 Int regparms, const HChar* name, void* addr, 2190 IRExpr** args ); 2191 2192 2193/* --------------- Memory Bus Events --------------- */ 2194 2195typedef 2196 enum { 2197 Imbe_Fence=0x1C00, 2198 /* Needed only on ARM. 
It cancels a reservation made by a 2199 preceding Linked-Load, and needs to be handed through to the 2200 back end, just as LL and SC themselves are. */ 2201 Imbe_CancelReservation 2202 } 2203 IRMBusEvent; 2204 2205extern void ppIRMBusEvent ( IRMBusEvent ); 2206 2207 2208/* --------------- Compare and Swap --------------- */ 2209 2210/* This denotes an atomic compare and swap operation, either 2211 a single-element one or a double-element one. 2212 2213 In the single-element case: 2214 2215 .addr is the memory address. 2216 .end is the endianness with which memory is accessed 2217 2218 If .addr contains the same value as .expdLo, then .dataLo is 2219 written there, else there is no write. In both cases, the 2220 original value at .addr is copied into .oldLo. 2221 2222 Types: .expdLo, .dataLo and .oldLo must all have the same type. 2223 It may be any integral type, viz: I8, I16, I32 or, for 64-bit 2224 guests, I64. 2225 2226 .oldHi must be IRTemp_INVALID, and .expdHi and .dataHi must 2227 be NULL. 2228 2229 In the double-element case: 2230 2231 .addr is the memory address. 2232 .end is the endianness with which memory is accessed 2233 2234 The operation is the same: 2235 2236 If .addr contains the same value as .expdHi:.expdLo, then 2237 .dataHi:.dataLo is written there, else there is no write. In 2238 both cases the original value at .addr is copied into 2239 .oldHi:.oldLo. 2240 2241 Types: .expdHi, .expdLo, .dataHi, .dataLo, .oldHi, .oldLo must 2242 all have the same type, which may be any integral type, viz: I8, 2243 I16, I32 or, for 64-bit guests, I64. 2244 2245 The double-element case is complicated by the issue of 2246 endianness. In all cases, the two elements are understood to be 2247 located adjacently in memory, starting at the address .addr. 2248 2249 If .end is Iend_LE, then the .xxxLo component is at the lower 2250 address and the .xxxHi component is at the higher address, and 2251 each component is itself stored little-endianly. 
2252 2253 If .end is Iend_BE, then the .xxxHi component is at the lower 2254 address and the .xxxLo component is at the higher address, and 2255 each component is itself stored big-endianly. 2256 2257 This allows representing more cases than most architectures can 2258 handle. For example, x86 cannot do DCAS on 8- or 16-bit elements. 2259 2260 How to know if the CAS succeeded? 2261 2262 * if .oldLo == .expdLo (resp. .oldHi:.oldLo == .expdHi:.expdLo), 2263 then the CAS succeeded, .dataLo (resp. .dataHi:.dataLo) is now 2264 stored at .addr, and the original value there was .oldLo (resp 2265 .oldHi:.oldLo). 2266 2267 * if .oldLo != .expdLo (resp. .oldHi:.oldLo != .expdHi:.expdLo), 2268 then the CAS failed, and the original value at .addr was .oldLo 2269 (resp. .oldHi:.oldLo). 2270 2271 Hence it is easy to know whether or not the CAS succeeded. 2272*/ 2273typedef 2274 struct { 2275 IRTemp oldHi; /* old value of *addr is written here */ 2276 IRTemp oldLo; 2277 IREndness end; /* endianness of the data in memory */ 2278 IRExpr* addr; /* store address */ 2279 IRExpr* expdHi; /* expected old value at *addr */ 2280 IRExpr* expdLo; 2281 IRExpr* dataHi; /* new value for *addr */ 2282 IRExpr* dataLo; 2283 } 2284 IRCAS; 2285 2286extern void ppIRCAS ( IRCAS* cas ); 2287 2288extern IRCAS* mkIRCAS ( IRTemp oldHi, IRTemp oldLo, 2289 IREndness end, IRExpr* addr, 2290 IRExpr* expdHi, IRExpr* expdLo, 2291 IRExpr* dataHi, IRExpr* dataLo ); 2292 2293extern IRCAS* deepCopyIRCAS ( IRCAS* ); 2294 2295 2296/* ------------------ Circular Array Put ------------------ */ 2297 2298typedef 2299 struct { 2300 IRRegArray* descr; /* Part of guest state treated as circular */ 2301 IRExpr* ix; /* Variable part of index into array */ 2302 Int bias; /* Constant offset part of index into array */ 2303 IRExpr* data; /* The value to write */ 2304 } IRPutI; 2305 2306extern void ppIRPutI ( IRPutI* puti ); 2307 2308extern IRPutI* mkIRPutI ( IRRegArray* descr, IRExpr* ix, 2309 Int bias, IRExpr* data ); 2310 
2311extern IRPutI* deepCopyIRPutI ( IRPutI* ); 2312 2313 2314/* --------------- Guarded loads and stores --------------- */ 2315 2316/* Conditional stores are straightforward. They are the same as 2317 normal stores, with an extra 'guard' field :: Ity_I1 that 2318 determines whether or not the store actually happens. If not, 2319 memory is unmodified. 2320 2321 The semantics of this is that 'addr' and 'data' are fully evaluated 2322 even in the case where 'guard' evaluates to zero (false). 2323*/ 2324typedef 2325 struct { 2326 IREndness end; /* Endianness of the store */ 2327 IRExpr* addr; /* store address */ 2328 IRExpr* data; /* value to write */ 2329 IRExpr* guard; /* Guarding value */ 2330 } 2331 IRStoreG; 2332 2333/* Conditional loads are a little more complex. 'addr' is the 2334 address, 'guard' is the guarding condition. If the load takes 2335 place, the loaded value is placed in 'dst'. If it does not take 2336 place, 'alt' is copied to 'dst'. However, the loaded value is not 2337 placed directly in 'dst' -- it is first subjected to the conversion 2338 specified by 'cvt'. 2339 2340 For example, imagine doing a conditional 8-bit load, in which the 2341 loaded value is zero extended to 32 bits. Hence: 2342 * 'dst' and 'alt' must have type I32 2343 * 'cvt' must be a unary op which converts I8 to I32. In this 2344 example, it would be ILGop_8Uto32. 2345 2346 There is no explicit indication of the type at which the load is 2347 done, since that is inferrable from the arg type of 'cvt'. Note 2348 that the types of 'alt' and 'dst' and the result type of 'cvt' must 2349 all be the same. 2350 2351 Semantically, 'addr' is evaluated even in the case where 'guard' 2352 evaluates to zero (false), and 'alt' is evaluated even when 'guard' 2353 evaluates to one (true). That is, 'addr' and 'alt' are always 2354 evaluated. 
2355*/ 2356typedef 2357 enum { 2358 ILGop_INVALID=0x1D00, 2359 ILGop_Ident32, /* 32 bit, no conversion */ 2360 ILGop_16Uto32, /* 16 bit load, Z-widen to 32 */ 2361 ILGop_16Sto32, /* 16 bit load, S-widen to 32 */ 2362 ILGop_8Uto32, /* 8 bit load, Z-widen to 32 */ 2363 ILGop_8Sto32 /* 8 bit load, S-widen to 32 */ 2364 } 2365 IRLoadGOp; 2366 2367typedef 2368 struct { 2369 IREndness end; /* Endianness of the load */ 2370 IRLoadGOp cvt; /* Conversion to apply to the loaded value */ 2371 IRTemp dst; /* Destination (LHS) of assignment */ 2372 IRExpr* addr; /* Address being loaded from */ 2373 IRExpr* alt; /* Value if load is not done. */ 2374 IRExpr* guard; /* Guarding value */ 2375 } 2376 IRLoadG; 2377 2378extern void ppIRStoreG ( IRStoreG* sg ); 2379 2380extern void ppIRLoadGOp ( IRLoadGOp cvt ); 2381 2382extern void ppIRLoadG ( IRLoadG* lg ); 2383 2384extern IRStoreG* mkIRStoreG ( IREndness end, 2385 IRExpr* addr, IRExpr* data, 2386 IRExpr* guard ); 2387 2388extern IRLoadG* mkIRLoadG ( IREndness end, IRLoadGOp cvt, 2389 IRTemp dst, IRExpr* addr, IRExpr* alt, 2390 IRExpr* guard ); 2391 2392 2393/* ------------------ Statements ------------------ */ 2394 2395/* The different kinds of statements. Their meaning is explained 2396 below in the comments for IRStmt. 2397 2398 Those marked META do not represent code, but rather extra 2399 information about the code. These statements can be removed 2400 without affecting the functional behaviour of the code, however 2401 they are required by some IR consumers such as tools that 2402 instrument the code. 2403*/ 2404 2405typedef 2406 enum { 2407 Ist_NoOp=0x1E00, 2408 Ist_IMark, /* META */ 2409 Ist_AbiHint, /* META */ 2410 Ist_Put, 2411 Ist_PutI, 2412 Ist_WrTmp, 2413 Ist_Store, 2414 Ist_LoadG, 2415 Ist_StoreG, 2416 Ist_CAS, 2417 Ist_LLSC, 2418 Ist_Dirty, 2419 Ist_MBE, 2420 Ist_Exit 2421 } 2422 IRStmtTag; 2423 2424/* A statement. Stored as a tagged union. 'tag' indicates what kind 2425 of statement this is.
'Ist' is the union that holds the fields. 2426 If an IRStmt 'st' has st.tag equal to Ist_Store, then it's a store 2427 statement, and the fields can be accessed with 2428 'st.Ist.Store.<fieldname>'. 2429 2430 For each kind of statement, we show what it looks like when 2431 pretty-printed with ppIRStmt(). 2432*/ 2433typedef 2434 struct _IRStmt { 2435 IRStmtTag tag; 2436 union { 2437 /* A no-op (usually resulting from IR optimisation). Can be 2438 omitted without any effect. 2439 2440 ppIRStmt output: IR-NoOp 2441 */ 2442 struct { /* No fields. NOTE(review): a member-less struct relies on a compiler extension (GNU C); it is not ISO C. */ 2443 } NoOp; 2444 2445 /* META: instruction mark. Marks the start of the statements 2446 that represent a single machine instruction (the end of 2447 those statements is marked by the next IMark or the end of 2448 the IRSB). Contains the address and length of the 2449 instruction. 2450 2451 It also contains a delta value. The delta must be 2452 subtracted from a guest program counter value before 2453 attempting to establish, by comparison with the address 2454 and length values, whether or not that program counter 2455 value refers to this instruction. For x86, amd64, ppc32, 2456 ppc64 and arm, the delta value is zero. For Thumb 2457 instructions, the delta value is one. This is because, on 2458 Thumb, guest PC values (guest_R15T) are encoded using the 2459 top 31 bits of the instruction address and a 1 in the lsb; 2460 hence they appear to be (numerically) 1 past the start of 2461 the instruction they refer to. IOW, guest_R15T on ARM 2462 holds a standard ARM interworking address. 2463 2464 ppIRStmt output: ------ IMark(<addr>, <len>, <delta>) ------, 2465 eg. ------ IMark(0x4000792, 5, 0) ------, 2466 */ 2467 struct { 2468 Addr64 addr; /* instruction address */ 2469 Int len; /* instruction length */ 2470 UChar delta; /* addr = program counter as encoded in guest state 2471 - delta */ 2472 } IMark; 2473 2474 /* META: An ABI hint, which says something about this 2475 platform's ABI.
2476 2477 At the moment, the only AbiHint is one which indicates 2478 that a given chunk of address space, [base .. base+len-1], 2479 has become undefined. This is used on amd64-linux and 2480 some ppc variants to pass stack-redzoning hints to whoever 2481 wants to see them. It also indicates the address of the 2482 next (dynamic) instruction that will be executed. This is 2483 to help Memcheck to origin tracking. 2484 2485 ppIRStmt output: ====== AbiHint(<base>, <len>, <nia>) ====== 2486 eg. ====== AbiHint(t1, 16, t2) ====== 2487 */ 2488 struct { 2489 IRExpr* base; /* Start of undefined chunk */ 2490 Int len; /* Length of undefined chunk */ 2491 IRExpr* nia; /* Address of next (guest) insn */ 2492 } AbiHint; 2493 2494 /* Write a guest register, at a fixed offset in the guest state. 2495 ppIRStmt output: PUT(<offset>) = <data>, eg. PUT(60) = t1 2496 */ 2497 struct { 2498 Int offset; /* Offset into the guest state */ 2499 IRExpr* data; /* The value to write */ 2500 } Put; 2501 2502 /* Write a guest register, at a non-fixed offset in the guest 2503 state. See the comment for GetI expressions for more 2504 information. 2505 2506 ppIRStmt output: PUTI<descr>[<ix>,<bias>] = <data>, 2507 eg. PUTI(64:8xF64)[t5,0] = t1 2508 */ 2509 struct { 2510 IRPutI* details; 2511 } PutI; 2512 2513 /* Assign a value to a temporary. Note that SSA rules require 2514 each tmp is only assigned to once. IR sanity checking will 2515 reject any block containing a temporary which is not assigned 2516 to exactly once. 2517 2518 ppIRStmt output: t<tmp> = <data>, eg. t1 = 3 2519 */ 2520 struct { 2521 IRTemp tmp; /* Temporary (LHS of assignment) */ 2522 IRExpr* data; /* Expression (RHS of assignment) */ 2523 } WrTmp; 2524 2525 /* Write a value to memory. This is a normal store, not a 2526 Store-Conditional. To represent a Store-Conditional, 2527 instead use IRStmt.LLSC. 2528 ppIRStmt output: ST<end>(<addr>) = <data>, eg. 
STle(t1) = t2 2529 */ 2530 struct { 2531 IREndness end; /* Endianness of the store */ 2532 IRExpr* addr; /* store address */ 2533 IRExpr* data; /* value to write */ 2534 } Store; 2535 2536 /* Guarded store. Note that this is defined to evaluate all 2537 expression fields (addr, data) even if the guard evaluates 2538 to false. 2539 ppIRStmt output: 2540 if (<guard>) ST<end>(<addr>) = <data> */ 2541 struct { 2542 IRStoreG* details; 2543 } StoreG; 2544 2545 /* Guarded load. Note that this is defined to evaluate all 2546 expression fields (addr, alt) even if the guard evaluates 2547 to false. 2548 ppIRStmt output: 2549 t<tmp> = if (<guard>) <cvt>(LD<end>(<addr>)) else <alt> */ 2550 struct { 2551 IRLoadG* details; 2552 } LoadG; 2553 2554 /* Do an atomic compare-and-swap operation. Semantics are 2555 described above on a comment at the definition of IRCAS. 2556 2557 ppIRStmt output: 2558 t<tmp> = CAS<end>(<addr> :: <expected> -> <new>) 2559 eg 2560 t1 = CASle(t2 :: t3->Add32(t3,1)) 2561 which denotes a 32-bit atomic increment 2562 of a value at address t2 2563 2564 A double-element CAS may also be denoted, in which case <tmp>, 2565 <expected> and <new> are all pairs of items, separated by 2566 commas. 2567 */ 2568 struct { 2569 IRCAS* details; 2570 } CAS; 2571 2572 /* Either Load-Linked or Store-Conditional, depending on 2573 STOREDATA. 2574 2575 If STOREDATA is NULL then this is a Load-Linked, meaning 2576 that data is loaded from memory as normal, but a 2577 'reservation' for the address is also lodged in the 2578 hardware. 2579 2580 result = Load-Linked(addr, end) 2581 2582 The data transfer type is the type of RESULT (I32, I64, 2583 etc). ppIRStmt output: 2584 2585 result = LD<end>-Linked(<addr>), eg. LDbe-Linked(t1) 2586 2587 If STOREDATA is not NULL then this is a Store-Conditional, 2588 hence: 2589 2590 result = Store-Conditional(addr, storedata, end) 2591 2592 The data transfer type is the type of STOREDATA and RESULT 2593 has type Ity_I1. 
The store may fail or succeed depending 2594 on the state of a previously lodged reservation on this 2595 address. RESULT is written 1 if the store succeeds and 0 2596 if it fails. eg ppIRStmt output: 2597 2598 result = ( ST<end>-Cond(<addr>) = <storedata> ) 2599 eg t3 = ( STbe-Cond(t1, t2) ) 2600 2601 In all cases, the address must be naturally aligned for 2602 the transfer type -- any misaligned addresses should be 2603 caught by a dominating IR check and side exit. This 2604 alignment restriction exists because on at least some 2605 LL/SC platforms (ppc), stwcx. etc will trap w/ SIGBUS on 2606 misaligned addresses, and we have to actually generate 2607 stwcx. on the host, and we don't want it trapping on the 2608 host. 2609 2610 Summary of rules for transfer type: 2611 STOREDATA == NULL (LL): 2612 transfer type = type of RESULT 2613 STOREDATA != NULL (SC): 2614 transfer type = type of STOREDATA, and RESULT :: Ity_I1 2615 */ 2616 struct { 2617 IREndness end; 2618 IRTemp result; 2619 IRExpr* addr; 2620 IRExpr* storedata; /* NULL => LL, non-NULL => SC */ 2621 } LLSC; 2622 2623 /* Call (possibly conditionally) a C function that has side 2624 effects (ie. is "dirty"). See the comments above the 2625 IRDirty type declaration for more information. 2626 2627 ppIRStmt output: 2628 t<tmp> = DIRTY <guard> <effects> 2629 ::: <callee>(<args>) 2630 eg. 2631 t1 = DIRTY t27 RdFX-gst(16,4) RdFX-gst(60,4) 2632 ::: foo{0x380035f4}(t2) 2633 */ 2634 struct { 2635 IRDirty* details; 2636 } Dirty; 2637 2638 /* A memory bus event - a fence, or cancellation of a reservation 2639 made by a preceding Linked-Load (see IRMBusEvent). IR optimisation treats all these as fences 2640 across which no memory references may be moved. 2641 ppIRStmt output: MBusEvent-Fence, 2642 MBusEvent-BusLock, MBusEvent-BusUnlock. NOTE(review): IRMBusEvent has no BusLock/BusUnlock enumerators (only Imbe_Fence and Imbe_CancelReservation), so this list of printed forms looks stale -- confirm against ppIRMBusEvent. 2643 */ 2644 struct { 2645 IRMBusEvent event; 2646 } MBE; 2647 2648 /* Conditional exit from the middle of an IRSB. 2649 ppIRStmt output: if (<guard>) goto {<jk>} <dst> 2650 eg.
if (t69) goto {Boring} 0x4000AAA:I32 2651 If <guard> is true, the guest state is also updated by 2652 PUT-ing <dst> at <offsIP>. This is done because a 2653 taken exit must update the guest program counter. 2654 */ 2655 struct { 2656 IRExpr* guard; /* Conditional expression */ 2657 IRConst* dst; /* Jump target (constant only) */ 2658 IRJumpKind jk; /* Jump kind */ 2659 Int offsIP; /* Guest state offset for IP */ 2660 } Exit; 2661 } Ist; 2662 } 2663 IRStmt; 2664 2665/* Statement constructors. */ 2666extern IRStmt* IRStmt_NoOp ( void ); 2667extern IRStmt* IRStmt_IMark ( Addr64 addr, Int len, UChar delta ); 2668extern IRStmt* IRStmt_AbiHint ( IRExpr* base, Int len, IRExpr* nia ); 2669extern IRStmt* IRStmt_Put ( Int off, IRExpr* data ); 2670extern IRStmt* IRStmt_PutI ( IRPutI* details ); 2671extern IRStmt* IRStmt_WrTmp ( IRTemp tmp, IRExpr* data ); 2672extern IRStmt* IRStmt_Store ( IREndness end, IRExpr* addr, IRExpr* data ); 2673extern IRStmt* IRStmt_StoreG ( IREndness end, IRExpr* addr, IRExpr* data, 2674 IRExpr* guard ); 2675extern IRStmt* IRStmt_LoadG ( IREndness end, IRLoadGOp cvt, IRTemp dst, 2676 IRExpr* addr, IRExpr* alt, IRExpr* guard ); 2677extern IRStmt* IRStmt_CAS ( IRCAS* details ); 2678extern IRStmt* IRStmt_LLSC ( IREndness end, IRTemp result, 2679 IRExpr* addr, IRExpr* storedata ); 2680extern IRStmt* IRStmt_Dirty ( IRDirty* details ); 2681extern IRStmt* IRStmt_MBE ( IRMBusEvent event ); 2682extern IRStmt* IRStmt_Exit ( IRExpr* guard, IRJumpKind jk, IRConst* dst, 2683 Int offsIP ); 2684 2685/* Deep-copy an IRStmt. */ 2686extern IRStmt* deepCopyIRStmt ( IRStmt* ); 2687 2688/* Pretty-print an IRStmt. */ 2689extern void ppIRStmt ( IRStmt* ); 2690 2691 2692/* ------------------ Basic Blocks ------------------ */ 2693 2694/* Type environments: a bunch of statements, expressions, etc, are 2695 incomplete without an environment indicating the type of each 2696 IRTemp. So this provides one. 
IR temporaries are really just 2697 unsigned ints and so this provides an array, 0 .. n_types_used-1 of 2698 them. 2699*/ 2700typedef 2701 struct { 2702 IRType* types; 2703 Int types_size; 2704 Int types_used; 2705 } 2706 IRTypeEnv; 2707 2708/* Obtain a new IRTemp */ 2709extern IRTemp newIRTemp ( IRTypeEnv*, IRType ); 2710 2711/* Deep-copy a type environment */ 2712extern IRTypeEnv* deepCopyIRTypeEnv ( IRTypeEnv* ); 2713 2714/* Pretty-print a type environment */ 2715extern void ppIRTypeEnv ( IRTypeEnv* ); 2716 2717 2718/* Code blocks, which in proper compiler terminology are superblocks 2719 (single entry, multiple exit code sequences) contain: 2720 2721 - A table giving a type for each temp (the "type environment") 2722 - An expandable array of statements 2723 - An expression of type 32 or 64 bits, depending on the 2724 guest's word size, indicating the next destination if the block 2725 executes all the way to the end, without a side exit 2726 - An indication of any special actions (JumpKind) needed 2727 for this final jump. 2728 - Offset of the IP field in the guest state. This will be 2729 updated before the final jump is done. 2730 2731 "IRSB" stands for "IR Super Block". 2732*/ 2733typedef 2734 struct { 2735 IRTypeEnv* tyenv; 2736 IRStmt** stmts; 2737 Int stmts_size; 2738 Int stmts_used; 2739 IRExpr* next; 2740 IRJumpKind jumpkind; 2741 Int offsIP; 2742 } 2743 IRSB; 2744 2745/* Allocate a new, uninitialised IRSB */ 2746extern IRSB* emptyIRSB ( void ); 2747 2748/* Deep-copy an IRSB */ 2749extern IRSB* deepCopyIRSB ( IRSB* ); 2750 2751/* Deep-copy an IRSB, except for the statements list, which set to be 2752 a new, empty, list of statements. 
*/ 2753extern IRSB* deepCopyIRSBExceptStmts ( IRSB* ); 2754 2755/* Pretty-print an IRSB */ 2756extern void ppIRSB ( IRSB* ); 2757 2758/* Append an IRStmt to an IRSB */ 2759extern void addStmtToIRSB ( IRSB*, IRStmt* ); 2760 2761 2762/*---------------------------------------------------------------*/ 2763/*--- Helper functions for the IR ---*/ 2764/*---------------------------------------------------------------*/ 2765 2766/* For messing with IR type environments */ 2767extern IRTypeEnv* emptyIRTypeEnv ( void ); 2768 2769/* What is the type of this expression? */ 2770extern IRType typeOfIRConst ( IRConst* ); 2771extern IRType typeOfIRTemp ( IRTypeEnv*, IRTemp ); 2772extern IRType typeOfIRExpr ( IRTypeEnv*, IRExpr* ); 2773 2774/* What are the arg and result type for this IRLoadGOp? */ 2775extern void typeOfIRLoadGOp ( IRLoadGOp cvt, 2776 /*OUT*/IRType* t_res, 2777 /*OUT*/IRType* t_arg ); 2778 2779/* Sanity check a BB of IR */ 2780extern void sanityCheckIRSB ( IRSB* bb, 2781 const HChar* caller, 2782 Bool require_flatness, 2783 IRType guest_word_size ); 2784extern Bool isFlatIRStmt ( IRStmt* ); 2785 2786/* Is this any value actually in the enumeration 'IRType' ? */ 2787extern Bool isPlausibleIRType ( IRType ty ); 2788 2789 2790/*---------------------------------------------------------------*/ 2791/*--- IR injection ---*/ 2792/*---------------------------------------------------------------*/ 2793void vex_inject_ir(IRSB *, IREndness); 2794 2795 2796#endif /* ndef __LIBVEX_IR_H */ 2797 2798 2799/*---------------------------------------------------------------*/ 2800/*--- libvex_ir.h ---*/ 2801/*---------------------------------------------------------------*/ 2802