libvex_ir.h revision e71e56a90e91ce37b0ee846a4ff94493d59f2095
1 2/*---------------------------------------------------------------*/ 3/*--- begin libvex_ir.h ---*/ 4/*---------------------------------------------------------------*/ 5 6/* 7 This file is part of Valgrind, a dynamic binary instrumentation 8 framework. 9 10 Copyright (C) 2004-2010 OpenWorks LLP 11 info@open-works.net 12 13 This program is free software; you can redistribute it and/or 14 modify it under the terms of the GNU General Public License as 15 published by the Free Software Foundation; either version 2 of the 16 License, or (at your option) any later version. 17 18 This program is distributed in the hope that it will be useful, but 19 WITHOUT ANY WARRANTY; without even the implied warranty of 20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 21 General Public License for more details. 22 23 You should have received a copy of the GNU General Public License 24 along with this program; if not, write to the Free Software 25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 26 02110-1301, USA. 27 28 The GNU General Public License is contained in the file COPYING. 29 30 Neither the names of the U.S. Department of Energy nor the 31 University of California nor the names of its contributors may be 32 used to endorse or promote products derived from this software 33 without prior written permission. 34*/ 35 36#ifndef __LIBVEX_IR_H 37#define __LIBVEX_IR_H 38 39#include "libvex_basictypes.h" 40 41 42/*---------------------------------------------------------------*/ 43/*--- High-level IR description ---*/ 44/*---------------------------------------------------------------*/ 45 46/* Vex IR is an architecture-neutral intermediate representation. 47 Unlike some IRs in systems similar to Vex, it is not like assembly 48 language (ie. a list of instructions). Rather, it is more like the 49 IR that might be used in a compiler. 50 51 Code blocks 52 ~~~~~~~~~~~ 53 The code is broken into small code blocks ("superblocks", type: 54 'IRSB'). 
Each code block typically represents from 1 to perhaps 50 55 instructions. IRSBs are single-entry, multiple-exit code blocks. 56 Each IRSB contains three things: 57 - a type environment, which indicates the type of each temporary 58 value present in the IRSB 59 - a list of statements, which represent code 60 - a jump that exits from the end the IRSB 61 Because the blocks are multiple-exit, there can be additional 62 conditional exit statements that cause control to leave the IRSB 63 before the final exit. Also because of this, IRSBs can cover 64 multiple non-consecutive sequences of code (up to 3). These are 65 recorded in the type VexGuestExtents (see libvex.h). 66 67 Statements and expressions 68 ~~~~~~~~~~~~~~~~~~~~~~~~~~ 69 Statements (type 'IRStmt') represent operations with side-effects, 70 eg. guest register writes, stores, and assignments to temporaries. 71 Expressions (type 'IRExpr') represent operations without 72 side-effects, eg. arithmetic operations, loads, constants. 73 Expressions can contain sub-expressions, forming expression trees, 74 eg. (3 + (4 * load(addr1)). 75 76 Storage of guest state 77 ~~~~~~~~~~~~~~~~~~~~~~ 78 The "guest state" contains the guest registers of the guest machine 79 (ie. the machine that we are simulating). It is stored by default 80 in a block of memory supplied by the user of the VEX library, 81 generally referred to as the guest state (area). To operate on 82 these registers, one must first read ("Get") them from the guest 83 state into a temporary value. Afterwards, one can write ("Put") 84 them back into the guest state. 85 86 Get and Put are characterised by a byte offset into the guest 87 state, a small integer which effectively gives the identity of the 88 referenced guest register, and a type, which indicates the size of 89 the value to be transferred. 90 91 The basic "Get" and "Put" operations are sufficient to model normal 92 fixed registers on the guest. 
Selected areas of the guest state 93 can be treated as a circular array of registers (type: 94 'IRRegArray'), which can be indexed at run-time. This is done with 95 the "GetI" and "PutI" primitives. This is necessary to describe 96 rotating register files, for example the x87 FPU stack, SPARC 97 register windows, and the Itanium register files. 98 99 Examples, and flattened vs. unflattened code 100 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 101 For example, consider this x86 instruction: 102 103 addl %eax, %ebx 104 105 One Vex IR translation for this code would be this: 106 107 ------ IMark(0x24F275, 7, 0) ------ 108 t3 = GET:I32(0) # get %eax, a 32-bit integer 109 t2 = GET:I32(12) # get %ebx, a 32-bit integer 110 t1 = Add32(t3,t2) # addl 111 PUT(0) = t1 # put %eax 112 113 (For simplicity, this ignores the effects on the condition codes, and 114 the update of the instruction pointer.) 115 116 The "IMark" is an IR statement that doesn't represent actual code. 117 Instead it indicates the address and length of the original 118 instruction. The numbers 0 and 12 are offsets into the guest state 119 for %eax and %ebx. The full list of offsets for an architecture 120 <ARCH> can be found in the type VexGuest<ARCH>State in the file 121 VEX/pub/libvex_guest_<ARCH>.h. 122 123 The five statements in this example are: 124 - the IMark 125 - three assignments to temporaries 126 - one register write (put) 127 128 The six expressions in this example are: 129 - two register reads (gets) 130 - one arithmetic (add) operation 131 - three temporaries (two nested within the Add32, one in the PUT) 132 133 The above IR is "flattened", ie. all sub-expressions are "atoms", 134 either constants or temporaries. An equivalent, unflattened version 135 would be: 136 137 PUT(0) = Add32(GET:I32(0), GET:I32(12)) 138 139 IR is guaranteed to be flattened at instrumentation-time. This makes 140 instrumentation easier. 
Equivalent flattened and unflattened IR 141 typically results in the same generated code. 142 143 Another example, this one showing loads and stores: 144 145 addl %edx,4(%eax) 146 147 This becomes (again ignoring condition code and instruction pointer 148 updates): 149 150 ------ IMark(0x4000ABA, 3, 0) ------ 151 t3 = Add32(GET:I32(0),0x4:I32) 152 t2 = LDle:I32(t3) 153 t1 = GET:I32(8) 154 t0 = Add32(t2,t1) 155 STle(t3) = t0 156 157 The "le" in "LDle" and "STle" is short for "little-endian". 158 159 No need for deallocations 160 ~~~~~~~~~~~~~~~~~~~~~~~~~ 161 Although there are allocation functions for various data structures 162 in this file, there are no deallocation functions. This is because 163 Vex uses a memory allocation scheme that automatically reclaims the 164 memory used by allocated structures once translation is completed. 165 This makes things easier for tools that instruments/transforms code 166 blocks. 167 168 SSAness and typing 169 ~~~~~~~~~~~~~~~~~~ 170 The IR is fully typed. For every IRSB (IR block) it is possible to 171 say unambiguously whether or not it is correctly typed. 172 Incorrectly typed IR has no meaning and the VEX will refuse to 173 process it. At various points during processing VEX typechecks the 174 IR and aborts if any violations are found. This seems overkill but 175 makes it a great deal easier to build a reliable JIT. 176 177 IR also has the SSA property. SSA stands for Static Single 178 Assignment, and what it means is that each IR temporary may be 179 assigned to only once. This idea became widely used in compiler 180 construction in the mid to late 90s. It makes many IR-level 181 transformations/code improvements easier, simpler and faster. 182 Whenever it typechecks an IR block, VEX also checks the SSA 183 property holds, and will abort if not so. So SSAness is 184 mechanically and rigidly enforced. 
185*/ 186 187/*---------------------------------------------------------------*/ 188/*--- Type definitions for the IR ---*/ 189/*---------------------------------------------------------------*/ 190 191/* General comments about naming schemes: 192 193 All publically visible functions contain the name of the primary 194 type on which they operate (IRFoo, IRBar, etc). Hence you should 195 be able to identify these functions by grepping for "IR[A-Z]". 196 197 For some type 'IRFoo': 198 199 - ppIRFoo is the printing method for IRFoo, printing it to the 200 output channel specified in the LibVEX_Initialise call. 201 202 - eqIRFoo is a structural equality predicate for IRFoos. 203 204 - deepCopyIRFoo is a deep copy constructor for IRFoos. 205 It recursively traverses the entire argument tree and 206 produces a complete new tree. All types have a deep copy 207 constructor. 208 209 - shallowCopyIRFoo is the shallow copy constructor for IRFoos. 210 It creates a new top-level copy of the supplied object, 211 but does not copy any sub-objects. Only some types have a 212 shallow copy constructor. 213*/ 214 215/* ------------------ Types ------------------ */ 216 217/* A type indicates the size of a value, and whether it's an integer, a 218 float, or a vector (SIMD) value. */ 219typedef 220 enum { 221 Ity_INVALID=0x11000, 222 Ity_I1, 223 Ity_I8, 224 Ity_I16, 225 Ity_I32, 226 Ity_I64, 227 Ity_I128, /* 128-bit scalar */ 228 Ity_F32, /* IEEE 754 float */ 229 Ity_F64, /* IEEE 754 double */ 230 Ity_F128, /* 128-bit floating point; implementation defined */ 231 Ity_V128 /* 128-bit SIMD */ 232 } 233 IRType; 234 235/* Pretty-print an IRType */ 236extern void ppIRType ( IRType ); 237 238/* Get the size (in bytes) of an IRType */ 239extern Int sizeofIRType ( IRType ); 240 241 242/* ------------------ Endianness ------------------ */ 243 244/* IREndness is used in load IRExprs and store IRStmts. 
*/ 245typedef 246 enum { 247 Iend_LE=0x12000, /* little endian */ 248 Iend_BE /* big endian */ 249 } 250 IREndness; 251 252 253/* ------------------ Constants ------------------ */ 254 255/* IRConsts are used within 'Const' and 'Exit' IRExprs. */ 256 257/* The various kinds of constant. */ 258typedef 259 enum { 260 Ico_U1=0x13000, 261 Ico_U8, 262 Ico_U16, 263 Ico_U32, 264 Ico_U64, 265 Ico_F32, /* 32-bit IEEE754 floating */ 266 Ico_F32i, /* 32-bit unsigned int to be interpreted literally 267 as a IEEE754 single value. */ 268 Ico_F64, /* 64-bit IEEE754 floating */ 269 Ico_F64i, /* 64-bit unsigned int to be interpreted literally 270 as a IEEE754 double value. */ 271 Ico_V128 /* 128-bit restricted vector constant, with 1 bit 272 (repeated 8 times) for each of the 16 x 1-byte lanes */ 273 } 274 IRConstTag; 275 276/* A constant. Stored as a tagged union. 'tag' indicates what kind of 277 constant this is. 'Ico' is the union that holds the fields. If an 278 IRConst 'c' has c.tag equal to Ico_U32, then it's a 32-bit constant, 279 and its value can be accessed with 'c.Ico.U32'. 
*/ 280typedef 281 struct _IRConst { 282 IRConstTag tag; 283 union { 284 Bool U1; 285 UChar U8; 286 UShort U16; 287 UInt U32; 288 ULong U64; 289 Float F32; 290 UInt F32i; 291 Double F64; 292 ULong F64i; 293 UShort V128; /* 16-bit value; see Ico_V128 comment above */ 294 } Ico; 295 } 296 IRConst; 297 298/* IRConst constructors */ 299extern IRConst* IRConst_U1 ( Bool ); 300extern IRConst* IRConst_U8 ( UChar ); 301extern IRConst* IRConst_U16 ( UShort ); 302extern IRConst* IRConst_U32 ( UInt ); 303extern IRConst* IRConst_U64 ( ULong ); 304extern IRConst* IRConst_F32 ( Float ); 305extern IRConst* IRConst_F32i ( UInt ); 306extern IRConst* IRConst_F64 ( Double ); 307extern IRConst* IRConst_F64i ( ULong ); 308extern IRConst* IRConst_V128 ( UShort ); 309 310/* Deep-copy an IRConst */ 311extern IRConst* deepCopyIRConst ( IRConst* ); 312 313/* Pretty-print an IRConst */ 314extern void ppIRConst ( IRConst* ); 315 316/* Compare two IRConsts for equality */ 317extern Bool eqIRConst ( IRConst*, IRConst* ); 318 319 320/* ------------------ Call targets ------------------ */ 321 322/* Describes a helper function to call. The name part is purely for 323 pretty printing and not actually used. regparms=n tells the back 324 end that the callee has been declared 325 "__attribute__((regparm(n)))", although indirectly using the 326 VEX_REGPARM(n) macro. On some targets (x86) the back end will need 327 to construct a non-standard sequence to call a function declared 328 like this. 329 330 mcx_mask is a sop to Memcheck. It indicates which args should be 331 considered 'always defined' when lazily computing definedness of 332 the result. Bit 0 of mcx_mask corresponds to args[0], bit 1 to 333 args[1], etc. If a bit is set, the corresponding arg is excluded 334 (hence "x" in "mcx") from definedness checking. 335*/ 336 337typedef 338 struct { 339 Int regparms; 340 HChar* name; 341 void* addr; 342 UInt mcx_mask; 343 } 344 IRCallee; 345 346/* Create an IRCallee. 
*/ 347extern IRCallee* mkIRCallee ( Int regparms, HChar* name, void* addr ); 348 349/* Deep-copy an IRCallee. */ 350extern IRCallee* deepCopyIRCallee ( IRCallee* ); 351 352/* Pretty-print an IRCallee. */ 353extern void ppIRCallee ( IRCallee* ); 354 355 356/* ------------------ Guest state arrays ------------------ */ 357 358/* This describes a section of the guest state that we want to 359 be able to index at run time, so as to be able to describe 360 indexed or rotating register files on the guest. */ 361typedef 362 struct { 363 Int base; /* guest state offset of start of indexed area */ 364 IRType elemTy; /* type of each element in the indexed area */ 365 Int nElems; /* number of elements in the indexed area */ 366 } 367 IRRegArray; 368 369extern IRRegArray* mkIRRegArray ( Int, IRType, Int ); 370 371extern IRRegArray* deepCopyIRRegArray ( IRRegArray* ); 372 373extern void ppIRRegArray ( IRRegArray* ); 374extern Bool eqIRRegArray ( IRRegArray*, IRRegArray* ); 375 376 377/* ------------------ Temporaries ------------------ */ 378 379/* This represents a temporary, eg. t1. The IR optimiser relies on the 380 fact that IRTemps are 32-bit ints. Do not change them to be ints of 381 any other size. */ 382typedef UInt IRTemp; 383 384/* Pretty-print an IRTemp. */ 385extern void ppIRTemp ( IRTemp ); 386 387#define IRTemp_INVALID ((IRTemp)0xFFFFFFFF) 388 389 390/* --------------- Primops (arity 1,2,3 and 4) --------------- */ 391 392/* Primitive operations that are used in Unop, Binop, Triop and Qop 393 IRExprs. Once we take into account integer, floating point and SIMD 394 operations of all the different sizes, there are quite a lot of them. 395 Most instructions supported by the architectures that Vex supports 396 (x86, PPC, etc) are represented. Some more obscure ones (eg. cpuid) 397 are not; they are instead handled with dirty helpers that emulate 398 their functionality. 
Such obscure ones are thus not directly visible 399 in the IR, but their effects on guest state (memory and registers) 400 are made visible via the annotations in IRDirty structures. 401*/ 402typedef 403 enum { 404 /* -- Do not change this ordering. The IR generators rely on 405 (eg) Iop_Add64 == IopAdd8 + 3. -- */ 406 407 Iop_INVALID=0x14000, 408 Iop_Add8, Iop_Add16, Iop_Add32, Iop_Add64, 409 Iop_Sub8, Iop_Sub16, Iop_Sub32, Iop_Sub64, 410 /* Signless mul. MullS/MullU is elsewhere. */ 411 Iop_Mul8, Iop_Mul16, Iop_Mul32, Iop_Mul64, 412 Iop_Or8, Iop_Or16, Iop_Or32, Iop_Or64, 413 Iop_And8, Iop_And16, Iop_And32, Iop_And64, 414 Iop_Xor8, Iop_Xor16, Iop_Xor32, Iop_Xor64, 415 Iop_Shl8, Iop_Shl16, Iop_Shl32, Iop_Shl64, 416 Iop_Shr8, Iop_Shr16, Iop_Shr32, Iop_Shr64, 417 Iop_Sar8, Iop_Sar16, Iop_Sar32, Iop_Sar64, 418 /* Integer comparisons. */ 419 Iop_CmpEQ8, Iop_CmpEQ16, Iop_CmpEQ32, Iop_CmpEQ64, 420 Iop_CmpNE8, Iop_CmpNE16, Iop_CmpNE32, Iop_CmpNE64, 421 /* Tags for unary ops */ 422 Iop_Not8, Iop_Not16, Iop_Not32, Iop_Not64, 423 424 /* Exactly like CmpEQ8/16/32/64, but carrying the additional 425 hint that these compute the success/failure of a CAS 426 operation, and hence are almost certainly applied to two 427 copies of the same value, which in turn has implications for 428 Memcheck's instrumentation. */ 429 Iop_CasCmpEQ8, Iop_CasCmpEQ16, Iop_CasCmpEQ32, Iop_CasCmpEQ64, 430 Iop_CasCmpNE8, Iop_CasCmpNE16, Iop_CasCmpNE32, Iop_CasCmpNE64, 431 432 /* -- Ordering not important after here. -- */ 433 434 /* Widening multiplies */ 435 Iop_MullS8, Iop_MullS16, Iop_MullS32, Iop_MullS64, 436 Iop_MullU8, Iop_MullU16, Iop_MullU32, Iop_MullU64, 437 438 /* Wierdo integer stuff */ 439 Iop_Clz64, Iop_Clz32, /* count leading zeroes */ 440 Iop_Ctz64, Iop_Ctz32, /* count trailing zeros */ 441 /* Ctz64/Ctz32/Clz64/Clz32 are UNDEFINED when given arguments of 442 zero. You must ensure they are never given a zero argument. 
443 */ 444 445 /* Standard integer comparisons */ 446 Iop_CmpLT32S, Iop_CmpLT64S, 447 Iop_CmpLE32S, Iop_CmpLE64S, 448 Iop_CmpLT32U, Iop_CmpLT64U, 449 Iop_CmpLE32U, Iop_CmpLE64U, 450 451 /* As a sop to Valgrind-Memcheck, the following are useful. */ 452 Iop_CmpNEZ8, Iop_CmpNEZ16, Iop_CmpNEZ32, Iop_CmpNEZ64, 453 Iop_CmpwNEZ32, Iop_CmpwNEZ64, /* all-0s -> all-Os; other -> all-1s */ 454 Iop_Left8, Iop_Left16, Iop_Left32, Iop_Left64, /* \x -> x | -x */ 455 Iop_Max32U, /* unsigned max */ 456 457 /* PowerPC-style 3-way integer comparisons. Without them it is 458 difficult to simulate PPC efficiently. 459 op(x,y) | x < y = 0x8 else 460 | x > y = 0x4 else 461 | x == y = 0x2 462 */ 463 Iop_CmpORD32U, Iop_CmpORD64U, 464 Iop_CmpORD32S, Iop_CmpORD64S, 465 466 /* Division */ 467 /* TODO: clarify semantics wrt rounding, negative values, whatever */ 468 Iop_DivU32, // :: I32,I32 -> I32 (simple div, no mod) 469 Iop_DivS32, // ditto, signed 470 Iop_DivU64, // :: I64,I64 -> I64 (simple div, no mod) 471 Iop_DivS64, // ditto, signed 472 Iop_DivU64E, // :: I64,I64 -> I64 (dividend is 64-bit arg (hi) concat with 64 0's (low)) 473 Iop_DivS64E, // ditto, signed 474 Iop_DivU32E, // :: I32,I32 -> I32 (dividend is 32-bit arg (hi) concat with 32 0's (low)) 475 Iop_DivS32E, // ditto, signed 476 477 Iop_DivModU64to32, // :: I64,I32 -> I64 478 // of which lo half is div and hi half is mod 479 Iop_DivModS64to32, // ditto, signed 480 481 Iop_DivModU128to64, // :: V128,I64 -> V128 482 // of which lo half is div and hi half is mod 483 Iop_DivModS128to64, // ditto, signed 484 485 Iop_DivModS64to64, // :: I64,I64 -> I128 486 // of which lo half is div and hi half is mod 487 488 /* Integer conversions. Some of these are redundant (eg 489 Iop_64to8 is the same as Iop_64to32 and then Iop_32to8), but 490 having a complete set reduces the typical dynamic size of IR 491 and makes the instruction selectors easier to write. 
*/ 492 493 /* Widening conversions */ 494 Iop_8Uto16, Iop_8Uto32, Iop_8Uto64, 495 Iop_16Uto32, Iop_16Uto64, 496 Iop_32Uto64, 497 Iop_8Sto16, Iop_8Sto32, Iop_8Sto64, 498 Iop_16Sto32, Iop_16Sto64, 499 Iop_32Sto64, 500 501 /* Narrowing conversions */ 502 Iop_64to8, Iop_32to8, Iop_64to16, 503 /* 8 <-> 16 bit conversions */ 504 Iop_16to8, // :: I16 -> I8, low half 505 Iop_16HIto8, // :: I16 -> I8, high half 506 Iop_8HLto16, // :: (I8,I8) -> I16 507 /* 16 <-> 32 bit conversions */ 508 Iop_32to16, // :: I32 -> I16, low half 509 Iop_32HIto16, // :: I32 -> I16, high half 510 Iop_16HLto32, // :: (I16,I16) -> I32 511 /* 32 <-> 64 bit conversions */ 512 Iop_64to32, // :: I64 -> I32, low half 513 Iop_64HIto32, // :: I64 -> I32, high half 514 Iop_32HLto64, // :: (I32,I32) -> I64 515 /* 64 <-> 128 bit conversions */ 516 Iop_128to64, // :: I128 -> I64, low half 517 Iop_128HIto64, // :: I128 -> I64, high half 518 Iop_64HLto128, // :: (I64,I64) -> I128 519 /* 1-bit stuff */ 520 Iop_Not1, /* :: Ity_Bit -> Ity_Bit */ 521 Iop_32to1, /* :: Ity_I32 -> Ity_Bit, just select bit[0] */ 522 Iop_64to1, /* :: Ity_I64 -> Ity_Bit, just select bit[0] */ 523 Iop_1Uto8, /* :: Ity_Bit -> Ity_I8, unsigned widen */ 524 Iop_1Uto32, /* :: Ity_Bit -> Ity_I32, unsigned widen */ 525 Iop_1Uto64, /* :: Ity_Bit -> Ity_I64, unsigned widen */ 526 Iop_1Sto8, /* :: Ity_Bit -> Ity_I8, signed widen */ 527 Iop_1Sto16, /* :: Ity_Bit -> Ity_I16, signed widen */ 528 Iop_1Sto32, /* :: Ity_Bit -> Ity_I32, signed widen */ 529 Iop_1Sto64, /* :: Ity_Bit -> Ity_I64, signed widen */ 530 531 /* ------ Floating point. We try to be IEEE754 compliant. ------ */ 532 533 /* --- Simple stuff as mandated by 754. --- */ 534 535 /* Binary operations, with rounding. 
*/ 536 /* :: IRRoundingMode(I32) x F64 x F64 -> F64 */ 537 Iop_AddF64, Iop_SubF64, Iop_MulF64, Iop_DivF64, 538 539 /* :: IRRoundingMode(I32) x F32 x F32 -> F32 */ 540 Iop_AddF32, Iop_SubF32, Iop_MulF32, Iop_DivF32, 541 542 /* Variants of the above which produce a 64-bit result but which 543 round their result to a IEEE float range first. */ 544 /* :: IRRoundingMode(I32) x F64 x F64 -> F64 */ 545 Iop_AddF64r32, Iop_SubF64r32, Iop_MulF64r32, Iop_DivF64r32, 546 547 /* Unary operations, without rounding. */ 548 /* :: F64 -> F64 */ 549 Iop_NegF64, Iop_AbsF64, 550 551 /* :: F32 -> F32 */ 552 Iop_NegF32, Iop_AbsF32, 553 554 /* Unary operations, with rounding. */ 555 /* :: IRRoundingMode(I32) x F64 -> F64 */ 556 Iop_SqrtF64, Iop_SqrtF64r32, 557 558 /* :: IRRoundingMode(I32) x F32 -> F32 */ 559 Iop_SqrtF32, 560 561 /* Comparison, yielding GT/LT/EQ/UN(ordered), as per the following: 562 0x45 Unordered 563 0x01 LT 564 0x00 GT 565 0x40 EQ 566 This just happens to be the Intel encoding. The values 567 are recorded in the type IRCmpF64Result. 568 */ 569 /* :: F64 x F64 -> IRCmpF64Result(I32) */ 570 Iop_CmpF64, 571 Iop_CmpF32, 572 Iop_CmpF128, 573 574 /* --- Int to/from FP conversions. --- */ 575 576 /* For the most part, these take a first argument :: Ity_I32 (as 577 IRRoundingMode) which is an indication of the rounding mode 578 to use, as per the following encoding ("the standard 579 encoding"): 580 00b to nearest (the default) 581 01b to -infinity 582 10b to +infinity 583 11b to zero 584 This just happens to be the Intel encoding. For reference only, 585 the PPC encoding is: 586 00b to nearest (the default) 587 01b to zero 588 10b to +infinity 589 11b to -infinity 590 Any PPC -> IR front end will have to translate these PPC 591 encodings, as encoded in the guest state, to the standard 592 encodings, to pass to the primops. 
593 For reference only, the ARM VFP encoding is: 594 00b to nearest 595 01b to +infinity 596 10b to -infinity 597 11b to zero 598 Again, this will have to be converted to the standard encoding 599 to pass to primops. 600 601 If one of these conversions gets an out-of-range condition, 602 or a NaN, as an argument, the result is host-defined. On x86 603 the "integer indefinite" value 0x80..00 is produced. On PPC 604 it is either 0x80..00 or 0x7F..FF depending on the sign of 605 the argument. 606 607 On ARMvfp, when converting to a signed integer result, the 608 overflow result is 0x80..00 for negative args and 0x7F..FF 609 for positive args. For unsigned integer results it is 610 0x00..00 and 0xFF..FF respectively. 611 612 Rounding is required whenever the destination type cannot 613 represent exactly all values of the source type. 614 */ 615 Iop_F64toI16S, /* IRRoundingMode(I32) x F64 -> signed I16 */ 616 Iop_F64toI32S, /* IRRoundingMode(I32) x F64 -> signed I32 */ 617 Iop_F64toI64S, /* IRRoundingMode(I32) x F64 -> signed I64 */ 618 Iop_F64toI64U, /* IRRoundingMode(I32) x F64 -> unsigned I64 */ 619 620 Iop_F64toI32U, /* IRRoundingMode(I32) x F64 -> unsigned I32 */ 621 622 Iop_I16StoF64, /* signed I16 -> F64 */ 623 Iop_I32StoF64, /* signed I32 -> F64 */ 624 Iop_I64StoF64, /* IRRoundingMode(I32) x signed I64 -> F64 */ 625 Iop_I64UtoF64, /* IRRoundingMode(I32) x unsigned I64 -> F64 */ 626 Iop_I64UtoF32, /* IRRoundingMode(I32) x unsigned I64 -> F32 */ 627 628 Iop_I32UtoF64, /* unsigned I32 -> F64 */ 629 630 Iop_F32toI16S, /* IRRoundingMode(I32) x F32 -> signed I16 */ 631 Iop_F32toI32S, /* IRRoundingMode(I32) x F32 -> signed I32 */ 632 Iop_F32toI64S, /* IRRoundingMode(I32) x F32 -> signed I64 */ 633 634 Iop_I16StoF32, /* signed I16 -> F32 */ 635 Iop_I32StoF32, /* IRRoundingMode(I32) x signed I32 -> F32 */ 636 Iop_I64StoF32, /* IRRoundingMode(I32) x signed I64 -> F32 */ 637 638 /* Conversion between floating point formats */ 639 Iop_F32toF64, /* F32 -> F64 */ 640 
Iop_F64toF32, /* IRRoundingMode(I32) x F64 -> F32 */ 641 642 /* Reinterpretation. Take an F64 and produce an I64 with 643 the same bit pattern, or vice versa. */ 644 Iop_ReinterpF64asI64, Iop_ReinterpI64asF64, 645 Iop_ReinterpF32asI32, Iop_ReinterpI32asF32, 646 647 /* Support for 128-bit floating point */ 648 Iop_F64HLtoF128,/* (high half of F128,low half of F128) -> F128 */ 649 Iop_F128HItoF64,/* F128 -> high half of F128 into a F64 register */ 650 Iop_F128LOtoF64,/* F128 -> low half of F128 into a F64 register */ 651 652 /* :: IRRoundingMode(I32) x F128 x F128 -> F128 */ 653 Iop_AddF128, Iop_SubF128, Iop_MulF128, Iop_DivF128, 654 655 /* :: F128 -> F128 */ 656 Iop_NegF128, Iop_AbsF128, 657 658 /* :: IRRoundingMode(I32) x F128 -> F128 */ 659 Iop_SqrtF128, 660 661 Iop_I32StoF128, /* signed I32 -> F128 */ 662 Iop_I64StoF128, /* signed I64 -> F128 */ 663 Iop_F32toF128, /* F32 -> F128 */ 664 Iop_F64toF128, /* F64 -> F128 */ 665 666 Iop_F128toI32S, /* IRRoundingMode(I32) x F128 -> signed I32 */ 667 Iop_F128toI64S, /* IRRoundingMode(I32) x F128 -> signed I64 */ 668 Iop_F128toF64, /* IRRoundingMode(I32) x F128 -> F64 */ 669 Iop_F128toF32, /* IRRoundingMode(I32) x F128 -> F32 */ 670 671 /* --- guest x86/amd64 specifics, not mandated by 754. --- */ 672 673 /* Binary ops, with rounding. */ 674 /* :: IRRoundingMode(I32) x F64 x F64 -> F64 */ 675 Iop_AtanF64, /* FPATAN, arctan(arg1/arg2) */ 676 Iop_Yl2xF64, /* FYL2X, arg1 * log2(arg2) */ 677 Iop_Yl2xp1F64, /* FYL2XP1, arg1 * log2(arg2+1.0) */ 678 Iop_PRemF64, /* FPREM, non-IEEE remainder(arg1/arg2) */ 679 Iop_PRemC3210F64, /* C3210 flags resulting from FPREM, :: I32 */ 680 Iop_PRem1F64, /* FPREM1, IEEE remainder(arg1/arg2) */ 681 Iop_PRem1C3210F64, /* C3210 flags resulting from FPREM1, :: I32 */ 682 Iop_ScaleF64, /* FSCALE, arg1 * (2^RoundTowardsZero(arg2)) */ 683 /* Note that on x86 guest, PRem1{C3210} has the same behaviour 684 as the IEEE mandated RemF64, except it is limited in the 685 range of its operand. 
Hence the partialness. */ 686 687 /* Unary ops, with rounding. */ 688 /* :: IRRoundingMode(I32) x F64 -> F64 */ 689 Iop_SinF64, /* FSIN */ 690 Iop_CosF64, /* FCOS */ 691 Iop_TanF64, /* FTAN */ 692 Iop_2xm1F64, /* (2^arg - 1.0) */ 693 Iop_RoundF64toInt, /* F64 value to nearest integral value (still 694 as F64) */ 695 Iop_RoundF32toInt, /* F32 value to nearest integral value (still 696 as F32) */ 697 698 /* --- guest s390 specifics, not mandated by 754. --- */ 699 700 /* Fused multiply-add/sub */ 701 /* :: IRRoundingMode(I32) x F32 x F32 x F32 -> F32 702 (computes op3 * op2 +/- op1 */ 703 Iop_MAddF32, Iop_MSubF32, 704 705 /* --- guest ppc32/64 specifics, not mandated by 754. --- */ 706 707 /* Ternary operations, with rounding. */ 708 /* Fused multiply-add/sub, with 112-bit intermediate 709 precision for ppc. 710 Also used to implement fused multiply-add/sub for s390. */ 711 /* :: IRRoundingMode(I32) x F64 x F64 x F64 -> F64 712 (computes arg2 * arg3 +/- arg4) */ 713 Iop_MAddF64, Iop_MSubF64, 714 715 /* Variants of the above which produce a 64-bit result but which 716 round their result to a IEEE float range first. */ 717 /* :: IRRoundingMode(I32) x F64 x F64 x F64 -> F64 */ 718 Iop_MAddF64r32, Iop_MSubF64r32, 719 720 /* :: F64 -> F64 */ 721 Iop_Est5FRSqrt, /* reciprocal square root estimate, 5 good bits */ 722 Iop_RoundF64toF64_NEAREST, /* frin */ 723 Iop_RoundF64toF64_NegINF, /* frim */ 724 Iop_RoundF64toF64_PosINF, /* frip */ 725 Iop_RoundF64toF64_ZERO, /* friz */ 726 727 /* :: F64 -> F32 */ 728 Iop_TruncF64asF32, /* do F64->F32 truncation as per 'fsts' */ 729 730 /* :: IRRoundingMode(I32) x F64 -> F64 */ 731 Iop_RoundF64toF32, /* round F64 to nearest F32 value (still as F64) */ 732 /* NB: pretty much the same as Iop_F64toF32, except no change 733 of type. 
*/ 734 735 /* :: F64 -> I32 */ 736 Iop_CalcFPRF, /* Calc 5 fpscr[FPRF] bits (Class, <, =, >, Unord) 737 from FP result */ 738 739 /* ------------------ 32-bit SIMD Integer ------------------ */ 740 741 /* 16x2 add/sub, also signed/unsigned saturating variants */ 742 Iop_Add16x2, Iop_Sub16x2, 743 Iop_QAdd16Sx2, Iop_QAdd16Ux2, 744 Iop_QSub16Sx2, Iop_QSub16Ux2, 745 746 /* 16x2 signed/unsigned halving add/sub. For each lane, these 747 compute bits 16:1 of (eg) sx(argL) + sx(argR), 748 or zx(argL) - zx(argR) etc. */ 749 Iop_HAdd16Ux2, Iop_HAdd16Sx2, 750 Iop_HSub16Ux2, Iop_HSub16Sx2, 751 752 /* 8x4 add/sub, also signed/unsigned saturating variants */ 753 Iop_Add8x4, Iop_Sub8x4, 754 Iop_QAdd8Sx4, Iop_QAdd8Ux4, 755 Iop_QSub8Sx4, Iop_QSub8Ux4, 756 757 /* 8x4 signed/unsigned halving add/sub. For each lane, these 758 compute bits 8:1 of (eg) sx(argL) + sx(argR), 759 or zx(argL) - zx(argR) etc. */ 760 Iop_HAdd8Ux4, Iop_HAdd8Sx4, 761 Iop_HSub8Ux4, Iop_HSub8Sx4, 762 763 /* 8x4 sum of absolute unsigned differences. */ 764 Iop_Sad8Ux4, 765 766 /* MISC (vector integer cmp != 0) */ 767 Iop_CmpNEZ16x2, Iop_CmpNEZ8x4, 768 769 /* ------------------ 64-bit SIMD FP ------------------------ */ 770 771 /* Convertion to/from int */ 772 Iop_I32UtoFx2, Iop_I32StoFx2, /* I32x4 -> F32x4 */ 773 Iop_FtoI32Ux2_RZ, Iop_FtoI32Sx2_RZ, /* F32x4 -> I32x4 */ 774 /* Fixed32 format is floating-point number with fixed number of fraction 775 bits. The number of fraction bits is passed as a second argument of 776 type I8. */ 777 Iop_F32ToFixed32Ux2_RZ, Iop_F32ToFixed32Sx2_RZ, /* fp -> fixed-point */ 778 Iop_Fixed32UToF32x2_RN, Iop_Fixed32SToF32x2_RN, /* fixed-point -> fp */ 779 780 /* Binary operations */ 781 Iop_Max32Fx2, Iop_Min32Fx2, 782 /* Pairwise Min and Max. See integer pairwise operations for more 783 details. */ 784 Iop_PwMax32Fx2, Iop_PwMin32Fx2, 785 /* Note: For the following compares, the arm front-end assumes a 786 nan in a lane of either argument returns zero for that lane. 
*/ 787 Iop_CmpEQ32Fx2, Iop_CmpGT32Fx2, Iop_CmpGE32Fx2, 788 789 /* Vector Reciprocal Estimate finds an approximate reciprocal of each 790 element in the operand vector, and places the results in the destination 791 vector. */ 792 Iop_Recip32Fx2, 793 794 /* Vector Reciprocal Step computes (2.0 - arg1 * arg2). 795 Note, that if one of the arguments is zero and another one is infinity 796 of arbitrary sign the result of the operation is 2.0. */ 797 Iop_Recps32Fx2, 798 799 /* Vector Reciprocal Square Root Estimate finds an approximate reciprocal 800 square root of each element in the operand vector. */ 801 Iop_Rsqrte32Fx2, 802 803 /* Vector Reciprocal Square Root Step computes (3.0 - arg1 * arg2) / 2.0. 804 Note, that of one of the arguments is zero and another one is infiinty 805 of arbitrary sign the result of the operation is 1.5. */ 806 Iop_Rsqrts32Fx2, 807 808 /* Unary */ 809 Iop_Neg32Fx2, Iop_Abs32Fx2, 810 811 /* ------------------ 64-bit SIMD Integer. ------------------ */ 812 813 /* MISC (vector integer cmp != 0) */ 814 Iop_CmpNEZ8x8, Iop_CmpNEZ16x4, Iop_CmpNEZ32x2, 815 816 /* ADDITION (normal / unsigned sat / signed sat) */ 817 Iop_Add8x8, Iop_Add16x4, Iop_Add32x2, 818 Iop_QAdd8Ux8, Iop_QAdd16Ux4, Iop_QAdd32Ux2, Iop_QAdd64Ux1, 819 Iop_QAdd8Sx8, Iop_QAdd16Sx4, Iop_QAdd32Sx2, Iop_QAdd64Sx1, 820 821 /* PAIRWISE operations */ 822 /* Iop_PwFoo16x4( [a,b,c,d], [e,f,g,h] ) = 823 [Foo16(a,b), Foo16(c,d), Foo16(e,f), Foo16(g,h)] */ 824 Iop_PwAdd8x8, Iop_PwAdd16x4, Iop_PwAdd32x2, 825 Iop_PwMax8Sx8, Iop_PwMax16Sx4, Iop_PwMax32Sx2, 826 Iop_PwMax8Ux8, Iop_PwMax16Ux4, Iop_PwMax32Ux2, 827 Iop_PwMin8Sx8, Iop_PwMin16Sx4, Iop_PwMin32Sx2, 828 Iop_PwMin8Ux8, Iop_PwMin16Ux4, Iop_PwMin32Ux2, 829 /* Longening variant is unary. The resulting vector contains two times 830 less elements than operand, but they are two times wider. 831 Example: 832 Iop_PAddL16Ux4( [a,b,c,d] ) = [a+b,c+d] 833 where a+b and c+d are unsigned 32-bit values. 
*/ 834 Iop_PwAddL8Ux8, Iop_PwAddL16Ux4, Iop_PwAddL32Ux2, 835 Iop_PwAddL8Sx8, Iop_PwAddL16Sx4, Iop_PwAddL32Sx2, 836 837 /* SUBTRACTION (normal / unsigned sat / signed sat) */ 838 Iop_Sub8x8, Iop_Sub16x4, Iop_Sub32x2, 839 Iop_QSub8Ux8, Iop_QSub16Ux4, Iop_QSub32Ux2, Iop_QSub64Ux1, 840 Iop_QSub8Sx8, Iop_QSub16Sx4, Iop_QSub32Sx2, Iop_QSub64Sx1, 841 842 /* ABSOLUTE VALUE */ 843 Iop_Abs8x8, Iop_Abs16x4, Iop_Abs32x2, 844 845 /* MULTIPLICATION (normal / high half of signed/unsigned / plynomial ) */ 846 Iop_Mul8x8, Iop_Mul16x4, Iop_Mul32x2, 847 Iop_Mul32Fx2, 848 Iop_MulHi16Ux4, 849 Iop_MulHi16Sx4, 850 /* Plynomial multiplication treats it's arguments as coefficients of 851 polynoms over {0, 1}. */ 852 Iop_PolynomialMul8x8, 853 854 /* Vector Saturating Doubling Multiply Returning High Half and 855 Vector Saturating Rounding Doubling Multiply Returning High Half */ 856 /* These IROp's multiply corresponding elements in two vectors, double 857 the results, and place the most significant half of the final results 858 in the destination vector. The results are truncated or rounded. If 859 any of the results overflow, they are saturated. 
*/ 860 Iop_QDMulHi16Sx4, Iop_QDMulHi32Sx2, 861 Iop_QRDMulHi16Sx4, Iop_QRDMulHi32Sx2, 862 863 /* AVERAGING: note: (arg1 + arg2 + 1) >>u 1 */ 864 Iop_Avg8Ux8, 865 Iop_Avg16Ux4, 866 867 /* MIN/MAX */ 868 Iop_Max8Sx8, Iop_Max16Sx4, Iop_Max32Sx2, 869 Iop_Max8Ux8, Iop_Max16Ux4, Iop_Max32Ux2, 870 Iop_Min8Sx8, Iop_Min16Sx4, Iop_Min32Sx2, 871 Iop_Min8Ux8, Iop_Min16Ux4, Iop_Min32Ux2, 872 873 /* COMPARISON */ 874 Iop_CmpEQ8x8, Iop_CmpEQ16x4, Iop_CmpEQ32x2, 875 Iop_CmpGT8Ux8, Iop_CmpGT16Ux4, Iop_CmpGT32Ux2, 876 Iop_CmpGT8Sx8, Iop_CmpGT16Sx4, Iop_CmpGT32Sx2, 877 878 /* COUNT ones / leading zeroes / leading sign bits (not including topmost 879 bit) */ 880 Iop_Cnt8x8, 881 Iop_Clz8Sx8, Iop_Clz16Sx4, Iop_Clz32Sx2, 882 Iop_Cls8Sx8, Iop_Cls16Sx4, Iop_Cls32Sx2, 883 884 /* VECTOR x VECTOR SHIFT / ROTATE */ 885 Iop_Shl8x8, Iop_Shl16x4, Iop_Shl32x2, 886 Iop_Shr8x8, Iop_Shr16x4, Iop_Shr32x2, 887 Iop_Sar8x8, Iop_Sar16x4, Iop_Sar32x2, 888 Iop_Sal8x8, Iop_Sal16x4, Iop_Sal32x2, Iop_Sal64x1, 889 890 /* VECTOR x SCALAR SHIFT (shift amt :: Ity_I8) */ 891 Iop_ShlN8x8, Iop_ShlN16x4, Iop_ShlN32x2, 892 Iop_ShrN8x8, Iop_ShrN16x4, Iop_ShrN32x2, 893 Iop_SarN8x8, Iop_SarN16x4, Iop_SarN32x2, 894 895 /* VECTOR x VECTOR SATURATING SHIFT */ 896 Iop_QShl8x8, Iop_QShl16x4, Iop_QShl32x2, Iop_QShl64x1, 897 Iop_QSal8x8, Iop_QSal16x4, Iop_QSal32x2, Iop_QSal64x1, 898 /* VECTOR x INTEGER SATURATING SHIFT */ 899 Iop_QShlN8Sx8, Iop_QShlN16Sx4, Iop_QShlN32Sx2, Iop_QShlN64Sx1, 900 Iop_QShlN8x8, Iop_QShlN16x4, Iop_QShlN32x2, Iop_QShlN64x1, 901 Iop_QSalN8x8, Iop_QSalN16x4, Iop_QSalN32x2, Iop_QSalN64x1, 902 903 /* NARROWING (binary) 904 -- narrow 2xI64 into 1xI64, hi half from left arg */ 905 /* For saturated narrowing, I believe there are 4 variants of 906 the basic arithmetic operation, depending on the signedness 907 of argument and result. 
Here are examples that exemplify 908 what I mean: 909 910 QNarrow16Uto8U ( UShort x ) if (x >u 255) x = 255; 911 return x[7:0]; 912 913 QNarrow16Sto8S ( Short x ) if (x <s -128) x = -128; 914 if (x >s 127) x = 127; 915 return x[7:0]; 916 917 QNarrow16Uto8S ( UShort x ) if (x >u 127) x = 127; 918 return x[7:0]; 919 920 QNarrow16Sto8U ( Short x ) if (x <s 0) x = 0; 921 if (x >s 255) x = 255; 922 return x[7:0]; 923 */ 924 Iop_QNarrowBin16Sto8Ux8, 925 Iop_QNarrowBin16Sto8Sx8, Iop_QNarrowBin32Sto16Sx4, 926 927 /* INTERLEAVING */ 928 /* Interleave lanes from low or high halves of 929 operands. Most-significant result lane is from the left 930 arg. */ 931 Iop_InterleaveHI8x8, Iop_InterleaveHI16x4, Iop_InterleaveHI32x2, 932 Iop_InterleaveLO8x8, Iop_InterleaveLO16x4, Iop_InterleaveLO32x2, 933 /* Interleave odd/even lanes of operands. Most-significant result lane 934 is from the left arg. Note that Interleave{Odd,Even}Lanes32x2 are 935 identical to Interleave{HI,LO}32x2 and so are omitted.*/ 936 Iop_InterleaveOddLanes8x8, Iop_InterleaveEvenLanes8x8, 937 Iop_InterleaveOddLanes16x4, Iop_InterleaveEvenLanes16x4, 938 939 940 /* CONCATENATION -- build a new value by concatenating either 941 the even or odd lanes of both operands. Note that 942 Cat{Odd,Even}Lanes32x2 are identical to Interleave{HI,LO}32x2 943 and so are omitted. */ 944 Iop_CatOddLanes8x8, Iop_CatOddLanes16x4, 945 Iop_CatEvenLanes8x8, Iop_CatEvenLanes16x4, 946 947 /* GET / SET elements of VECTOR 948 GET is binop (I64, I8) -> I<elem_size> 949 SET is triop (I64, I8, I<elem_size>) -> I64 */ 950 /* Note: the arm back-end handles only constant second argument */ 951 Iop_GetElem8x8, Iop_GetElem16x4, Iop_GetElem32x2, 952 Iop_SetElem8x8, Iop_SetElem16x4, Iop_SetElem32x2, 953 954 /* DUPLICATING -- copy value to all lanes */ 955 Iop_Dup8x8, Iop_Dup16x4, Iop_Dup32x2, 956 957 /* EXTRACT -- copy 8-arg3 highest bytes from arg1 to 8-arg3 lowest bytes 958 of result and arg3 lowest bytes of arg2 to arg3 highest bytes of 959 result. 
960 It is a triop: (I64, I64, I8) -> I64 */ 961 /* Note: the arm back-end handles only constant third argumnet. */ 962 Iop_Extract64, 963 964 /* REVERSE the order of elements in each Half-words, Words, 965 Double-words */ 966 /* Examples: 967 Reverse16_8x8([a,b,c,d,e,f,g,h]) = [b,a,d,c,f,e,h,g] 968 Reverse32_8x8([a,b,c,d,e,f,g,h]) = [d,c,b,a,h,g,f,e] 969 Reverse64_8x8([a,b,c,d,e,f,g,h]) = [h,g,f,e,d,c,b,a] */ 970 Iop_Reverse16_8x8, 971 Iop_Reverse32_8x8, Iop_Reverse32_16x4, 972 Iop_Reverse64_8x8, Iop_Reverse64_16x4, Iop_Reverse64_32x2, 973 974 /* PERMUTING -- copy src bytes to dst, 975 as indexed by control vector bytes: 976 for i in 0 .. 7 . result[i] = argL[ argR[i] ] 977 argR[i] values may only be in the range 0 .. 7, else behaviour 978 is undefined. */ 979 Iop_Perm8x8, 980 981 /* Vector Reciprocal Estimate and Vector Reciprocal Square Root Estimate 982 See floating-point equiwalents for details. */ 983 Iop_Recip32x2, Iop_Rsqrte32x2, 984 985 /* ------------------ 128-bit SIMD FP. ------------------ */ 986 987 /* --- 32x4 vector FP --- */ 988 989 /* binary */ 990 Iop_Add32Fx4, Iop_Sub32Fx4, Iop_Mul32Fx4, Iop_Div32Fx4, 991 Iop_Max32Fx4, Iop_Min32Fx4, 992 Iop_Add32Fx2, Iop_Sub32Fx2, 993 /* Note: For the following compares, the ppc and arm front-ends assume a 994 nan in a lane of either argument returns zero for that lane. */ 995 Iop_CmpEQ32Fx4, Iop_CmpLT32Fx4, Iop_CmpLE32Fx4, Iop_CmpUN32Fx4, 996 Iop_CmpGT32Fx4, Iop_CmpGE32Fx4, 997 998 /* Vector Absolute */ 999 Iop_Abs32Fx4, 1000 1001 /* Pairwise Max and Min. See integer pairwise operations for details. */ 1002 Iop_PwMax32Fx4, Iop_PwMin32Fx4, 1003 1004 /* unary */ 1005 Iop_Sqrt32Fx4, Iop_RSqrt32Fx4, 1006 Iop_Neg32Fx4, 1007 1008 /* Vector Reciprocal Estimate finds an approximate reciprocal of each 1009 element in the operand vector, and places the results in the destination 1010 vector. */ 1011 Iop_Recip32Fx4, 1012 1013 /* Vector Reciprocal Step computes (2.0 - arg1 * arg2). 
1014 Note, that if one of the arguments is zero and another one is infinity 1015 of arbitrary sign the result of the operation is 2.0. */ 1016 Iop_Recps32Fx4, 1017 1018 /* Vector Reciprocal Square Root Estimate finds an approximate reciprocal 1019 square root of each element in the operand vector. */ 1020 Iop_Rsqrte32Fx4, 1021 1022 /* Vector Reciprocal Square Root Step computes (3.0 - arg1 * arg2) / 2.0. 1023 Note, that of one of the arguments is zero and another one is infiinty 1024 of arbitrary sign the result of the operation is 1.5. */ 1025 Iop_Rsqrts32Fx4, 1026 1027 1028 /* --- Int to/from FP conversion --- */ 1029 /* Unlike the standard fp conversions, these irops take no 1030 rounding mode argument. Instead the irop trailers _R{M,P,N,Z} 1031 indicate the mode: {-inf, +inf, nearest, zero} respectively. */ 1032 Iop_I32UtoFx4, Iop_I32StoFx4, /* I32x4 -> F32x4 */ 1033 Iop_FtoI32Ux4_RZ, Iop_FtoI32Sx4_RZ, /* F32x4 -> I32x4 */ 1034 Iop_QFtoI32Ux4_RZ, Iop_QFtoI32Sx4_RZ, /* F32x4 -> I32x4 (with saturation) */ 1035 Iop_RoundF32x4_RM, Iop_RoundF32x4_RP, /* round to fp integer */ 1036 Iop_RoundF32x4_RN, Iop_RoundF32x4_RZ, /* round to fp integer */ 1037 /* Fixed32 format is floating-point number with fixed number of fraction 1038 bits. The number of fraction bits is passed as a second argument of 1039 type I8. */ 1040 Iop_F32ToFixed32Ux4_RZ, Iop_F32ToFixed32Sx4_RZ, /* fp -> fixed-point */ 1041 Iop_Fixed32UToF32x4_RN, Iop_Fixed32SToF32x4_RN, /* fixed-point -> fp */ 1042 1043 /* --- Single to/from half conversion --- */ 1044 /* FIXME: what kind of rounding in F32x4 -> F16x4 case? */ 1045 Iop_F32toF16x4, Iop_F16toF32x4, /* F32x4 <-> F16x4 */ 1046 1047 /* --- 32x4 lowest-lane-only scalar FP --- */ 1048 1049 /* In binary cases, upper 3/4 is copied from first operand. In 1050 unary cases, upper 3/4 is copied from the operand. 
*/ 1051 1052 /* binary */ 1053 Iop_Add32F0x4, Iop_Sub32F0x4, Iop_Mul32F0x4, Iop_Div32F0x4, 1054 Iop_Max32F0x4, Iop_Min32F0x4, 1055 Iop_CmpEQ32F0x4, Iop_CmpLT32F0x4, Iop_CmpLE32F0x4, Iop_CmpUN32F0x4, 1056 1057 /* unary */ 1058 Iop_Recip32F0x4, Iop_Sqrt32F0x4, Iop_RSqrt32F0x4, 1059 1060 /* --- 64x2 vector FP --- */ 1061 1062 /* binary */ 1063 Iop_Add64Fx2, Iop_Sub64Fx2, Iop_Mul64Fx2, Iop_Div64Fx2, 1064 Iop_Max64Fx2, Iop_Min64Fx2, 1065 Iop_CmpEQ64Fx2, Iop_CmpLT64Fx2, Iop_CmpLE64Fx2, Iop_CmpUN64Fx2, 1066 1067 /* unary */ 1068 Iop_Recip64Fx2, Iop_Sqrt64Fx2, Iop_RSqrt64Fx2, 1069 1070 /* --- 64x2 lowest-lane-only scalar FP --- */ 1071 1072 /* In binary cases, upper half is copied from first operand. In 1073 unary cases, upper half is copied from the operand. */ 1074 1075 /* binary */ 1076 Iop_Add64F0x2, Iop_Sub64F0x2, Iop_Mul64F0x2, Iop_Div64F0x2, 1077 Iop_Max64F0x2, Iop_Min64F0x2, 1078 Iop_CmpEQ64F0x2, Iop_CmpLT64F0x2, Iop_CmpLE64F0x2, Iop_CmpUN64F0x2, 1079 1080 /* unary */ 1081 Iop_Recip64F0x2, Iop_Sqrt64F0x2, Iop_RSqrt64F0x2, 1082 1083 /* --- pack / unpack --- */ 1084 1085 /* 64 <-> 128 bit vector */ 1086 Iop_V128to64, // :: V128 -> I64, low half 1087 Iop_V128HIto64, // :: V128 -> I64, high half 1088 Iop_64HLtoV128, // :: (I64,I64) -> V128 1089 1090 Iop_64UtoV128, 1091 Iop_SetV128lo64, 1092 1093 /* 32 <-> 128 bit vector */ 1094 Iop_32UtoV128, 1095 Iop_V128to32, // :: V128 -> I32, lowest lane 1096 Iop_SetV128lo32, // :: (V128,I32) -> V128 1097 1098 /* ------------------ 128-bit SIMD Integer. 
------------------ */ 1099 1100 /* BITWISE OPS */ 1101 Iop_NotV128, 1102 Iop_AndV128, Iop_OrV128, Iop_XorV128, 1103 1104 /* VECTOR SHIFT (shift amt :: Ity_I8) */ 1105 Iop_ShlV128, Iop_ShrV128, 1106 1107 /* MISC (vector integer cmp != 0) */ 1108 Iop_CmpNEZ8x16, Iop_CmpNEZ16x8, Iop_CmpNEZ32x4, Iop_CmpNEZ64x2, 1109 1110 /* ADDITION (normal / unsigned sat / signed sat) */ 1111 Iop_Add8x16, Iop_Add16x8, Iop_Add32x4, Iop_Add64x2, 1112 Iop_QAdd8Ux16, Iop_QAdd16Ux8, Iop_QAdd32Ux4, Iop_QAdd64Ux2, 1113 Iop_QAdd8Sx16, Iop_QAdd16Sx8, Iop_QAdd32Sx4, Iop_QAdd64Sx2, 1114 1115 /* SUBTRACTION (normal / unsigned sat / signed sat) */ 1116 Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4, Iop_Sub64x2, 1117 Iop_QSub8Ux16, Iop_QSub16Ux8, Iop_QSub32Ux4, Iop_QSub64Ux2, 1118 Iop_QSub8Sx16, Iop_QSub16Sx8, Iop_QSub32Sx4, Iop_QSub64Sx2, 1119 1120 /* MULTIPLICATION (normal / high half of signed/unsigned) */ 1121 Iop_Mul8x16, Iop_Mul16x8, Iop_Mul32x4, 1122 Iop_MulHi16Ux8, Iop_MulHi32Ux4, 1123 Iop_MulHi16Sx8, Iop_MulHi32Sx4, 1124 /* (widening signed/unsigned of even lanes, with lowest lane=zero) */ 1125 Iop_MullEven8Ux16, Iop_MullEven16Ux8, 1126 Iop_MullEven8Sx16, Iop_MullEven16Sx8, 1127 /* FIXME: document these */ 1128 Iop_Mull8Ux8, Iop_Mull8Sx8, 1129 Iop_Mull16Ux4, Iop_Mull16Sx4, 1130 Iop_Mull32Ux2, Iop_Mull32Sx2, 1131 /* Vector Saturating Doubling Multiply Returning High Half and 1132 Vector Saturating Rounding Doubling Multiply Returning High Half */ 1133 /* These IROp's multiply corresponding elements in two vectors, double 1134 the results, and place the most significant half of the final results 1135 in the destination vector. The results are truncated or rounded. If 1136 any of the results overflow, they are saturated. 
*/ 1137 Iop_QDMulHi16Sx8, Iop_QDMulHi32Sx4, 1138 Iop_QRDMulHi16Sx8, Iop_QRDMulHi32Sx4, 1139 /* Doubling saturating multiplication (long) (I64, I64) -> V128 */ 1140 Iop_QDMulLong16Sx4, Iop_QDMulLong32Sx2, 1141 /* Plynomial multiplication treats it's arguments as coefficients of 1142 polynoms over {0, 1}. */ 1143 Iop_PolynomialMul8x16, /* (V128, V128) -> V128 */ 1144 Iop_PolynomialMull8x8, /* (I64, I64) -> V128 */ 1145 1146 /* PAIRWISE operations */ 1147 /* Iop_PwFoo16x4( [a,b,c,d], [e,f,g,h] ) = 1148 [Foo16(a,b), Foo16(c,d), Foo16(e,f), Foo16(g,h)] */ 1149 Iop_PwAdd8x16, Iop_PwAdd16x8, Iop_PwAdd32x4, 1150 Iop_PwAdd32Fx2, 1151 /* Longening variant is unary. The resulting vector contains two times 1152 less elements than operand, but they are two times wider. 1153 Example: 1154 Iop_PwAddL16Ux4( [a,b,c,d] ) = [a+b,c+d] 1155 where a+b and c+d are unsigned 32-bit values. */ 1156 Iop_PwAddL8Ux16, Iop_PwAddL16Ux8, Iop_PwAddL32Ux4, 1157 Iop_PwAddL8Sx16, Iop_PwAddL16Sx8, Iop_PwAddL32Sx4, 1158 1159 /* ABSOLUTE VALUE */ 1160 Iop_Abs8x16, Iop_Abs16x8, Iop_Abs32x4, 1161 1162 /* AVERAGING: note: (arg1 + arg2 + 1) >>u 1 */ 1163 Iop_Avg8Ux16, Iop_Avg16Ux8, Iop_Avg32Ux4, 1164 Iop_Avg8Sx16, Iop_Avg16Sx8, Iop_Avg32Sx4, 1165 1166 /* MIN/MAX */ 1167 Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4, 1168 Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4, 1169 Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4, 1170 Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4, 1171 1172 /* COMPARISON */ 1173 Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4, 1174 Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2, 1175 Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4, 1176 1177 /* COUNT ones / leading zeroes / leading sign bits (not including topmost 1178 bit) */ 1179 Iop_Cnt8x16, 1180 Iop_Clz8Sx16, Iop_Clz16Sx8, Iop_Clz32Sx4, 1181 Iop_Cls8Sx16, Iop_Cls16Sx8, Iop_Cls32Sx4, 1182 1183 /* VECTOR x SCALAR SHIFT (shift amt :: Ity_I8) */ 1184 Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2, 1185 Iop_ShrN8x16, Iop_ShrN16x8, 
Iop_ShrN32x4, Iop_ShrN64x2, 1186 Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2, 1187 1188 /* VECTOR x VECTOR SHIFT / ROTATE */ 1189 Iop_Shl8x16, Iop_Shl16x8, Iop_Shl32x4, Iop_Shl64x2, 1190 Iop_Shr8x16, Iop_Shr16x8, Iop_Shr32x4, Iop_Shr64x2, 1191 Iop_Sar8x16, Iop_Sar16x8, Iop_Sar32x4, Iop_Sar64x2, 1192 Iop_Sal8x16, Iop_Sal16x8, Iop_Sal32x4, Iop_Sal64x2, 1193 Iop_Rol8x16, Iop_Rol16x8, Iop_Rol32x4, 1194 1195 /* VECTOR x VECTOR SATURATING SHIFT */ 1196 Iop_QShl8x16, Iop_QShl16x8, Iop_QShl32x4, Iop_QShl64x2, 1197 Iop_QSal8x16, Iop_QSal16x8, Iop_QSal32x4, Iop_QSal64x2, 1198 /* VECTOR x INTEGER SATURATING SHIFT */ 1199 Iop_QShlN8Sx16, Iop_QShlN16Sx8, Iop_QShlN32Sx4, Iop_QShlN64Sx2, 1200 Iop_QShlN8x16, Iop_QShlN16x8, Iop_QShlN32x4, Iop_QShlN64x2, 1201 Iop_QSalN8x16, Iop_QSalN16x8, Iop_QSalN32x4, Iop_QSalN64x2, 1202 1203 /* NARROWING (binary) 1204 -- narrow 2xV128 into 1xV128, hi half from left arg */ 1205 /* See comments above w.r.t. U vs S issues in saturated narrowing. */ 1206 Iop_QNarrowBin16Sto8Ux16, Iop_QNarrowBin32Sto16Ux8, 1207 Iop_QNarrowBin16Sto8Sx16, Iop_QNarrowBin32Sto16Sx8, 1208 Iop_QNarrowBin16Uto8Ux16, Iop_QNarrowBin32Uto16Ux8, 1209 Iop_NarrowBin16to8x16, Iop_NarrowBin32to16x8, 1210 1211 /* NARROWING (unary) -- narrow V128 into I64 */ 1212 Iop_NarrowUn16to8x8, Iop_NarrowUn32to16x4, Iop_NarrowUn64to32x2, 1213 /* Saturating narrowing from signed source to signed/unsigned destination */ 1214 Iop_QNarrowUn16Sto8Sx8, Iop_QNarrowUn32Sto16Sx4, Iop_QNarrowUn64Sto32Sx2, 1215 Iop_QNarrowUn16Sto8Ux8, Iop_QNarrowUn32Sto16Ux4, Iop_QNarrowUn64Sto32Ux2, 1216 /* Saturating narrowing from unsigned source to unsigned destination */ 1217 Iop_QNarrowUn16Uto8Ux8, Iop_QNarrowUn32Uto16Ux4, Iop_QNarrowUn64Uto32Ux2, 1218 1219 /* WIDENING -- sign or zero extend each element of the argument 1220 vector to the twice original size. The resulting vector consists of 1221 the same number of elements but each element and the vector itself 1222 are twice as wide. 
1223 All operations are I64->V128. 1224 Example 1225 Iop_Widen32Sto64x2( [a, b] ) = [c, d] 1226 where c = Iop_32Sto64(a) and d = Iop_32Sto64(b) */ 1227 Iop_Widen8Uto16x8, Iop_Widen16Uto32x4, Iop_Widen32Uto64x2, 1228 Iop_Widen8Sto16x8, Iop_Widen16Sto32x4, Iop_Widen32Sto64x2, 1229 1230 /* INTERLEAVING */ 1231 /* Interleave lanes from low or high halves of 1232 operands. Most-significant result lane is from the left 1233 arg. */ 1234 Iop_InterleaveHI8x16, Iop_InterleaveHI16x8, 1235 Iop_InterleaveHI32x4, Iop_InterleaveHI64x2, 1236 Iop_InterleaveLO8x16, Iop_InterleaveLO16x8, 1237 Iop_InterleaveLO32x4, Iop_InterleaveLO64x2, 1238 /* Interleave odd/even lanes of operands. Most-significant result lane 1239 is from the left arg. */ 1240 Iop_InterleaveOddLanes8x16, Iop_InterleaveEvenLanes8x16, 1241 Iop_InterleaveOddLanes16x8, Iop_InterleaveEvenLanes16x8, 1242 Iop_InterleaveOddLanes32x4, Iop_InterleaveEvenLanes32x4, 1243 1244 /* CONCATENATION -- build a new value by concatenating either 1245 the even or odd lanes of both operands. */ 1246 Iop_CatOddLanes8x16, Iop_CatOddLanes16x8, Iop_CatOddLanes32x4, 1247 Iop_CatEvenLanes8x16, Iop_CatEvenLanes16x8, Iop_CatEvenLanes32x4, 1248 1249 /* GET elements of VECTOR 1250 GET is binop (V128, I8) -> I<elem_size> */ 1251 /* Note: the arm back-end handles only constant second argument. */ 1252 Iop_GetElem8x16, Iop_GetElem16x8, Iop_GetElem32x4, Iop_GetElem64x2, 1253 1254 /* DUPLICATING -- copy value to all lanes */ 1255 Iop_Dup8x16, Iop_Dup16x8, Iop_Dup32x4, 1256 1257 /* EXTRACT -- copy 16-arg3 highest bytes from arg1 to 16-arg3 lowest bytes 1258 of result and arg3 lowest bytes of arg2 to arg3 highest bytes of 1259 result. 1260 It is a triop: (V128, V128, I8) -> V128 */ 1261 /* Note: the ARM back end handles only constant arg3 in this operation. 
*/ 1262 Iop_ExtractV128, 1263 1264 /* REVERSE the order of elements in each Half-words, Words, 1265 Double-words */ 1266 /* Examples: 1267 Reverse32_16x8([a,b,c,d,e,f,g,h]) = [b,a,d,c,f,e,h,g] 1268 Reverse64_16x8([a,b,c,d,e,f,g,h]) = [d,c,b,a,h,g,f,e] */ 1269 Iop_Reverse16_8x16, 1270 Iop_Reverse32_8x16, Iop_Reverse32_16x8, 1271 Iop_Reverse64_8x16, Iop_Reverse64_16x8, Iop_Reverse64_32x4, 1272 1273 /* PERMUTING -- copy src bytes to dst, 1274 as indexed by control vector bytes: 1275 for i in 0 .. 15 . result[i] = argL[ argR[i] ] 1276 argR[i] values may only be in the range 0 .. 15, else behaviour 1277 is undefined. */ 1278 Iop_Perm8x16, 1279 1280 /* Vector Reciprocal Estimate and Vector Reciprocal Square Root Estimate 1281 See floating-point equiwalents for details. */ 1282 Iop_Recip32x4, Iop_Rsqrte32x4 1283 } 1284 IROp; 1285 1286/* Pretty-print an op. */ 1287extern void ppIROp ( IROp ); 1288 1289 1290/* Encoding of IEEE754-specified rounding modes. This is the same as 1291 the encoding used by Intel IA32 to indicate x87 rounding mode. 1292 Note, various front and back ends rely on the actual numerical 1293 values of these, so do not change them. */ 1294typedef 1295 enum { 1296 Irrm_NEAREST = 0, 1297 Irrm_NegINF = 1, 1298 Irrm_PosINF = 2, 1299 Irrm_ZERO = 3 1300 } 1301 IRRoundingMode; 1302 1303/* Floating point comparison result values, as created by Iop_CmpF64. 1304 This is also derived from what IA32 does. */ 1305typedef 1306 enum { 1307 Ircr_UN = 0x45, 1308 Ircr_LT = 0x01, 1309 Ircr_GT = 0x00, 1310 Ircr_EQ = 0x40 1311 } 1312 IRCmpF64Result; 1313 1314typedef IRCmpF64Result IRCmpF32Result; 1315typedef IRCmpF64Result IRCmpF128Result; 1316 1317/* ------------------ Expressions ------------------ */ 1318 1319/* The different kinds of expressions. Their meaning is explained below 1320 in the comments for IRExpr. 
*/
typedef
   enum {
      Iex_Binder=0x15000,
      Iex_Get,
      Iex_GetI,
      Iex_RdTmp,
      Iex_Qop,
      Iex_Triop,
      Iex_Binop,
      Iex_Unop,
      Iex_Load,
      Iex_Const,
      Iex_Mux0X,
      Iex_CCall
   }
   IRExprTag;

/* An expression.  Stored as a tagged union.  'tag' indicates what kind
   of expression this is.  'Iex' is the union that holds the fields.  If
   an IRExpr 'e' has e.tag equal to Iex_Load, then it's a load
   expression, and the fields can be accessed with
   'e.Iex.Load.<fieldname>'.

   For each kind of expression, we show what it looks like when
   pretty-printed with ppIRExpr().
*/
typedef
   struct _IRExpr
   IRExpr;

struct _IRExpr {
   IRExprTag tag;
   union {
      /* Used only in pattern matching within Vex.  Should not be seen
         outside of Vex. */
      struct {
         Int binder;
      } Binder;

      /* Read a guest register, at a fixed offset in the guest state.
         ppIRExpr output: GET:<ty>(<offset>), eg. GET:I32(0)
      */
      struct {
         Int    offset;    /* Offset into the guest state */
         IRType ty;        /* Type of the value being read */
      } Get;

      /* Read a guest register at a non-fixed offset in the guest
         state.  This allows circular indexing into parts of the guest
         state, which is essential for modelling situations where the
         identity of guest registers is not known until run time.  One
         example is the x87 FP register stack.

         The part of the guest state to be treated as a circular array
         is described in the IRRegArray 'descr' field.  It holds the
         offset of the first element in the array, the type of each
         element, and the number of elements.

         The array index is indicated rather indirectly, in a way
         which makes optimisation easy: as the sum of variable part
         (the 'ix' field) and a constant offset (the 'bias' field).

         Since the indexing is circular, the actual array index to use
         is computed as (ix + bias) % num-of-elems-in-the-array.

         Here's an example.  The description

            (96:8xF64)[t39,-7]

         describes an array of 8 F64-typed values, the
         guest-state-offset of the first being 96.  This array is
         being indexed at (t39 - 7) % 8.

         It is important to get the array size/type exactly correct
         since IR optimisation looks closely at such info in order to
         establish aliasing/non-aliasing between separate GetI and
         PutI events, which is used to establish when they can be
         reordered, etc.  Putting incorrect info in will lead to
         obscure IR optimisation bugs.

         ppIRExpr output: GETI<descr>[<ix>,<bias>]
                      eg. GETI(128:8xI8)[t1,0]
      */
      struct {
         IRRegArray* descr; /* Part of guest state treated as circular */
         IRExpr*     ix;    /* Variable part of index into array */
         Int         bias;  /* Constant offset part of index into array */
      } GetI;

      /* The value held by a temporary.
         ppIRExpr output: t<tmp>, eg. t1
      */
      struct {
         IRTemp tmp;        /* The temporary number */
      } RdTmp;

      /* A quaternary operation.
         ppIRExpr output: <op>(<arg1>, <arg2>, <arg3>, <arg4>),
                      eg. MAddF64r32(t1, t2, t3, t4)
      */
      struct {
         IROp op;           /* op-code   */
         IRExpr* arg1;      /* operand 1 */
         IRExpr* arg2;      /* operand 2 */
         IRExpr* arg3;      /* operand 3 */
         IRExpr* arg4;      /* operand 4 */
      } Qop;

      /* A ternary operation.
         ppIRExpr output: <op>(<arg1>, <arg2>, <arg3>),
                      eg. MulF64(1, 2.0, 3.0)
      */
      struct {
         IROp op;           /* op-code   */
         IRExpr* arg1;      /* operand 1 */
         IRExpr* arg2;      /* operand 2 */
         IRExpr* arg3;      /* operand 3 */
      } Triop;

      /* A binary operation.
         ppIRExpr output: <op>(<arg1>, <arg2>), eg. Add32(t1,t2)
      */
      struct {
         IROp op;           /* op-code   */
         IRExpr* arg1;      /* operand 1 */
         IRExpr* arg2;      /* operand 2 */
      } Binop;

      /* A unary operation.
         ppIRExpr output: <op>(<arg>), eg. Neg8(t1)
      */
      struct {
         IROp op;           /* op-code */
         IRExpr* arg;       /* operand */
      } Unop;

      /* A load from memory -- a normal load, not a load-linked.
         Load-Linkeds (and Store-Conditionals) are instead represented
         by IRStmt.LLSC since Load-Linkeds have side effects and so
         are not semantically valid IRExpr's.
         ppIRExpr output: LD<end>:<ty>(<addr>), eg. LDle:I32(t1)
      */
      struct {
         IREndness end;     /* Endian-ness of the load */
         IRType    ty;      /* Type of the loaded value */
         IRExpr*   addr;    /* Address being loaded from */
      } Load;

      /* A constant-valued expression.
         ppIRExpr output: <con>, eg. 0x4:I32
      */
      struct {
         IRConst* con;      /* The constant itself */
      } Const;

      /* A call to a pure (no side-effects) helper C function.

         With the 'cee' field, 'name' is the function's name.  It is
         only used for pretty-printing purposes.  The address to call
         (host address, of course) is stored in the 'addr' field
         inside 'cee'.

         The 'args' field is a NULL-terminated array of arguments.
         The stated return IRType, and the implied argument types,
         must match that of the function being called well enough so
         that the back end can actually generate correct code for the
         call.

         The called function **must** satisfy the following:

         * no side effects -- must be a pure function, the result of
           which depends only on the passed parameters.

         * it may not look at, nor modify, any of the guest state
           since that would hide guest state transitions from
           instrumenters

         * it may not access guest memory, since that would hide
           guest memory transactions from the instrumenters

         This is restrictive, but makes the semantics clean, and does
         not interfere with IR optimisation.

         If you want to call a helper which can mess with guest state
         and/or memory, instead use Ist_Dirty.  This is a lot more
         flexible, but you have to give a bunch of details about what
         the helper does (and you better be telling the truth,
         otherwise any derived instrumentation will be wrong).  Also
         Ist_Dirty inhibits various IR optimisations and so can cause
         quite poor code to be generated.  Try to avoid it.

         ppIRExpr output: <cee>(<args>):<retty>
                      eg. foo{0x80489304}(t1, t2):I32
      */
      struct {
         IRCallee* cee;    /* Function to call. */
         IRType    retty;  /* Type of return value. */
         IRExpr**  args;   /* Vector of argument expressions. */
      }  CCall;

      /* A ternary if-then-else operator.  It returns expr0 if cond is
         zero, exprX otherwise.  Note that it is STRICT, ie. both
         expr0 and exprX are evaluated in all cases.

         ppIRExpr output: Mux0X(<cond>,<expr0>,<exprX>),
                         eg. Mux0X(t6,t7,t8)
      */
      struct {
         IRExpr* cond;     /* Condition */
         IRExpr* expr0;    /* Result when cond is zero */
         IRExpr* exprX;    /* Result when cond is nonzero */
      } Mux0X;
   } Iex;
};

/* Expression constructors.
*/
extern IRExpr* IRExpr_Binder ( Int binder );
extern IRExpr* IRExpr_Get    ( Int off, IRType ty );
extern IRExpr* IRExpr_GetI   ( IRRegArray* descr, IRExpr* ix, Int bias );
extern IRExpr* IRExpr_RdTmp  ( IRTemp tmp );
extern IRExpr* IRExpr_Qop    ( IROp op, IRExpr* arg1, IRExpr* arg2,
                               IRExpr* arg3, IRExpr* arg4 );
extern IRExpr* IRExpr_Triop  ( IROp op, IRExpr* arg1,
                               IRExpr* arg2, IRExpr* arg3 );
extern IRExpr* IRExpr_Binop  ( IROp op, IRExpr* arg1, IRExpr* arg2 );
extern IRExpr* IRExpr_Unop   ( IROp op, IRExpr* arg );
extern IRExpr* IRExpr_Load   ( IREndness end, IRType ty, IRExpr* addr );
extern IRExpr* IRExpr_Const  ( IRConst* con );
extern IRExpr* IRExpr_CCall  ( IRCallee* cee, IRType retty, IRExpr** args );
extern IRExpr* IRExpr_Mux0X  ( IRExpr* cond, IRExpr* expr0, IRExpr* exprX );

/* Deep-copy an IRExpr. */
extern IRExpr* deepCopyIRExpr ( IRExpr* );

/* Pretty-print an IRExpr. */
extern void ppIRExpr ( IRExpr* );

/* NULL-terminated IRExpr vector constructors, suitable for
   use as arg lists in clean/dirty helper calls. */
extern IRExpr** mkIRExprVec_0 ( void );
extern IRExpr** mkIRExprVec_1 ( IRExpr* );
extern IRExpr** mkIRExprVec_2 ( IRExpr*, IRExpr* );
extern IRExpr** mkIRExprVec_3 ( IRExpr*, IRExpr*, IRExpr* );
extern IRExpr** mkIRExprVec_4 ( IRExpr*, IRExpr*, IRExpr*, IRExpr* );
extern IRExpr** mkIRExprVec_5 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
                                IRExpr* );
extern IRExpr** mkIRExprVec_6 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
                                IRExpr*, IRExpr* );
extern IRExpr** mkIRExprVec_7 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
                                IRExpr*, IRExpr*, IRExpr* );
extern IRExpr** mkIRExprVec_8 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
                                IRExpr*, IRExpr*, IRExpr*, IRExpr*);

/* IRExpr vector copiers:
   - shallowCopy: shallow-copy (ie. create a new vector that shares the
     elements with the original).
   - deepCopy: deep-copy (ie. create a completely new vector). */
extern IRExpr** shallowCopyIRExprVec ( IRExpr** );
extern IRExpr** deepCopyIRExprVec ( IRExpr** );

/* Make a constant expression from the given host word taking into
   account (of course) the host word size. */
extern IRExpr* mkIRExpr_HWord ( HWord );

/* Convenience function for constructing clean helper calls. */
extern
IRExpr* mkIRExprCCall ( IRType retty,
                        Int regparms, HChar* name, void* addr,
                        IRExpr** args );


/* Convenience functions for atoms (IRExprs which are either Iex_RdTmp
 * or Iex_Const). */
static inline Bool isIRAtom ( IRExpr* e ) {
   return toBool(e->tag == Iex_RdTmp || e->tag == Iex_Const);
}

/* Are these two IR atoms identical?  Causes an assertion
   failure if they are passed non-atoms. */
extern Bool eqIRAtom ( IRExpr*, IRExpr* );


/* ------------------ Jump kinds ------------------ */

/* This describes hints which can be passed to the dispatcher at guest
   control-flow transfer points.

   Re Ijk_TInval: the guest state _must_ have two pseudo-registers,
   guest_TISTART and guest_TILEN, which specify the start and length
   of the region to be invalidated.  These are both the size of a
   guest word.  It is the responsibility of the relevant toIR.c to
   ensure that these are filled in with suitable values before issuing
   a jump of kind Ijk_TInval.

   Re Ijk_EmWarn and Ijk_EmFail: the guest state must have a
   pseudo-register guest_EMWARN, which is 32-bits regardless of the
   host or guest word size.  That register should be made to hold an
   EmWarn_* value to indicate the reason for the exit.

   In the case of Ijk_EmFail, the exit is fatal (Vex-generated code
   cannot continue) and so the jump destination can be anything.
   Re Ijk_Sys_ (syscall jumps): the guest state must have a
   pseudo-register guest_IP_AT_SYSCALL, which is the size of a guest
   word.  Front ends should set this to be the IP at the most recently
   executed kernel-entering (system call) instruction.  This makes it
   very much easier (viz, actually possible at all) to back up the
   guest to restart a syscall that has been interrupted by a signal.
*/
typedef
   enum {
      Ijk_Boring=0x16000, /* not interesting; just goto next */
      Ijk_Call,           /* guest is doing a call */
      Ijk_Ret,            /* guest is doing a return */
      Ijk_ClientReq,      /* do guest client req before continuing */
      Ijk_Yield,          /* client is yielding to thread scheduler */
      Ijk_EmWarn,         /* report emulation warning before continuing */
      Ijk_EmFail,         /* emulation critical (FATAL) error; give up */
      Ijk_NoDecode,       /* next instruction cannot be decoded */
      Ijk_MapFail,        /* Vex-provided address translation failed */
      Ijk_TInval,         /* Invalidate translations before continuing. */
      Ijk_NoRedir,        /* Jump to un-redirected guest addr */
      Ijk_SigTRAP,        /* current instruction synths SIGTRAP */
      Ijk_SigSEGV,        /* current instruction synths SIGSEGV */
      Ijk_SigBUS,         /* current instruction synths SIGBUS */
      /* Unfortunately, various guest-dependent syscall kinds.  They
         all mean: do a syscall before continuing. */
      Ijk_Sys_syscall,    /* amd64 'syscall', ppc 'sc', arm 'svc #0' */
      Ijk_Sys_int32,      /* amd64/x86 'int $0x20' */
      Ijk_Sys_int128,     /* amd64/x86 'int $0x80' */
      Ijk_Sys_int129,     /* amd64/x86 'int $0x81' */
      Ijk_Sys_int130,     /* amd64/x86 'int $0x82' */
      Ijk_Sys_sysenter    /* x86 'sysenter'.  guest_EIP becomes
                             invalid at the point this happens. */
   }
   IRJumpKind;

/* Pretty-print an IRJumpKind. */
extern void ppIRJumpKind ( IRJumpKind );


/* ------------------ Dirty helper calls ------------------ */

/* A dirty call is a flexible mechanism for calling (possibly
   conditionally) a helper function or procedure.  The helper function
   may read, write or modify client memory, and may read, write or
   modify client state.  It can take arguments and optionally return a
   value.  It may return different results and/or do different things
   when called repeatedly with the same arguments, by means of storing
   private state.

   If a value is returned, it is assigned to the nominated return
   temporary.

   Dirty calls are statements rather than expressions for obvious
   reasons.  If a dirty call is marked as writing guest state, any
   values derived from the written parts of the guest state are
   invalid.  Similarly, if the dirty call is stated as writing
   memory, any loaded values are invalidated by it.

   In order that instrumentation is possible, the call must state, and
   state correctly:

   * whether it reads, writes or modifies memory, and if so where
     (only one chunk can be stated)

   * whether it reads, writes or modifies guest state, and if so which
     pieces (several pieces may be stated, and currently their extents
     must be known at translation-time).

   Normally, code is generated to pass just the args to the helper.
   However, if .needsBBP is set, then an extra first argument is
   passed, which is the baseblock pointer, so that the callee can
   access the guest state.  It is invalid for .nFxState to be zero
   but .needsBBP to be True, since .nFxState==0 is a claim that the
   call does not access guest state.

   IMPORTANT NOTE re GUARDS: Dirty calls are strict, very strict.  The
   arguments are evaluated REGARDLESS of the guard value.
It is 1699 unspecified the relative order of arg evaluation and guard 1700 evaluation. 1701*/ 1702 1703#define VEX_N_FXSTATE 7 /* enough for FXSAVE/FXRSTOR on x86 */ 1704 1705/* Effects on resources (eg. registers, memory locations) */ 1706typedef 1707 enum { 1708 Ifx_None = 0x17000, /* no effect */ 1709 Ifx_Read, /* reads the resource */ 1710 Ifx_Write, /* writes the resource */ 1711 Ifx_Modify, /* modifies the resource */ 1712 } 1713 IREffect; 1714 1715/* Pretty-print an IREffect */ 1716extern void ppIREffect ( IREffect ); 1717 1718 1719typedef 1720 struct { 1721 /* What to call, and details of args/results */ 1722 IRCallee* cee; /* where to call */ 1723 IRExpr* guard; /* :: Ity_Bit. Controls whether call happens */ 1724 IRExpr** args; /* arg list, ends in NULL */ 1725 IRTemp tmp; /* to assign result to, or IRTemp_INVALID if none */ 1726 1727 /* Mem effects; we allow only one R/W/M region to be stated */ 1728 IREffect mFx; /* indicates memory effects, if any */ 1729 IRExpr* mAddr; /* of access, or NULL if mFx==Ifx_None */ 1730 Int mSize; /* of access, or zero if mFx==Ifx_None */ 1731 1732 /* Guest state effects; up to N allowed */ 1733 Bool needsBBP; /* True => also pass guest state ptr to callee */ 1734 Int nFxState; /* must be 0 .. VEX_N_FXSTATE */ 1735 struct { 1736 IREffect fx; /* read, write or modify? Ifx_None is invalid. */ 1737 Int offset; 1738 Int size; 1739 } fxState[VEX_N_FXSTATE]; 1740 } 1741 IRDirty; 1742 1743/* Pretty-print a dirty call */ 1744extern void ppIRDirty ( IRDirty* ); 1745 1746/* Allocate an uninitialised dirty call */ 1747extern IRDirty* emptyIRDirty ( void ); 1748 1749/* Deep-copy a dirty call */ 1750extern IRDirty* deepCopyIRDirty ( IRDirty* ); 1751 1752/* A handy function which takes some of the tedium out of constructing 1753 dirty helper calls. The called function impliedly does not return 1754 any value and has a constant-True guard. 
The call is marked as 1755 accessing neither guest state nor memory (hence the "unsafe" 1756 designation) -- you can change this marking later if need be. A 1757 suitable IRCallee is constructed from the supplied bits. */ 1758extern 1759IRDirty* unsafeIRDirty_0_N ( Int regparms, HChar* name, void* addr, 1760 IRExpr** args ); 1761 1762/* Similarly, make a zero-annotation dirty call which returns a value, 1763 and assign that to the given temp. */ 1764extern 1765IRDirty* unsafeIRDirty_1_N ( IRTemp dst, 1766 Int regparms, HChar* name, void* addr, 1767 IRExpr** args ); 1768 1769 1770/* --------------- Memory Bus Events --------------- */ 1771 1772typedef 1773 enum { 1774 Imbe_Fence=0x18000, 1775 } 1776 IRMBusEvent; 1777 1778extern void ppIRMBusEvent ( IRMBusEvent ); 1779 1780 1781/* --------------- Compare and Swap --------------- */ 1782 1783/* This denotes an atomic compare and swap operation, either 1784 a single-element one or a double-element one. 1785 1786 In the single-element case: 1787 1788 .addr is the memory address. 1789 .end is the endianness with which memory is accessed 1790 1791 If .addr contains the same value as .expdLo, then .dataLo is 1792 written there, else there is no write. In both cases, the 1793 original value at .addr is copied into .oldLo. 1794 1795 Types: .expdLo, .dataLo and .oldLo must all have the same type. 1796 It may be any integral type, viz: I8, I16, I32 or, for 64-bit 1797 guests, I64. 1798 1799 .oldHi must be IRTemp_INVALID, and .expdHi and .dataHi must 1800 be NULL. 1801 1802 In the double-element case: 1803 1804 .addr is the memory address. 1805 .end is the endianness with which memory is accessed 1806 1807 The operation is the same: 1808 1809 If .addr contains the same value as .expdHi:.expdLo, then 1810 .dataHi:.dataLo is written there, else there is no write. In 1811 both cases the original value at .addr is copied into 1812 .oldHi:.oldLo. 
1813 1814 Types: .expdHi, .expdLo, .dataHi, .dataLo, .oldHi, .oldLo must 1815 all have the same type, which may be any integral type, viz: I8, 1816 I16, I32 or, for 64-bit guests, I64. 1817 1818 The double-element case is complicated by the issue of 1819 endianness. In all cases, the two elements are understood to be 1820 located adjacently in memory, starting at the address .addr. 1821 1822 If .end is Iend_LE, then the .xxxLo component is at the lower 1823 address and the .xxxHi component is at the higher address, and 1824 each component is itself stored little-endianly. 1825 1826 If .end is Iend_BE, then the .xxxHi component is at the lower 1827 address and the .xxxLo component is at the higher address, and 1828 each component is itself stored big-endianly. 1829 1830 This allows representing more cases than most architectures can 1831 handle. For example, x86 cannot do DCAS on 8- or 16-bit elements. 1832 1833 How to know if the CAS succeeded? 1834 1835 * if .oldLo == .expdLo (resp. .oldHi:.oldLo == .expdHi:.expdLo), 1836 then the CAS succeeded, .dataLo (resp. .dataHi:.dataLo) is now 1837 stored at .addr, and the original value there was .oldLo (resp 1838 .oldHi:.oldLo). 1839 1840 * if .oldLo != .expdLo (resp. .oldHi:.oldLo != .expdHi:.expdLo), 1841 then the CAS failed, and the original value at .addr was .oldLo 1842 (resp. .oldHi:.oldLo). 1843 1844 Hence it is easy to know whether or not the CAS succeeded. 
*/
typedef
   struct {
      IRTemp    oldHi;  /* old value of *addr is written here */
      IRTemp    oldLo;
      IREndness end;    /* endianness of the data in memory */
      IRExpr*   addr;   /* store address */
      IRExpr*   expdHi; /* expected old value at *addr */
      IRExpr*   expdLo;
      IRExpr*   dataHi; /* new value for *addr */
      IRExpr*   dataLo;
   }
   IRCAS;

/* Pretty-print an IRCAS */
extern void ppIRCAS ( IRCAS* cas );

/* Construct an IRCAS from the given pieces (see the big comment
   above for the single- vs double-element conventions). */
extern IRCAS* mkIRCAS ( IRTemp oldHi, IRTemp oldLo,
                        IREndness end, IRExpr* addr,
                        IRExpr* expdHi, IRExpr* expdLo,
                        IRExpr* dataHi, IRExpr* dataLo );

/* Deep-copy an IRCAS */
extern IRCAS* deepCopyIRCAS ( IRCAS* );

/* ------------------ Statements ------------------ */

/* The different kinds of statements.  Their meaning is explained
   below in the comments for IRStmt.

   Those marked META do not represent code, but rather extra
   information about the code.  These statements can be removed
   without affecting the functional behaviour of the code, however
   they are required by some IR consumers such as tools that
   instrument the code.
*/

typedef
   enum {
      Ist_NoOp=0x19000,
      Ist_IMark,   /* META */
      Ist_AbiHint, /* META */
      Ist_Put,
      Ist_PutI,
      Ist_WrTmp,
      Ist_Store,
      Ist_CAS,
      Ist_LLSC,
      Ist_Dirty,
      Ist_MBE,     /* META (maybe) */
      Ist_Exit
   }
   IRStmtTag;

/* A statement.  Stored as a tagged union.  'tag' indicates what kind
   of expression this is.  'Ist' is the union that holds the fields.
   If an IRStmt 'st' has st.tag equal to Ist_Store, then it's a store
   statement, and the fields can be accessed with
   'st.Ist.Store.<fieldname>'.

   For each kind of statement, we show what it looks like when
   pretty-printed with ppIRStmt().
*/
typedef
   struct _IRStmt {
      IRStmtTag tag;
      union {
         /* A no-op (usually resulting from IR optimisation).  Can be
            omitted without any effect.

            ppIRStmt output: IR-NoOp
         */
         struct {
         } NoOp;

         /* META: instruction mark.  Marks the start of the statements
            that represent a single machine instruction (the end of
            those statements is marked by the next IMark or the end of
            the IRSB).  Contains the address and length of the
            instruction.

            It also contains a delta value.  The delta must be
            subtracted from a guest program counter value before
            attempting to establish, by comparison with the address
            and length values, whether or not that program counter
            value refers to this instruction.  For x86, amd64, ppc32,
            ppc64 and arm, the delta value is zero.  For Thumb
            instructions, the delta value is one.  This is because, on
            Thumb, guest PC values (guest_R15T) are encoded using the
            top 31 bits of the instruction address and a 1 in the lsb;
            hence they appear to be (numerically) 1 past the start of
            the instruction they refer to.  IOW, guest_R15T on ARM
            holds a standard ARM interworking address.

            ppIRStmt output: ------ IMark(<addr>, <len>, <delta>) ------,
                         eg. ------ IMark(0x4000792, 5, 0) ------,
         */
         struct {
            Addr64 addr;   /* instruction address */
            Int    len;    /* instruction length */
            UChar  delta;  /* addr = program counter as encoded in guest state
                              - delta */
         } IMark;

         /* META: An ABI hint, which says something about this
            platform's ABI.

            At the moment, the only AbiHint is one which indicates
            that a given chunk of address space, [base .. base+len-1],
            has become undefined.  This is used on amd64-linux and
            some ppc variants to pass stack-redzoning hints to whoever
            wants to see them.  It also indicates the address of the
            next (dynamic) instruction that will be executed.  This is
            to help Memcheck do origin tracking.

            ppIRStmt output: ====== AbiHint(<base>, <len>, <nia>) ======
                         eg. ====== AbiHint(t1, 16, t2) ======
         */
         struct {
            IRExpr* base;     /* Start  of undefined chunk */
            Int     len;      /* Length of undefined chunk */
            IRExpr* nia;      /* Address of next (guest) insn */
         } AbiHint;

         /* Write a guest register, at a fixed offset in the guest state.
            ppIRStmt output: PUT(<offset>) = <data>, eg. PUT(60) = t1
         */
         struct {
            Int     offset;   /* Offset into the guest state */
            IRExpr* data;     /* The value to write */
         } Put;

         /* Write a guest register, at a non-fixed offset in the guest
            state.  See the comment for GetI expressions for more
            information.

            ppIRStmt output: PUTI<descr>[<ix>,<bias>] = <data>,
                         eg. PUTI(64:8xF64)[t5,0] = t1
         */
         struct {
            IRRegArray* descr;  /* Part of guest state treated as circular */
            IRExpr*     ix;     /* Variable part of index into array */
            Int         bias;   /* Constant offset part of index into array */
            IRExpr*     data;   /* The value to write */
         } PutI;

         /* Assign a value to a temporary.  Note that SSA rules require
            each tmp is only assigned to once.  IR sanity checking will
            reject any block containing a temporary which is not assigned
            to exactly once.

            ppIRStmt output: t<tmp> = <data>, eg. t1 = 3
         */
         struct {
            IRTemp  tmp;   /* Temporary  (LHS of assignment) */
            IRExpr* data;  /* Expression (RHS of assignment) */
         } WrTmp;

         /* Write a value to memory.  This is a normal store, not a
            Store-Conditional.  To represent a Store-Conditional,
            instead use IRStmt.LLSC.
            ppIRStmt output: ST<end>(<addr>) = <data>, eg. STle(t1) = t2
         */
         struct {
            IREndness end;    /* Endianness of the store */
            IRExpr*   addr;   /* store address */
            IRExpr*   data;   /* value to write */
         } Store;

         /* Do an atomic compare-and-swap operation.  Semantics are
            described above on a comment at the definition of IRCAS.

            ppIRStmt output:
               t<tmp> = CAS<end>(<addr> :: <expected> -> <new>)
            eg
               t1 = CASle(t2 :: t3->Add32(t3,1))
               which denotes a 32-bit atomic increment
               of a value at address t2

            A double-element CAS may also be denoted, in which case <tmp>,
            <expected> and <new> are all pairs of items, separated by
            commas.
         */
         struct {
            IRCAS* details;
         } CAS;

         /* Either Load-Linked or Store-Conditional, depending on
            STOREDATA.

            If STOREDATA is NULL then this is a Load-Linked, meaning
            that data is loaded from memory as normal, but a
            'reservation' for the address is also lodged in the
            hardware.

               result = Load-Linked(addr, end)

            The data transfer type is the type of RESULT (I32, I64,
            etc).  ppIRStmt output:

               result = LD<end>-Linked(<addr>), eg. LDbe-Linked(t1)

            If STOREDATA is not NULL then this is a Store-Conditional,
            hence:

               result = Store-Conditional(addr, storedata, end)

            The data transfer type is the type of STOREDATA and RESULT
            has type Ity_I1. The store may fail or succeed depending
            on the state of a previously lodged reservation on this
            address.  RESULT is written 1 if the store succeeds and 0
            if it fails.  eg ppIRStmt output:

               result = ( ST<end>-Cond(<addr>) = <storedata> )
               eg t3 = ( STbe-Cond(t1, t2) )

            In all cases, the address must be naturally aligned for
            the transfer type -- any misaligned addresses should be
            caught by a dominating IR check and side exit.  This
            alignment restriction exists because on at least some
            LL/SC platforms (ppc), stwcx. etc will trap w/ SIGBUS on
            misaligned addresses, and we have to actually generate
            stwcx. on the host, and we don't want it trapping on the
            host.

            Summary of rules for transfer type:
              STOREDATA == NULL (LL):
                transfer type = type of RESULT
              STOREDATA != NULL (SC):
                transfer type = type of STOREDATA, and RESULT :: Ity_I1
         */
         struct {
            IREndness end;
            IRTemp    result;
            IRExpr*   addr;
            IRExpr*   storedata; /* NULL => LL, non-NULL => SC */
         } LLSC;

         /* Call (possibly conditionally) a C function that has side
            effects (ie. is "dirty").  See the comments above the
            IRDirty type declaration for more information.

            ppIRStmt output:
               t<tmp> = DIRTY <guard> <effects>
                  ::: <callee>(<args>)
            eg.
               t1 = DIRTY t27 RdFX-gst(16,4) RdFX-gst(60,4)
                     ::: foo{0x380035f4}(t2)
         */
         struct {
            IRDirty* details;
         } Dirty;

         /* A memory bus event - a fence, or acquisition/release of the
            hardware bus lock.  IR optimisation treats all these as fences
            across which no memory references may be moved.
            ppIRStmt output: MBusEvent-Fence
            (NOTE(review): IRMBusEvent currently defines only Imbe_Fence;
            the MBusEvent-BusLock / MBusEvent-BusUnlock outputs mentioned
            previously have no corresponding enumerator -- confirm and
            prune the bus-lock wording if they are gone for good.)
         */
         struct {
            IRMBusEvent event;
         } MBE;

         /* Conditional exit from the middle of an IRSB.
            ppIRStmt output: if (<guard>) goto {<jk>} <dst>
                         eg. if (t69) goto {Boring} 0x4000AAA:I32
         */
         struct {
            IRExpr*    guard;    /* Conditional expression */
            IRJumpKind jk;       /* Jump kind */
            IRConst*   dst;      /* Jump target (constant only) */
         } Exit;
      } Ist;
   }
   IRStmt;

/* Statement constructors.
*/
extern IRStmt* IRStmt_NoOp    ( void );
extern IRStmt* IRStmt_IMark   ( Addr64 addr, Int len, UChar delta );
extern IRStmt* IRStmt_AbiHint ( IRExpr* base, Int len, IRExpr* nia );
extern IRStmt* IRStmt_Put     ( Int off, IRExpr* data );
extern IRStmt* IRStmt_PutI    ( IRRegArray* descr, IRExpr* ix, Int bias,
                                IRExpr* data );
extern IRStmt* IRStmt_WrTmp   ( IRTemp tmp, IRExpr* data );
extern IRStmt* IRStmt_Store   ( IREndness end, IRExpr* addr, IRExpr* data );
extern IRStmt* IRStmt_CAS     ( IRCAS* details );
extern IRStmt* IRStmt_LLSC    ( IREndness end, IRTemp result,
                                IRExpr* addr, IRExpr* storedata );
extern IRStmt* IRStmt_Dirty   ( IRDirty* details );
extern IRStmt* IRStmt_MBE     ( IRMBusEvent event );
extern IRStmt* IRStmt_Exit    ( IRExpr* guard, IRJumpKind jk, IRConst* dst );

/* Deep-copy an IRStmt. */
extern IRStmt* deepCopyIRStmt ( IRStmt* );

/* Pretty-print an IRStmt. */
extern void ppIRStmt ( IRStmt* );


/* ------------------ Basic Blocks ------------------ */

/* Type environments: a bunch of statements, expressions, etc, are
   incomplete without an environment indicating the type of each
   IRTemp.  So this provides one.  IR temporaries are really just
   unsigned ints and so this provides an array, 0 .. n_types_used-1 of
   them.
*/
typedef
   struct {
      IRType* types;       /* Array of types, indexed by IRTemp */
      Int     types_size;  /* Allocated capacity of .types */
      Int     types_used;  /* Number of entries in use (== # temps) */
   }
   IRTypeEnv;

/* Obtain a new IRTemp */
extern IRTemp newIRTemp ( IRTypeEnv*, IRType );

/* Deep-copy a type environment */
extern IRTypeEnv* deepCopyIRTypeEnv ( IRTypeEnv* );

/* Pretty-print a type environment */
extern void ppIRTypeEnv ( IRTypeEnv* );


/* Code blocks, which in proper compiler terminology are superblocks
   (single entry, multiple exit code sequences) contain:

   - A table giving a type for each temp (the "type environment")
   - An expandable array of statements
   - An expression of type 32 or 64 bits, depending on the
     guest's word size, indicating the next destination if the block
     executes all the way to the end, without a side exit
   - An indication of any special actions (JumpKind) needed
     for this final jump.

   "IRSB" stands for "IR Super Block".
*/
typedef
   struct {
      IRTypeEnv* tyenv;       /* Types of all temps used in .stmts */
      IRStmt**   stmts;       /* Expandable array of statements */
      Int        stmts_size;  /* Allocated capacity of .stmts */
      Int        stmts_used;  /* Number of statements in use */
      IRExpr*    next;        /* Fall-through destination (guest word size) */
      IRJumpKind jumpkind;    /* Special action, if any, for final jump */
   }
   IRSB;

/* Allocate a new, uninitialised IRSB */
extern IRSB* emptyIRSB ( void );

/* Deep-copy an IRSB */
extern IRSB* deepCopyIRSB ( IRSB* );

/* Deep-copy an IRSB, except for the statements list, which is set to
   be a new, empty, list of statements. */
extern IRSB* deepCopyIRSBExceptStmts ( IRSB* );

/* Pretty-print an IRSB */
extern void ppIRSB ( IRSB* );

/* Append an IRStmt to an IRSB */
extern void addStmtToIRSB ( IRSB*, IRStmt* );


/*---------------------------------------------------------------*/
/*--- Helper functions for the IR                             ---*/
/*---------------------------------------------------------------*/

/* For messing with IR type environments */
extern IRTypeEnv* emptyIRTypeEnv  ( void );

/* What is the type of this expression?
*/
extern IRType typeOfIRConst ( IRConst* );
extern IRType typeOfIRTemp  ( IRTypeEnv*, IRTemp );
extern IRType typeOfIRExpr  ( IRTypeEnv*, IRExpr* );

/* Sanity check a BB of IR */
extern void sanityCheckIRSB ( IRSB*  bb,
                              HChar* caller,
                              Bool   require_flatness,
                              IRType guest_word_size );
extern Bool isFlatIRStmt ( IRStmt* );

/* Is this any value actually in the enumeration 'IRType' ? */
extern Bool isPlausibleIRType ( IRType ty );

#endif /* ndef __LIBVEX_IR_H */


/*---------------------------------------------------------------*/
/*--- libvex_ir.h                                             ---*/
/*---------------------------------------------------------------*/