libvex_ir.h revision e71e56a90e91ce37b0ee846a4ff94493d59f2095
1
2/*---------------------------------------------------------------*/
3/*--- begin                                       libvex_ir.h ---*/
4/*---------------------------------------------------------------*/
5
6/*
7   This file is part of Valgrind, a dynamic binary instrumentation
8   framework.
9
10   Copyright (C) 2004-2010 OpenWorks LLP
11      info@open-works.net
12
13   This program is free software; you can redistribute it and/or
14   modify it under the terms of the GNU General Public License as
15   published by the Free Software Foundation; either version 2 of the
16   License, or (at your option) any later version.
17
18   This program is distributed in the hope that it will be useful, but
19   WITHOUT ANY WARRANTY; without even the implied warranty of
20   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21   General Public License for more details.
22
23   You should have received a copy of the GNU General Public License
24   along with this program; if not, write to the Free Software
25   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26   02110-1301, USA.
27
28   The GNU General Public License is contained in the file COPYING.
29
30   Neither the names of the U.S. Department of Energy nor the
31   University of California nor the names of its contributors may be
32   used to endorse or promote products derived from this software
33   without prior written permission.
34*/
35
36#ifndef __LIBVEX_IR_H
37#define __LIBVEX_IR_H
38
39#include "libvex_basictypes.h"
40
41
42/*---------------------------------------------------------------*/
43/*--- High-level IR description                               ---*/
44/*---------------------------------------------------------------*/
45
46/* Vex IR is an architecture-neutral intermediate representation.
47   Unlike some IRs in systems similar to Vex, it is not like assembly
48   language (ie. a list of instructions).  Rather, it is more like the
49   IR that might be used in a compiler.
50
51   Code blocks
52   ~~~~~~~~~~~
53   The code is broken into small code blocks ("superblocks", type:
54   'IRSB').  Each code block typically represents from 1 to perhaps 50
55   instructions.  IRSBs are single-entry, multiple-exit code blocks.
56   Each IRSB contains three things:
57   - a type environment, which indicates the type of each temporary
58     value present in the IRSB
59   - a list of statements, which represent code
60   - a jump that exits from the end the IRSB
61   Because the blocks are multiple-exit, there can be additional
62   conditional exit statements that cause control to leave the IRSB
63   before the final exit.  Also because of this, IRSBs can cover
64   multiple non-consecutive sequences of code (up to 3).  These are
65   recorded in the type VexGuestExtents (see libvex.h).
66
67   Statements and expressions
68   ~~~~~~~~~~~~~~~~~~~~~~~~~~
69   Statements (type 'IRStmt') represent operations with side-effects,
70   eg.  guest register writes, stores, and assignments to temporaries.
71   Expressions (type 'IRExpr') represent operations without
72   side-effects, eg. arithmetic operations, loads, constants.
73   Expressions can contain sub-expressions, forming expression trees,
74   eg. (3 + (4 * load(addr1))).
75
76   Storage of guest state
77   ~~~~~~~~~~~~~~~~~~~~~~
78   The "guest state" contains the guest registers of the guest machine
79   (ie.  the machine that we are simulating).  It is stored by default
80   in a block of memory supplied by the user of the VEX library,
81   generally referred to as the guest state (area).  To operate on
82   these registers, one must first read ("Get") them from the guest
83   state into a temporary value.  Afterwards, one can write ("Put")
84   them back into the guest state.
85
86   Get and Put are characterised by a byte offset into the guest
87   state, a small integer which effectively gives the identity of the
88   referenced guest register, and a type, which indicates the size of
89   the value to be transferred.
90
91   The basic "Get" and "Put" operations are sufficient to model normal
92   fixed registers on the guest.  Selected areas of the guest state
93   can be treated as a circular array of registers (type:
94   'IRRegArray'), which can be indexed at run-time.  This is done with
95   the "GetI" and "PutI" primitives.  This is necessary to describe
96   rotating register files, for example the x87 FPU stack, SPARC
97   register windows, and the Itanium register files.
98
99   Examples, and flattened vs. unflattened code
100   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
101   For example, consider this x86 instruction:
102
103     addl %eax, %ebx
104
105   One Vex IR translation for this code would be this:
106
107     ------ IMark(0x24F275, 7, 0) ------
108     t3 = GET:I32(0)             # get %eax, a 32-bit integer
109     t2 = GET:I32(12)            # get %ebx, a 32-bit integer
110     t1 = Add32(t3,t2)           # addl
111     PUT(0) = t1                 # put %eax
112
113   (For simplicity, this ignores the effects on the condition codes, and
114   the update of the instruction pointer.)
115
116   The "IMark" is an IR statement that doesn't represent actual code.
117   Instead it indicates the address and length of the original
118   instruction.  The numbers 0 and 12 are offsets into the guest state
119   for %eax and %ebx.  The full list of offsets for an architecture
120   <ARCH> can be found in the type VexGuest<ARCH>State in the file
121   VEX/pub/libvex_guest_<ARCH>.h.
122
123   The five statements in this example are:
124   - the IMark
125   - three assignments to temporaries
126   - one register write (put)
127
128   The six expressions in this example are:
129   - two register reads (gets)
130   - one arithmetic (add) operation
131   - three temporaries (two nested within the Add32, one in the PUT)
132
133   The above IR is "flattened", ie. all sub-expressions are "atoms",
134   either constants or temporaries.  An equivalent, unflattened version
135   would be:
136
137     PUT(0) = Add32(GET:I32(0), GET:I32(12))
138
139   IR is guaranteed to be flattened at instrumentation-time.  This makes
140   instrumentation easier.  Equivalent flattened and unflattened IR
141   typically results in the same generated code.
142
143   Another example, this one showing loads and stores:
144
145     addl %edx,4(%eax)
146
147   This becomes (again ignoring condition code and instruction pointer
148   updates):
149
150     ------ IMark(0x4000ABA, 3, 0) ------
151     t3 = Add32(GET:I32(0),0x4:I32)
152     t2 = LDle:I32(t3)
153     t1 = GET:I32(8)
154     t0 = Add32(t2,t1)
155     STle(t3) = t0
156
157   The "le" in "LDle" and "STle" is short for "little-endian".
158
159   No need for deallocations
160   ~~~~~~~~~~~~~~~~~~~~~~~~~
161   Although there are allocation functions for various data structures
162   in this file, there are no deallocation functions.  This is because
163   Vex uses a memory allocation scheme that automatically reclaims the
164   memory used by allocated structures once translation is completed.
165   This makes things easier for tools that instrument/transform code
166   blocks.
167
168   SSAness and typing
169   ~~~~~~~~~~~~~~~~~~
170   The IR is fully typed.  For every IRSB (IR block) it is possible to
171   say unambiguously whether or not it is correctly typed.
172   Incorrectly typed IR has no meaning and VEX will refuse to
173   process it.  At various points during processing VEX typechecks the
174   IR and aborts if any violations are found.  This seems overkill but
175   makes it a great deal easier to build a reliable JIT.
176
177   IR also has the SSA property.  SSA stands for Static Single
178   Assignment, and what it means is that each IR temporary may be
179   assigned to only once.  This idea became widely used in compiler
180   construction in the mid to late 90s.  It makes many IR-level
181   transformations/code improvements easier, simpler and faster.
182   Whenever it typechecks an IR block, VEX also checks the SSA
183   property holds, and will abort if not so.  So SSAness is
184   mechanically and rigidly enforced.
185*/
186
187/*---------------------------------------------------------------*/
188/*--- Type definitions for the IR                             ---*/
189/*---------------------------------------------------------------*/
190
191/* General comments about naming schemes:
192
193   All publically visible functions contain the name of the primary
194   type on which they operate (IRFoo, IRBar, etc).  Hence you should
195   be able to identify these functions by grepping for "IR[A-Z]".
196
197   For some type 'IRFoo':
198
199   - ppIRFoo is the printing method for IRFoo, printing it to the
200     output channel specified in the LibVEX_Initialise call.
201
202   - eqIRFoo is a structural equality predicate for IRFoos.
203
204   - deepCopyIRFoo is a deep copy constructor for IRFoos.
205     It recursively traverses the entire argument tree and
206     produces a complete new tree.  All types have a deep copy
207     constructor.
208
209   - shallowCopyIRFoo is the shallow copy constructor for IRFoos.
210     It creates a new top-level copy of the supplied object,
211     but does not copy any sub-objects.  Only some types have a
212     shallow copy constructor.
213*/
214
215/* ------------------ Types ------------------ */
216
217/* A type indicates the size of a value, and whether it's an integer, a
218   float, or a vector (SIMD) value. */
219typedef
220   enum {
221      Ity_INVALID=0x11000,  /* sentinel; not a valid type */
222      Ity_I1,    /* 1-bit integer */
223      Ity_I8,    /* 8-bit integer */
224      Ity_I16,   /* 16-bit integer */
225      Ity_I32,   /* 32-bit integer */
226      Ity_I64,   /* 64-bit integer */
227      Ity_I128,  /* 128-bit scalar */
228      Ity_F32,   /* IEEE 754 float */
229      Ity_F64,   /* IEEE 754 double */
230      Ity_F128,  /* 128-bit floating point; implementation defined */
231      Ity_V128   /* 128-bit SIMD */
232   }
233   IRType;
234
235/* Pretty-print an IRType */
236extern void ppIRType ( IRType );
237
238/* Get the size (in bytes) of an IRType */
239extern Int sizeofIRType ( IRType );
240
241
242/* ------------------ Endianness ------------------ */
243
244/* IREndness is used in load IRExprs and store IRStmts.  It gives the byte order of the memory access. */
245typedef
246   enum {
247      Iend_LE=0x12000, /* little endian */
248      Iend_BE          /* big endian */
249   }
250   IREndness;
251
252
253/* ------------------ Constants ------------------ */
254
255/* IRConsts are used within 'Const' and 'Exit' IRExprs. */
256
257/* The various kinds of constant. */
258typedef
259   enum {
260      Ico_U1=0x13000,  /* 1-bit (Bool) constant */
261      Ico_U8,    /* 8-bit unsigned constant */
262      Ico_U16,   /* 16-bit unsigned constant */
263      Ico_U32,   /* 32-bit unsigned constant */
264      Ico_U64,   /* 64-bit unsigned constant */
265      Ico_F32,   /* 32-bit IEEE754 floating */
266      Ico_F32i,  /* 32-bit unsigned int to be interpreted literally
267                    as a IEEE754 single value. */
268      Ico_F64,   /* 64-bit IEEE754 floating */
269      Ico_F64i,  /* 64-bit unsigned int to be interpreted literally
270                    as a IEEE754 double value. */
271      Ico_V128   /* 128-bit restricted vector constant, with 1 bit
272                    (repeated 8 times) for each of the 16 x 1-byte lanes */
273   }
274   IRConstTag;
275
276/* A constant.  Stored as a tagged union.  'tag' indicates what kind of
277   constant this is.  'Ico' is the union that holds the fields.  If an
278   IRConst 'c' has c.tag equal to Ico_U32, then it's a 32-bit constant,
279   and its value can be accessed with 'c.Ico.U32'. */
280typedef
281   struct _IRConst {
282      IRConstTag tag;  /* selects which member of Ico below is valid */
283      union {
284         Bool   U1;
285         UChar  U8;
286         UShort U16;
287         UInt   U32;
288         ULong  U64;
289         Float  F32;
290         UInt   F32i;
291         Double F64;
292         ULong  F64i;
293         UShort V128;   /* 16-bit value; see Ico_V128 comment above */
294      } Ico;
295   }
296   IRConst;
297
298/* IRConst constructors, one per IRConstTag.  Each returns a freshly
   allocated IRConst; no deallocation is needed (see "No need for
   deallocations" above). */
299extern IRConst* IRConst_U1   ( Bool );
300extern IRConst* IRConst_U8   ( UChar );
301extern IRConst* IRConst_U16  ( UShort );
302extern IRConst* IRConst_U32  ( UInt );
303extern IRConst* IRConst_U64  ( ULong );
304extern IRConst* IRConst_F32  ( Float );
305extern IRConst* IRConst_F32i ( UInt );
306extern IRConst* IRConst_F64  ( Double );
307extern IRConst* IRConst_F64i ( ULong );
308extern IRConst* IRConst_V128 ( UShort );
309
310/* Deep-copy an IRConst */
311extern IRConst* deepCopyIRConst ( IRConst* );
312
313/* Pretty-print an IRConst */
314extern void ppIRConst ( IRConst* );
315
316/* Compare two IRConsts for equality */
317extern Bool eqIRConst ( IRConst*, IRConst* );
318
319
320/* ------------------ Call targets ------------------ */
321
322/* Describes a helper function to call.  The name part is purely for
323   pretty printing and not actually used.  regparms=n tells the back
324   end that the callee has been declared
325   "__attribute__((regparm(n)))", although indirectly using the
326   VEX_REGPARM(n) macro.  On some targets (x86) the back end will need
327   to construct a non-standard sequence to call a function declared
328   like this.
329
330   mcx_mask is a sop to Memcheck.  It indicates which args should be
331   considered 'always defined' when lazily computing definedness of
332   the result.  Bit 0 of mcx_mask corresponds to args[0], bit 1 to
333   args[1], etc.  If a bit is set, the corresponding arg is excluded
334   (hence "x" in "mcx") from definedness checking.
335*/
336
337typedef
338   struct {
339      Int    regparms;  /* 0 .. n: callee was declared VEX_REGPARM(n); see comment above */
340      HChar* name;      /* name of the helper; used only for pretty printing */
341      void*  addr;      /* address of the helper function to call */
342      UInt   mcx_mask;  /* bitmask of args Memcheck should treat as always defined;
                             bit i corresponds to args[i] (see comment above) */
343   }
344   IRCallee;
345
346/* Create an IRCallee. */
347extern IRCallee* mkIRCallee ( Int regparms, HChar* name, void* addr );
348
349/* Deep-copy an IRCallee. */
350extern IRCallee* deepCopyIRCallee ( IRCallee* );
351
352/* Pretty-print an IRCallee. */
353extern void ppIRCallee ( IRCallee* );
354
355
356/* ------------------ Guest state arrays ------------------ */
357
358/* This describes a section of the guest state that we want to
359   be able to index at run time, so as to be able to describe
360   indexed or rotating register files on the guest. */
361typedef
362   struct {
363      Int    base;   /* guest state offset of start of indexed area */
364      IRType elemTy; /* type of each element in the indexed area */
365      Int    nElems; /* number of elements in the indexed area */
366   }
367   IRRegArray;

/* Create an IRRegArray from (base, elemTy, nElems). */
369extern IRRegArray* mkIRRegArray ( Int, IRType, Int );

/* Deep-copy an IRRegArray. */
371extern IRRegArray* deepCopyIRRegArray ( IRRegArray* );

/* Pretty-print / structural-equality for IRRegArrays. */
373extern void ppIRRegArray ( IRRegArray* );
374extern Bool eqIRRegArray ( IRRegArray*, IRRegArray* );
375
376
377/* ------------------ Temporaries ------------------ */
378
379/* This represents a temporary, eg. t1.  The IR optimiser relies on the
380   fact that IRTemps are 32-bit ints.  Do not change them to be ints of
381   any other size. */
382typedef UInt IRTemp;
383
384/* Pretty-print an IRTemp. */
385extern void ppIRTemp ( IRTemp );

/* Sentinel meaning "no temporary". */
387#define IRTemp_INVALID ((IRTemp)0xFFFFFFFF)
388
389
390/* --------------- Primops (arity 1,2,3 and 4) --------------- */
391
392/* Primitive operations that are used in Unop, Binop, Triop and Qop
393   IRExprs.  Once we take into account integer, floating point and SIMD
394   operations of all the different sizes, there are quite a lot of them.
395   Most instructions supported by the architectures that Vex supports
396   (x86, PPC, etc) are represented.  Some more obscure ones (eg. cpuid)
397   are not;  they are instead handled with dirty helpers that emulate
398   their functionality.  Such obscure ones are thus not directly visible
399   in the IR, but their effects on guest state (memory and registers)
400   are made visible via the annotations in IRDirty structures.
401*/
402typedef
403   enum {
404      /* -- Do not change this ordering.  The IR generators rely on
405            (eg) Iop_Add64 == Iop_Add8 + 3. -- */
406
407      Iop_INVALID=0x14000,
408      Iop_Add8,  Iop_Add16,  Iop_Add32,  Iop_Add64,
409      Iop_Sub8,  Iop_Sub16,  Iop_Sub32,  Iop_Sub64,
410      /* Signless mul.  MullS/MullU is elsewhere. */
411      Iop_Mul8,  Iop_Mul16,  Iop_Mul32,  Iop_Mul64,
412      Iop_Or8,   Iop_Or16,   Iop_Or32,   Iop_Or64,
413      Iop_And8,  Iop_And16,  Iop_And32,  Iop_And64,
414      Iop_Xor8,  Iop_Xor16,  Iop_Xor32,  Iop_Xor64,
415      Iop_Shl8,  Iop_Shl16,  Iop_Shl32,  Iop_Shl64,
416      Iop_Shr8,  Iop_Shr16,  Iop_Shr32,  Iop_Shr64,
417      Iop_Sar8,  Iop_Sar16,  Iop_Sar32,  Iop_Sar64,
418      /* Integer comparisons. */
419      Iop_CmpEQ8,  Iop_CmpEQ16,  Iop_CmpEQ32,  Iop_CmpEQ64,
420      Iop_CmpNE8,  Iop_CmpNE16,  Iop_CmpNE32,  Iop_CmpNE64,
421      /* Tags for unary ops */
422      Iop_Not8,  Iop_Not16,  Iop_Not32,  Iop_Not64,
423
424      /* Exactly like CmpEQ8/16/32/64, but carrying the additional
425         hint that these compute the success/failure of a CAS
426         operation, and hence are almost certainly applied to two
427         copies of the same value, which in turn has implications for
428         Memcheck's instrumentation. */
429      Iop_CasCmpEQ8, Iop_CasCmpEQ16, Iop_CasCmpEQ32, Iop_CasCmpEQ64,
430      Iop_CasCmpNE8, Iop_CasCmpNE16, Iop_CasCmpNE32, Iop_CasCmpNE64,
431
432      /* -- Ordering not important after here. -- */
433
434      /* Widening multiplies */
435      Iop_MullS8, Iop_MullS16, Iop_MullS32, Iop_MullS64,
436      Iop_MullU8, Iop_MullU16, Iop_MullU32, Iop_MullU64,
437
438      /* Weirdo integer stuff */
439      Iop_Clz64, Iop_Clz32,   /* count leading zeroes */
440      Iop_Ctz64, Iop_Ctz32,   /* count trailing zeros */
441      /* Ctz64/Ctz32/Clz64/Clz32 are UNDEFINED when given arguments of
442         zero.  You must ensure they are never given a zero argument.
443      */
444
445      /* Standard integer comparisons */
446      Iop_CmpLT32S, Iop_CmpLT64S,
447      Iop_CmpLE32S, Iop_CmpLE64S,
448      Iop_CmpLT32U, Iop_CmpLT64U,
449      Iop_CmpLE32U, Iop_CmpLE64U,
450
451      /* As a sop to Valgrind-Memcheck, the following are useful. */
452      Iop_CmpNEZ8, Iop_CmpNEZ16,  Iop_CmpNEZ32,  Iop_CmpNEZ64,
453      Iop_CmpwNEZ32, Iop_CmpwNEZ64, /* all-0s -> all-Os; other -> all-1s */
454      Iop_Left8, Iop_Left16, Iop_Left32, Iop_Left64, /*  \x -> x | -x */
455      Iop_Max32U, /* unsigned max */
456
457      /* PowerPC-style 3-way integer comparisons.  Without them it is
458         difficult to simulate PPC efficiently.
459         op(x,y) | x < y  = 0x8 else
460                 | x > y  = 0x4 else
461                 | x == y = 0x2
462      */
463      Iop_CmpORD32U, Iop_CmpORD64U,
464      Iop_CmpORD32S, Iop_CmpORD64S,
465
466      /* Division */
467      /* TODO: clarify semantics wrt rounding, negative values, whatever */
468      Iop_DivU32,   // :: I32,I32 -> I32 (simple div, no mod)
469      Iop_DivS32,   // ditto, signed
470      Iop_DivU64,   // :: I64,I64 -> I64 (simple div, no mod)
471      Iop_DivS64,   // ditto, signed
472      Iop_DivU64E,  // :: I64,I64 -> I64 (dividend is 64-bit arg (hi) concat with 64 0's (low))
473      Iop_DivS64E,  // ditto, signed
474      Iop_DivU32E,  // :: I32,I32 -> I32 (dividend is 32-bit arg (hi) concat with 32 0's (low))
475      Iop_DivS32E,  // ditto, signed
476
477      Iop_DivModU64to32, // :: I64,I32 -> I64
478                         // of which lo half is div and hi half is mod
479      Iop_DivModS64to32, // ditto, signed
480
481      Iop_DivModU128to64, // :: V128,I64 -> V128
482                          // of which lo half is div and hi half is mod
483      Iop_DivModS128to64, // ditto, signed
484
485      Iop_DivModS64to64, // :: I64,I64 -> I128
486                         // of which lo half is div and hi half is mod
487
488      /* Integer conversions.  Some of these are redundant (eg
489         Iop_64to8 is the same as Iop_64to32 and then Iop_32to8), but
490         having a complete set reduces the typical dynamic size of IR
491         and makes the instruction selectors easier to write. */
492
493      /* Widening conversions */
494      Iop_8Uto16, Iop_8Uto32,  Iop_8Uto64,
495                  Iop_16Uto32, Iop_16Uto64,
496                               Iop_32Uto64,
497      Iop_8Sto16, Iop_8Sto32,  Iop_8Sto64,
498                  Iop_16Sto32, Iop_16Sto64,
499                               Iop_32Sto64,
500
501      /* Narrowing conversions */
502      Iop_64to8, Iop_32to8, Iop_64to16,
503      /* 8 <-> 16 bit conversions */
504      Iop_16to8,      // :: I16 -> I8, low half
505      Iop_16HIto8,    // :: I16 -> I8, high half
506      Iop_8HLto16,    // :: (I8,I8) -> I16
507      /* 16 <-> 32 bit conversions */
508      Iop_32to16,     // :: I32 -> I16, low half
509      Iop_32HIto16,   // :: I32 -> I16, high half
510      Iop_16HLto32,   // :: (I16,I16) -> I32
511      /* 32 <-> 64 bit conversions */
512      Iop_64to32,     // :: I64 -> I32, low half
513      Iop_64HIto32,   // :: I64 -> I32, high half
514      Iop_32HLto64,   // :: (I32,I32) -> I64
515      /* 64 <-> 128 bit conversions */
516      Iop_128to64,    // :: I128 -> I64, low half
517      Iop_128HIto64,  // :: I128 -> I64, high half
518      Iop_64HLto128,  // :: (I64,I64) -> I128
519      /* 1-bit stuff */
520      Iop_Not1,   /* :: Ity_Bit -> Ity_Bit */
521      Iop_32to1,  /* :: Ity_I32 -> Ity_Bit, just select bit[0] */
522      Iop_64to1,  /* :: Ity_I64 -> Ity_Bit, just select bit[0] */
523      Iop_1Uto8,  /* :: Ity_Bit -> Ity_I8,  unsigned widen */
524      Iop_1Uto32, /* :: Ity_Bit -> Ity_I32, unsigned widen */
525      Iop_1Uto64, /* :: Ity_Bit -> Ity_I64, unsigned widen */
526      Iop_1Sto8,  /* :: Ity_Bit -> Ity_I8,  signed widen */
527      Iop_1Sto16, /* :: Ity_Bit -> Ity_I16, signed widen */
528      Iop_1Sto32, /* :: Ity_Bit -> Ity_I32, signed widen */
529      Iop_1Sto64, /* :: Ity_Bit -> Ity_I64, signed widen */
530
531      /* ------ Floating point.  We try to be IEEE754 compliant. ------ */
532
533      /* --- Simple stuff as mandated by 754. --- */
534
535      /* Binary operations, with rounding. */
536      /* :: IRRoundingMode(I32) x F64 x F64 -> F64 */
537      Iop_AddF64, Iop_SubF64, Iop_MulF64, Iop_DivF64,
538
539      /* :: IRRoundingMode(I32) x F32 x F32 -> F32 */
540      Iop_AddF32, Iop_SubF32, Iop_MulF32, Iop_DivF32,
541
542      /* Variants of the above which produce a 64-bit result but which
543         round their result to a IEEE float range first. */
544      /* :: IRRoundingMode(I32) x F64 x F64 -> F64 */
545      Iop_AddF64r32, Iop_SubF64r32, Iop_MulF64r32, Iop_DivF64r32,
546
547      /* Unary operations, without rounding. */
548      /* :: F64 -> F64 */
549      Iop_NegF64, Iop_AbsF64,
550
551      /* :: F32 -> F32 */
552      Iop_NegF32, Iop_AbsF32,
553
554      /* Unary operations, with rounding. */
555      /* :: IRRoundingMode(I32) x F64 -> F64 */
556      Iop_SqrtF64, Iop_SqrtF64r32,
557
558      /* :: IRRoundingMode(I32) x F32 -> F32 */
559      Iop_SqrtF32,
560
561      /* Comparison, yielding GT/LT/EQ/UN(ordered), as per the following:
562            0x45 Unordered
563            0x01 LT
564            0x00 GT
565            0x40 EQ
566         This just happens to be the Intel encoding.  The values
567         are recorded in the type IRCmpF64Result.
568      */
569      /* :: F64 x F64 -> IRCmpF64Result(I32) */
570      Iop_CmpF64,
571      Iop_CmpF32,
572      Iop_CmpF128,
573
574      /* --- Int to/from FP conversions. --- */
575
576      /* For the most part, these take a first argument :: Ity_I32 (as
577         IRRoundingMode) which is an indication of the rounding mode
578         to use, as per the following encoding ("the standard
579         encoding"):
580            00b  to nearest (the default)
581            01b  to -infinity
582            10b  to +infinity
583            11b  to zero
584         This just happens to be the Intel encoding.  For reference only,
585         the PPC encoding is:
586            00b  to nearest (the default)
587            01b  to zero
588            10b  to +infinity
589            11b  to -infinity
590         Any PPC -> IR front end will have to translate these PPC
591         encodings, as encoded in the guest state, to the standard
592         encodings, to pass to the primops.
593         For reference only, the ARM VFP encoding is:
594            00b  to nearest
595            01b  to +infinity
596            10b  to -infinity
597            11b  to zero
598         Again, this will have to be converted to the standard encoding
599         to pass to primops.
600
601         If one of these conversions gets an out-of-range condition,
602         or a NaN, as an argument, the result is host-defined.  On x86
603         the "integer indefinite" value 0x80..00 is produced.  On PPC
604         it is either 0x80..00 or 0x7F..FF depending on the sign of
605         the argument.
606
607         On ARMvfp, when converting to a signed integer result, the
608         overflow result is 0x80..00 for negative args and 0x7F..FF
609         for positive args.  For unsigned integer results it is
610         0x00..00 and 0xFF..FF respectively.
611
612         Rounding is required whenever the destination type cannot
613         represent exactly all values of the source type.
614      */
615      Iop_F64toI16S, /* IRRoundingMode(I32) x F64 -> signed I16 */
616      Iop_F64toI32S, /* IRRoundingMode(I32) x F64 -> signed I32 */
617      Iop_F64toI64S, /* IRRoundingMode(I32) x F64 -> signed I64 */
618      Iop_F64toI64U, /* IRRoundingMode(I32) x F64 -> unsigned I64 */
619
620      Iop_F64toI32U, /* IRRoundingMode(I32) x F64 -> unsigned I32 */
621
622      Iop_I16StoF64, /*                       signed I16 -> F64 */
623      Iop_I32StoF64, /*                       signed I32 -> F64 */
624      Iop_I64StoF64, /* IRRoundingMode(I32) x signed I64 -> F64 */
625      Iop_I64UtoF64, /* IRRoundingMode(I32) x unsigned I64 -> F64 */
626      Iop_I64UtoF32, /* IRRoundingMode(I32) x unsigned I64 -> F32 */
627
628      Iop_I32UtoF64, /*                       unsigned I32 -> F64 */
629
630      Iop_F32toI16S, /* IRRoundingMode(I32) x F32 -> signed I16 */
631      Iop_F32toI32S, /* IRRoundingMode(I32) x F32 -> signed I32 */
632      Iop_F32toI64S, /* IRRoundingMode(I32) x F32 -> signed I64 */
633
634      Iop_I16StoF32, /*                       signed I16 -> F32 */
635      Iop_I32StoF32, /* IRRoundingMode(I32) x signed I32 -> F32 */
636      Iop_I64StoF32, /* IRRoundingMode(I32) x signed I64 -> F32 */
637
638      /* Conversion between floating point formats */
639      Iop_F32toF64,  /*                       F32 -> F64 */
640      Iop_F64toF32,  /* IRRoundingMode(I32) x F64 -> F32 */
641
642      /* Reinterpretation.  Take an F64 and produce an I64 with
643         the same bit pattern, or vice versa. */
644      Iop_ReinterpF64asI64, Iop_ReinterpI64asF64,
645      Iop_ReinterpF32asI32, Iop_ReinterpI32asF32,
646
647      /* Support for 128-bit floating point */
648      Iop_F64HLtoF128,/* (high half of F128,low half of F128) -> F128 */
649      Iop_F128HItoF64,/* F128 -> high half of F128 into a F64 register */
650      Iop_F128LOtoF64,/* F128 -> low  half of F128 into a F64 register */
651
652      /* :: IRRoundingMode(I32) x F128 x F128 -> F128 */
653      Iop_AddF128, Iop_SubF128, Iop_MulF128, Iop_DivF128,
654
655      /* :: F128 -> F128 */
656      Iop_NegF128, Iop_AbsF128,
657
658      /* :: IRRoundingMode(I32) x F128 -> F128 */
659      Iop_SqrtF128,
660
661      Iop_I32StoF128, /*                signed I32  -> F128 */
662      Iop_I64StoF128, /*                signed I64  -> F128 */
663      Iop_F32toF128,  /*                       F32  -> F128 */
664      Iop_F64toF128,  /*                       F64  -> F128 */
665
666      Iop_F128toI32S, /* IRRoundingMode(I32) x F128 -> signed I32  */
667      Iop_F128toI64S, /* IRRoundingMode(I32) x F128 -> signed I64  */
668      Iop_F128toF64,  /* IRRoundingMode(I32) x F128 -> F64         */
669      Iop_F128toF32,  /* IRRoundingMode(I32) x F128 -> F32         */
670
671      /* --- guest x86/amd64 specifics, not mandated by 754. --- */
672
673      /* Binary ops, with rounding. */
674      /* :: IRRoundingMode(I32) x F64 x F64 -> F64 */
675      Iop_AtanF64,       /* FPATAN,  arctan(arg1/arg2)       */
676      Iop_Yl2xF64,       /* FYL2X,   arg1 * log2(arg2)       */
677      Iop_Yl2xp1F64,     /* FYL2XP1, arg1 * log2(arg2+1.0)   */
678      Iop_PRemF64,       /* FPREM,   non-IEEE remainder(arg1/arg2)    */
679      Iop_PRemC3210F64,  /* C3210 flags resulting from FPREM, :: I32 */
680      Iop_PRem1F64,      /* FPREM1,  IEEE remainder(arg1/arg2)    */
681      Iop_PRem1C3210F64, /* C3210 flags resulting from FPREM1, :: I32 */
682      Iop_ScaleF64,      /* FSCALE,  arg1 * (2^RoundTowardsZero(arg2)) */
683      /* Note that on x86 guest, PRem1{C3210} has the same behaviour
684         as the IEEE mandated RemF64, except it is limited in the
685         range of its operand.  Hence the partialness. */
686
687      /* Unary ops, with rounding. */
688      /* :: IRRoundingMode(I32) x F64 -> F64 */
689      Iop_SinF64,    /* FSIN */
690      Iop_CosF64,    /* FCOS */
691      Iop_TanF64,    /* FTAN */
692      Iop_2xm1F64,   /* (2^arg - 1.0) */
693      Iop_RoundF64toInt, /* F64 value to nearest integral value (still
694                            as F64) */
695      Iop_RoundF32toInt, /* F32 value to nearest integral value (still
696                            as F32) */
697
698      /* --- guest s390 specifics, not mandated by 754. --- */
699
700      /* Fused multiply-add/sub */
701      /* :: IRRoundingMode(I32) x F32 x F32 x F32 -> F32
702            (computes op3 * op2 +/- op1) */
703      Iop_MAddF32, Iop_MSubF32,
704
705      /* --- guest ppc32/64 specifics, not mandated by 754. --- */
706
707      /* Ternary operations, with rounding. */
708      /* Fused multiply-add/sub, with 112-bit intermediate
709         precision for ppc.
710         Also used to implement fused multiply-add/sub for s390. */
711      /* :: IRRoundingMode(I32) x F64 x F64 x F64 -> F64
712            (computes arg2 * arg3 +/- arg4) */
713      Iop_MAddF64, Iop_MSubF64,
714
715      /* Variants of the above which produce a 64-bit result but which
716         round their result to a IEEE float range first. */
717      /* :: IRRoundingMode(I32) x F64 x F64 x F64 -> F64 */
718      Iop_MAddF64r32, Iop_MSubF64r32,
719
720      /* :: F64 -> F64 */
721      Iop_Est5FRSqrt,    /* reciprocal square root estimate, 5 good bits */
722      Iop_RoundF64toF64_NEAREST, /* frin */
723      Iop_RoundF64toF64_NegINF,  /* frim */
724      Iop_RoundF64toF64_PosINF,  /* frip */
725      Iop_RoundF64toF64_ZERO,    /* friz */
726
727      /* :: F64 -> F32 */
728      Iop_TruncF64asF32, /* do F64->F32 truncation as per 'fsts' */
729
730      /* :: IRRoundingMode(I32) x F64 -> F64 */
731      Iop_RoundF64toF32, /* round F64 to nearest F32 value (still as F64) */
732      /* NB: pretty much the same as Iop_F64toF32, except no change
733         of type. */
734
735      /* :: F64 -> I32 */
736      Iop_CalcFPRF, /* Calc 5 fpscr[FPRF] bits (Class, <, =, >, Unord)
737                       from FP result */
738
739      /* ------------------ 32-bit SIMD Integer ------------------ */
740
741      /* 16x2 add/sub, also signed/unsigned saturating variants */
742      Iop_Add16x2, Iop_Sub16x2,
743      Iop_QAdd16Sx2, Iop_QAdd16Ux2,
744      Iop_QSub16Sx2, Iop_QSub16Ux2,
745
746      /* 16x2 signed/unsigned halving add/sub.  For each lane, these
747         compute bits 16:1 of (eg) sx(argL) + sx(argR),
748         or zx(argL) - zx(argR) etc. */
749      Iop_HAdd16Ux2, Iop_HAdd16Sx2,
750      Iop_HSub16Ux2, Iop_HSub16Sx2,
751
752      /* 8x4 add/sub, also signed/unsigned saturating variants */
753      Iop_Add8x4, Iop_Sub8x4,
754      Iop_QAdd8Sx4, Iop_QAdd8Ux4,
755      Iop_QSub8Sx4, Iop_QSub8Ux4,
756
757      /* 8x4 signed/unsigned halving add/sub.  For each lane, these
758         compute bits 8:1 of (eg) sx(argL) + sx(argR),
759         or zx(argL) - zx(argR) etc. */
760      Iop_HAdd8Ux4, Iop_HAdd8Sx4,
761      Iop_HSub8Ux4, Iop_HSub8Sx4,
762
763      /* 8x4 sum of absolute unsigned differences. */
764      Iop_Sad8Ux4,
765
766      /* MISC (vector integer cmp != 0) */
767      Iop_CmpNEZ16x2, Iop_CmpNEZ8x4,
768
769      /* ------------------ 64-bit SIMD FP ------------------------ */
770
      /* Conversion to/from int */
      Iop_I32UtoFx2,  Iop_I32StoFx2,    /* I32x2 -> F32x2 */
      Iop_FtoI32Ux2_RZ,  Iop_FtoI32Sx2_RZ,    /* F32x2 -> I32x2 */
774      /* Fixed32 format is floating-point number with fixed number of fraction
775         bits. The number of fraction bits is passed as a second argument of
776         type I8. */
777      Iop_F32ToFixed32Ux2_RZ, Iop_F32ToFixed32Sx2_RZ, /* fp -> fixed-point */
778      Iop_Fixed32UToF32x2_RN, Iop_Fixed32SToF32x2_RN, /* fixed-point -> fp */
779
780      /* Binary operations */
781      Iop_Max32Fx2,      Iop_Min32Fx2,
782      /* Pairwise Min and Max. See integer pairwise operations for more
783         details. */
784      Iop_PwMax32Fx2,    Iop_PwMin32Fx2,
785      /* Note: For the following compares, the arm front-end assumes a
786         nan in a lane of either argument returns zero for that lane. */
787      Iop_CmpEQ32Fx2, Iop_CmpGT32Fx2, Iop_CmpGE32Fx2,
788
789      /* Vector Reciprocal Estimate finds an approximate reciprocal of each
790      element in the operand vector, and places the results in the destination
791      vector.  */
792      Iop_Recip32Fx2,
793
794      /* Vector Reciprocal Step computes (2.0 - arg1 * arg2).
795         Note, that if one of the arguments is zero and another one is infinity
796         of arbitrary sign the result of the operation is 2.0. */
797      Iop_Recps32Fx2,
798
799      /* Vector Reciprocal Square Root Estimate finds an approximate reciprocal
800         square root of each element in the operand vector. */
801      Iop_Rsqrte32Fx2,
802
      /* Vector Reciprocal Square Root Step computes (3.0 - arg1 * arg2) / 2.0.
         Note that if one of the arguments is zero and the other one is infinity
         of arbitrary sign the result of the operation is 1.5. */
806      Iop_Rsqrts32Fx2,
807
808      /* Unary */
809      Iop_Neg32Fx2, Iop_Abs32Fx2,
810
811      /* ------------------ 64-bit SIMD Integer. ------------------ */
812
813      /* MISC (vector integer cmp != 0) */
814      Iop_CmpNEZ8x8, Iop_CmpNEZ16x4, Iop_CmpNEZ32x2,
815
816      /* ADDITION (normal / unsigned sat / signed sat) */
817      Iop_Add8x8,   Iop_Add16x4,   Iop_Add32x2,
818      Iop_QAdd8Ux8, Iop_QAdd16Ux4, Iop_QAdd32Ux2, Iop_QAdd64Ux1,
819      Iop_QAdd8Sx8, Iop_QAdd16Sx4, Iop_QAdd32Sx2, Iop_QAdd64Sx1,
820
821      /* PAIRWISE operations */
822      /* Iop_PwFoo16x4( [a,b,c,d], [e,f,g,h] ) =
823            [Foo16(a,b), Foo16(c,d), Foo16(e,f), Foo16(g,h)] */
824      Iop_PwAdd8x8,  Iop_PwAdd16x4,  Iop_PwAdd32x2,
825      Iop_PwMax8Sx8, Iop_PwMax16Sx4, Iop_PwMax32Sx2,
826      Iop_PwMax8Ux8, Iop_PwMax16Ux4, Iop_PwMax32Ux2,
827      Iop_PwMin8Sx8, Iop_PwMin16Sx4, Iop_PwMin32Sx2,
828      Iop_PwMin8Ux8, Iop_PwMin16Ux4, Iop_PwMin32Ux2,
      /* The lengthening variant is unary.  The resulting vector contains
         half as many elements as the operand, but they are twice as wide.
         Example:
            Iop_PwAddL16Ux4( [a,b,c,d] ) = [a+b,c+d]
               where a+b and c+d are unsigned 32-bit values. */
834      Iop_PwAddL8Ux8, Iop_PwAddL16Ux4, Iop_PwAddL32Ux2,
835      Iop_PwAddL8Sx8, Iop_PwAddL16Sx4, Iop_PwAddL32Sx2,
836
837      /* SUBTRACTION (normal / unsigned sat / signed sat) */
838      Iop_Sub8x8,   Iop_Sub16x4,   Iop_Sub32x2,
839      Iop_QSub8Ux8, Iop_QSub16Ux4, Iop_QSub32Ux2, Iop_QSub64Ux1,
840      Iop_QSub8Sx8, Iop_QSub16Sx4, Iop_QSub32Sx2, Iop_QSub64Sx1,
841
842      /* ABSOLUTE VALUE */
843      Iop_Abs8x8, Iop_Abs16x4, Iop_Abs32x2,
844
      /* MULTIPLICATION (normal / high half of signed/unsigned / polynomial ) */
846      Iop_Mul8x8, Iop_Mul16x4, Iop_Mul32x2,
847      Iop_Mul32Fx2,
848      Iop_MulHi16Ux4,
849      Iop_MulHi16Sx4,
      /* Polynomial multiplication treats its arguments as coefficients of
         polynomials over {0, 1}. */
852      Iop_PolynomialMul8x8,
853
854      /* Vector Saturating Doubling Multiply Returning High Half and
855         Vector Saturating Rounding Doubling Multiply Returning High Half */
856      /* These IROp's multiply corresponding elements in two vectors, double
857         the results, and place the most significant half of the final results
858         in the destination vector. The results are truncated or rounded. If
859         any of the results overflow, they are saturated. */
860      Iop_QDMulHi16Sx4, Iop_QDMulHi32Sx2,
861      Iop_QRDMulHi16Sx4, Iop_QRDMulHi32Sx2,
862
863      /* AVERAGING: note: (arg1 + arg2 + 1) >>u 1 */
864      Iop_Avg8Ux8,
865      Iop_Avg16Ux4,
866
867      /* MIN/MAX */
868      Iop_Max8Sx8, Iop_Max16Sx4, Iop_Max32Sx2,
869      Iop_Max8Ux8, Iop_Max16Ux4, Iop_Max32Ux2,
870      Iop_Min8Sx8, Iop_Min16Sx4, Iop_Min32Sx2,
871      Iop_Min8Ux8, Iop_Min16Ux4, Iop_Min32Ux2,
872
873      /* COMPARISON */
874      Iop_CmpEQ8x8,  Iop_CmpEQ16x4,  Iop_CmpEQ32x2,
875      Iop_CmpGT8Ux8, Iop_CmpGT16Ux4, Iop_CmpGT32Ux2,
876      Iop_CmpGT8Sx8, Iop_CmpGT16Sx4, Iop_CmpGT32Sx2,
877
878      /* COUNT ones / leading zeroes / leading sign bits (not including topmost
879         bit) */
880      Iop_Cnt8x8,
881      Iop_Clz8Sx8, Iop_Clz16Sx4, Iop_Clz32Sx2,
882      Iop_Cls8Sx8, Iop_Cls16Sx4, Iop_Cls32Sx2,
883
884      /* VECTOR x VECTOR SHIFT / ROTATE */
885      Iop_Shl8x8, Iop_Shl16x4, Iop_Shl32x2,
886      Iop_Shr8x8, Iop_Shr16x4, Iop_Shr32x2,
887      Iop_Sar8x8, Iop_Sar16x4, Iop_Sar32x2,
888      Iop_Sal8x8, Iop_Sal16x4, Iop_Sal32x2, Iop_Sal64x1,
889
890      /* VECTOR x SCALAR SHIFT (shift amt :: Ity_I8) */
891      Iop_ShlN8x8, Iop_ShlN16x4, Iop_ShlN32x2,
892      Iop_ShrN8x8, Iop_ShrN16x4, Iop_ShrN32x2,
893      Iop_SarN8x8, Iop_SarN16x4, Iop_SarN32x2,
894
895      /* VECTOR x VECTOR SATURATING SHIFT */
896      Iop_QShl8x8, Iop_QShl16x4, Iop_QShl32x2, Iop_QShl64x1,
897      Iop_QSal8x8, Iop_QSal16x4, Iop_QSal32x2, Iop_QSal64x1,
898      /* VECTOR x INTEGER SATURATING SHIFT */
899      Iop_QShlN8Sx8, Iop_QShlN16Sx4, Iop_QShlN32Sx2, Iop_QShlN64Sx1,
900      Iop_QShlN8x8, Iop_QShlN16x4, Iop_QShlN32x2, Iop_QShlN64x1,
901      Iop_QSalN8x8, Iop_QSalN16x4, Iop_QSalN32x2, Iop_QSalN64x1,
902
903      /* NARROWING (binary)
904         -- narrow 2xI64 into 1xI64, hi half from left arg */
905      /* For saturated narrowing, I believe there are 4 variants of
906         the basic arithmetic operation, depending on the signedness
907         of argument and result.  Here are examples that exemplify
908         what I mean:
909
910         QNarrow16Uto8U ( UShort x )  if (x >u 255) x = 255;
911                                      return x[7:0];
912
913         QNarrow16Sto8S ( Short x )   if (x <s -128) x = -128;
914                                      if (x >s  127) x = 127;
915                                      return x[7:0];
916
917         QNarrow16Uto8S ( UShort x )  if (x >u 127) x = 127;
918                                      return x[7:0];
919
920         QNarrow16Sto8U ( Short x )   if (x <s 0)   x = 0;
921                                      if (x >s 255) x = 255;
922                                      return x[7:0];
923      */
924      Iop_QNarrowBin16Sto8Ux8,
925      Iop_QNarrowBin16Sto8Sx8, Iop_QNarrowBin32Sto16Sx4,
926
927      /* INTERLEAVING */
928      /* Interleave lanes from low or high halves of
929         operands.  Most-significant result lane is from the left
930         arg. */
931      Iop_InterleaveHI8x8, Iop_InterleaveHI16x4, Iop_InterleaveHI32x2,
932      Iop_InterleaveLO8x8, Iop_InterleaveLO16x4, Iop_InterleaveLO32x2,
933      /* Interleave odd/even lanes of operands.  Most-significant result lane
934         is from the left arg.  Note that Interleave{Odd,Even}Lanes32x2 are
935         identical to Interleave{HI,LO}32x2 and so are omitted.*/
936      Iop_InterleaveOddLanes8x8, Iop_InterleaveEvenLanes8x8,
937      Iop_InterleaveOddLanes16x4, Iop_InterleaveEvenLanes16x4,
938
939
940      /* CONCATENATION -- build a new value by concatenating either
941         the even or odd lanes of both operands.  Note that
942         Cat{Odd,Even}Lanes32x2 are identical to Interleave{HI,LO}32x2
943         and so are omitted. */
944      Iop_CatOddLanes8x8, Iop_CatOddLanes16x4,
945      Iop_CatEvenLanes8x8, Iop_CatEvenLanes16x4,
946
947      /* GET / SET elements of VECTOR
948         GET is binop (I64, I8) -> I<elem_size>
949         SET is triop (I64, I8, I<elem_size>) -> I64 */
950      /* Note: the arm back-end handles only constant second argument */
951      Iop_GetElem8x8, Iop_GetElem16x4, Iop_GetElem32x2,
952      Iop_SetElem8x8, Iop_SetElem16x4, Iop_SetElem32x2,
953
954      /* DUPLICATING -- copy value to all lanes */
955      Iop_Dup8x8,   Iop_Dup16x4,   Iop_Dup32x2,
956
957      /* EXTRACT -- copy 8-arg3 highest bytes from arg1 to 8-arg3 lowest bytes
958         of result and arg3 lowest bytes of arg2 to arg3 highest bytes of
959         result.
960         It is a triop: (I64, I64, I8) -> I64 */
      /* Note: the arm back-end handles only constant third argument. */
962      Iop_Extract64,
963
964      /* REVERSE the order of elements in each Half-words, Words,
965         Double-words */
966      /* Examples:
967            Reverse16_8x8([a,b,c,d,e,f,g,h]) = [b,a,d,c,f,e,h,g]
968            Reverse32_8x8([a,b,c,d,e,f,g,h]) = [d,c,b,a,h,g,f,e]
969            Reverse64_8x8([a,b,c,d,e,f,g,h]) = [h,g,f,e,d,c,b,a] */
970      Iop_Reverse16_8x8,
971      Iop_Reverse32_8x8, Iop_Reverse32_16x4,
972      Iop_Reverse64_8x8, Iop_Reverse64_16x4, Iop_Reverse64_32x2,
973
974      /* PERMUTING -- copy src bytes to dst,
975         as indexed by control vector bytes:
976            for i in 0 .. 7 . result[i] = argL[ argR[i] ]
977         argR[i] values may only be in the range 0 .. 7, else behaviour
978         is undefined. */
979      Iop_Perm8x8,
980
      /* Vector Reciprocal Estimate and Vector Reciprocal Square Root Estimate.
         See floating-point equivalents for details. */
983      Iop_Recip32x2, Iop_Rsqrte32x2,
984
985      /* ------------------ 128-bit SIMD FP. ------------------ */
986
987      /* --- 32x4 vector FP --- */
988
989      /* binary */
990      Iop_Add32Fx4, Iop_Sub32Fx4, Iop_Mul32Fx4, Iop_Div32Fx4,
991      Iop_Max32Fx4, Iop_Min32Fx4,
992      Iop_Add32Fx2, Iop_Sub32Fx2,
993      /* Note: For the following compares, the ppc and arm front-ends assume a
994         nan in a lane of either argument returns zero for that lane. */
995      Iop_CmpEQ32Fx4, Iop_CmpLT32Fx4, Iop_CmpLE32Fx4, Iop_CmpUN32Fx4,
996      Iop_CmpGT32Fx4, Iop_CmpGE32Fx4,
997
998      /* Vector Absolute */
999      Iop_Abs32Fx4,
1000
1001      /* Pairwise Max and Min. See integer pairwise operations for details. */
1002      Iop_PwMax32Fx4, Iop_PwMin32Fx4,
1003
1004      /* unary */
1005      Iop_Sqrt32Fx4, Iop_RSqrt32Fx4,
1006      Iop_Neg32Fx4,
1007
1008      /* Vector Reciprocal Estimate finds an approximate reciprocal of each
1009      element in the operand vector, and places the results in the destination
1010      vector.  */
1011      Iop_Recip32Fx4,
1012
1013      /* Vector Reciprocal Step computes (2.0 - arg1 * arg2).
1014         Note, that if one of the arguments is zero and another one is infinity
1015         of arbitrary sign the result of the operation is 2.0. */
1016      Iop_Recps32Fx4,
1017
1018      /* Vector Reciprocal Square Root Estimate finds an approximate reciprocal
1019         square root of each element in the operand vector. */
1020      Iop_Rsqrte32Fx4,
1021
      /* Vector Reciprocal Square Root Step computes (3.0 - arg1 * arg2) / 2.0.
         Note that if one of the arguments is zero and the other one is infinity
         of arbitrary sign the result of the operation is 1.5. */
1025      Iop_Rsqrts32Fx4,
1026
1027
1028      /* --- Int to/from FP conversion --- */
1029      /* Unlike the standard fp conversions, these irops take no
1030         rounding mode argument. Instead the irop trailers _R{M,P,N,Z}
1031         indicate the mode: {-inf, +inf, nearest, zero} respectively. */
1032      Iop_I32UtoFx4,  Iop_I32StoFx4,       /* I32x4 -> F32x4       */
1033      Iop_FtoI32Ux4_RZ,  Iop_FtoI32Sx4_RZ,    /* F32x4 -> I32x4       */
1034      Iop_QFtoI32Ux4_RZ, Iop_QFtoI32Sx4_RZ,   /* F32x4 -> I32x4 (with saturation) */
1035      Iop_RoundF32x4_RM, Iop_RoundF32x4_RP,   /* round to fp integer  */
1036      Iop_RoundF32x4_RN, Iop_RoundF32x4_RZ,   /* round to fp integer  */
1037      /* Fixed32 format is floating-point number with fixed number of fraction
1038         bits. The number of fraction bits is passed as a second argument of
1039         type I8. */
1040      Iop_F32ToFixed32Ux4_RZ, Iop_F32ToFixed32Sx4_RZ, /* fp -> fixed-point */
1041      Iop_Fixed32UToF32x4_RN, Iop_Fixed32SToF32x4_RN, /* fixed-point -> fp */
1042
1043      /* --- Single to/from half conversion --- */
1044      /* FIXME: what kind of rounding in F32x4 -> F16x4 case? */
1045      Iop_F32toF16x4, Iop_F16toF32x4,         /* F32x4 <-> F16x4      */
1046
1047      /* --- 32x4 lowest-lane-only scalar FP --- */
1048
1049      /* In binary cases, upper 3/4 is copied from first operand.  In
1050         unary cases, upper 3/4 is copied from the operand. */
1051
1052      /* binary */
1053      Iop_Add32F0x4, Iop_Sub32F0x4, Iop_Mul32F0x4, Iop_Div32F0x4,
1054      Iop_Max32F0x4, Iop_Min32F0x4,
1055      Iop_CmpEQ32F0x4, Iop_CmpLT32F0x4, Iop_CmpLE32F0x4, Iop_CmpUN32F0x4,
1056
1057      /* unary */
1058      Iop_Recip32F0x4, Iop_Sqrt32F0x4, Iop_RSqrt32F0x4,
1059
1060      /* --- 64x2 vector FP --- */
1061
1062      /* binary */
1063      Iop_Add64Fx2, Iop_Sub64Fx2, Iop_Mul64Fx2, Iop_Div64Fx2,
1064      Iop_Max64Fx2, Iop_Min64Fx2,
1065      Iop_CmpEQ64Fx2, Iop_CmpLT64Fx2, Iop_CmpLE64Fx2, Iop_CmpUN64Fx2,
1066
1067      /* unary */
1068      Iop_Recip64Fx2, Iop_Sqrt64Fx2, Iop_RSqrt64Fx2,
1069
1070      /* --- 64x2 lowest-lane-only scalar FP --- */
1071
1072      /* In binary cases, upper half is copied from first operand.  In
1073         unary cases, upper half is copied from the operand. */
1074
1075      /* binary */
1076      Iop_Add64F0x2, Iop_Sub64F0x2, Iop_Mul64F0x2, Iop_Div64F0x2,
1077      Iop_Max64F0x2, Iop_Min64F0x2,
1078      Iop_CmpEQ64F0x2, Iop_CmpLT64F0x2, Iop_CmpLE64F0x2, Iop_CmpUN64F0x2,
1079
1080      /* unary */
1081      Iop_Recip64F0x2, Iop_Sqrt64F0x2, Iop_RSqrt64F0x2,
1082
1083      /* --- pack / unpack --- */
1084
1085      /* 64 <-> 128 bit vector */
1086      Iop_V128to64,     // :: V128 -> I64, low half
1087      Iop_V128HIto64,   // :: V128 -> I64, high half
1088      Iop_64HLtoV128,   // :: (I64,I64) -> V128
1089
1090      Iop_64UtoV128,
1091      Iop_SetV128lo64,
1092
1093      /* 32 <-> 128 bit vector */
1094      Iop_32UtoV128,
1095      Iop_V128to32,     // :: V128 -> I32, lowest lane
1096      Iop_SetV128lo32,  // :: (V128,I32) -> V128
1097
1098      /* ------------------ 128-bit SIMD Integer. ------------------ */
1099
1100      /* BITWISE OPS */
1101      Iop_NotV128,
1102      Iop_AndV128, Iop_OrV128, Iop_XorV128,
1103
1104      /* VECTOR SHIFT (shift amt :: Ity_I8) */
1105      Iop_ShlV128, Iop_ShrV128,
1106
1107      /* MISC (vector integer cmp != 0) */
1108      Iop_CmpNEZ8x16, Iop_CmpNEZ16x8, Iop_CmpNEZ32x4, Iop_CmpNEZ64x2,
1109
1110      /* ADDITION (normal / unsigned sat / signed sat) */
1111      Iop_Add8x16,   Iop_Add16x8,   Iop_Add32x4,   Iop_Add64x2,
1112      Iop_QAdd8Ux16, Iop_QAdd16Ux8, Iop_QAdd32Ux4, Iop_QAdd64Ux2,
1113      Iop_QAdd8Sx16, Iop_QAdd16Sx8, Iop_QAdd32Sx4, Iop_QAdd64Sx2,
1114
1115      /* SUBTRACTION (normal / unsigned sat / signed sat) */
1116      Iop_Sub8x16,   Iop_Sub16x8,   Iop_Sub32x4,   Iop_Sub64x2,
1117      Iop_QSub8Ux16, Iop_QSub16Ux8, Iop_QSub32Ux4, Iop_QSub64Ux2,
1118      Iop_QSub8Sx16, Iop_QSub16Sx8, Iop_QSub32Sx4, Iop_QSub64Sx2,
1119
1120      /* MULTIPLICATION (normal / high half of signed/unsigned) */
1121      Iop_Mul8x16,  Iop_Mul16x8,    Iop_Mul32x4,
1122                    Iop_MulHi16Ux8, Iop_MulHi32Ux4,
1123                    Iop_MulHi16Sx8, Iop_MulHi32Sx4,
1124      /* (widening signed/unsigned of even lanes, with lowest lane=zero) */
1125      Iop_MullEven8Ux16, Iop_MullEven16Ux8,
1126      Iop_MullEven8Sx16, Iop_MullEven16Sx8,
1127      /* FIXME: document these */
1128      Iop_Mull8Ux8, Iop_Mull8Sx8,
1129      Iop_Mull16Ux4, Iop_Mull16Sx4,
1130      Iop_Mull32Ux2, Iop_Mull32Sx2,
1131      /* Vector Saturating Doubling Multiply Returning High Half and
1132         Vector Saturating Rounding Doubling Multiply Returning High Half */
1133      /* These IROp's multiply corresponding elements in two vectors, double
1134         the results, and place the most significant half of the final results
1135         in the destination vector. The results are truncated or rounded. If
1136         any of the results overflow, they are saturated. */
1137      Iop_QDMulHi16Sx8, Iop_QDMulHi32Sx4,
1138      Iop_QRDMulHi16Sx8, Iop_QRDMulHi32Sx4,
1139      /* Doubling saturating multiplication (long) (I64, I64) -> V128 */
1140      Iop_QDMulLong16Sx4, Iop_QDMulLong32Sx2,
      /* Polynomial multiplication treats its arguments as coefficients of
         polynomials over {0, 1}. */
1143      Iop_PolynomialMul8x16, /* (V128, V128) -> V128 */
1144      Iop_PolynomialMull8x8, /*   (I64, I64) -> V128 */
1145
1146      /* PAIRWISE operations */
1147      /* Iop_PwFoo16x4( [a,b,c,d], [e,f,g,h] ) =
1148            [Foo16(a,b), Foo16(c,d), Foo16(e,f), Foo16(g,h)] */
1149      Iop_PwAdd8x16, Iop_PwAdd16x8, Iop_PwAdd32x4,
1150      Iop_PwAdd32Fx2,
      /* The lengthening variant is unary.  The resulting vector contains
         half as many elements as the operand, but they are twice as wide.
1153         Example:
1154            Iop_PwAddL16Ux4( [a,b,c,d] ) = [a+b,c+d]
1155               where a+b and c+d are unsigned 32-bit values. */
1156      Iop_PwAddL8Ux16, Iop_PwAddL16Ux8, Iop_PwAddL32Ux4,
1157      Iop_PwAddL8Sx16, Iop_PwAddL16Sx8, Iop_PwAddL32Sx4,
1158
1159      /* ABSOLUTE VALUE */
1160      Iop_Abs8x16, Iop_Abs16x8, Iop_Abs32x4,
1161
1162      /* AVERAGING: note: (arg1 + arg2 + 1) >>u 1 */
1163      Iop_Avg8Ux16, Iop_Avg16Ux8, Iop_Avg32Ux4,
1164      Iop_Avg8Sx16, Iop_Avg16Sx8, Iop_Avg32Sx4,
1165
1166      /* MIN/MAX */
1167      Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4,
1168      Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4,
1169      Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4,
1170      Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4,
1171
1172      /* COMPARISON */
1173      Iop_CmpEQ8x16,  Iop_CmpEQ16x8,  Iop_CmpEQ32x4,
1174      Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2,
1175      Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4,
1176
1177      /* COUNT ones / leading zeroes / leading sign bits (not including topmost
1178         bit) */
1179      Iop_Cnt8x16,
1180      Iop_Clz8Sx16, Iop_Clz16Sx8, Iop_Clz32Sx4,
1181      Iop_Cls8Sx16, Iop_Cls16Sx8, Iop_Cls32Sx4,
1182
1183      /* VECTOR x SCALAR SHIFT (shift amt :: Ity_I8) */
1184      Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2,
1185      Iop_ShrN8x16, Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2,
1186      Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2,
1187
1188      /* VECTOR x VECTOR SHIFT / ROTATE */
1189      Iop_Shl8x16, Iop_Shl16x8, Iop_Shl32x4, Iop_Shl64x2,
1190      Iop_Shr8x16, Iop_Shr16x8, Iop_Shr32x4, Iop_Shr64x2,
1191      Iop_Sar8x16, Iop_Sar16x8, Iop_Sar32x4, Iop_Sar64x2,
1192      Iop_Sal8x16, Iop_Sal16x8, Iop_Sal32x4, Iop_Sal64x2,
1193      Iop_Rol8x16, Iop_Rol16x8, Iop_Rol32x4,
1194
1195      /* VECTOR x VECTOR SATURATING SHIFT */
1196      Iop_QShl8x16, Iop_QShl16x8, Iop_QShl32x4, Iop_QShl64x2,
1197      Iop_QSal8x16, Iop_QSal16x8, Iop_QSal32x4, Iop_QSal64x2,
1198      /* VECTOR x INTEGER SATURATING SHIFT */
1199      Iop_QShlN8Sx16, Iop_QShlN16Sx8, Iop_QShlN32Sx4, Iop_QShlN64Sx2,
1200      Iop_QShlN8x16, Iop_QShlN16x8, Iop_QShlN32x4, Iop_QShlN64x2,
1201      Iop_QSalN8x16, Iop_QSalN16x8, Iop_QSalN32x4, Iop_QSalN64x2,
1202
1203      /* NARROWING (binary)
1204         -- narrow 2xV128 into 1xV128, hi half from left arg */
1205      /* See comments above w.r.t. U vs S issues in saturated narrowing. */
1206      Iop_QNarrowBin16Sto8Ux16, Iop_QNarrowBin32Sto16Ux8,
1207      Iop_QNarrowBin16Sto8Sx16, Iop_QNarrowBin32Sto16Sx8,
1208      Iop_QNarrowBin16Uto8Ux16, Iop_QNarrowBin32Uto16Ux8,
1209      Iop_NarrowBin16to8x16, Iop_NarrowBin32to16x8,
1210
1211      /* NARROWING (unary) -- narrow V128 into I64 */
1212      Iop_NarrowUn16to8x8, Iop_NarrowUn32to16x4, Iop_NarrowUn64to32x2,
1213      /* Saturating narrowing from signed source to signed/unsigned destination */
1214      Iop_QNarrowUn16Sto8Sx8, Iop_QNarrowUn32Sto16Sx4, Iop_QNarrowUn64Sto32Sx2,
1215      Iop_QNarrowUn16Sto8Ux8, Iop_QNarrowUn32Sto16Ux4, Iop_QNarrowUn64Sto32Ux2,
1216      /* Saturating narrowing from unsigned source to unsigned destination */
1217      Iop_QNarrowUn16Uto8Ux8, Iop_QNarrowUn32Uto16Ux4, Iop_QNarrowUn64Uto32Ux2,
1218
1219      /* WIDENING -- sign or zero extend each element of the argument
1220         vector to the twice original size.  The resulting vector consists of
1221         the same number of elements but each element and the vector itself
1222         are twice as wide.
1223         All operations are I64->V128.
1224         Example
1225            Iop_Widen32Sto64x2( [a, b] ) = [c, d]
1226               where c = Iop_32Sto64(a) and d = Iop_32Sto64(b) */
1227      Iop_Widen8Uto16x8, Iop_Widen16Uto32x4, Iop_Widen32Uto64x2,
1228      Iop_Widen8Sto16x8, Iop_Widen16Sto32x4, Iop_Widen32Sto64x2,
1229
1230      /* INTERLEAVING */
1231      /* Interleave lanes from low or high halves of
1232         operands.  Most-significant result lane is from the left
1233         arg. */
1234      Iop_InterleaveHI8x16, Iop_InterleaveHI16x8,
1235      Iop_InterleaveHI32x4, Iop_InterleaveHI64x2,
1236      Iop_InterleaveLO8x16, Iop_InterleaveLO16x8,
1237      Iop_InterleaveLO32x4, Iop_InterleaveLO64x2,
1238      /* Interleave odd/even lanes of operands.  Most-significant result lane
1239         is from the left arg. */
1240      Iop_InterleaveOddLanes8x16, Iop_InterleaveEvenLanes8x16,
1241      Iop_InterleaveOddLanes16x8, Iop_InterleaveEvenLanes16x8,
1242      Iop_InterleaveOddLanes32x4, Iop_InterleaveEvenLanes32x4,
1243
1244      /* CONCATENATION -- build a new value by concatenating either
1245         the even or odd lanes of both operands. */
1246      Iop_CatOddLanes8x16, Iop_CatOddLanes16x8, Iop_CatOddLanes32x4,
1247      Iop_CatEvenLanes8x16, Iop_CatEvenLanes16x8, Iop_CatEvenLanes32x4,
1248
1249      /* GET elements of VECTOR
1250         GET is binop (V128, I8) -> I<elem_size> */
1251      /* Note: the arm back-end handles only constant second argument. */
1252      Iop_GetElem8x16, Iop_GetElem16x8, Iop_GetElem32x4, Iop_GetElem64x2,
1253
1254      /* DUPLICATING -- copy value to all lanes */
1255      Iop_Dup8x16,   Iop_Dup16x8,   Iop_Dup32x4,
1256
1257      /* EXTRACT -- copy 16-arg3 highest bytes from arg1 to 16-arg3 lowest bytes
1258         of result and arg3 lowest bytes of arg2 to arg3 highest bytes of
1259         result.
1260         It is a triop: (V128, V128, I8) -> V128 */
1261      /* Note: the ARM back end handles only constant arg3 in this operation. */
1262      Iop_ExtractV128,
1263
1264      /* REVERSE the order of elements in each Half-words, Words,
1265         Double-words */
1266      /* Examples:
1267            Reverse32_16x8([a,b,c,d,e,f,g,h]) = [b,a,d,c,f,e,h,g]
1268            Reverse64_16x8([a,b,c,d,e,f,g,h]) = [d,c,b,a,h,g,f,e] */
1269      Iop_Reverse16_8x16,
1270      Iop_Reverse32_8x16, Iop_Reverse32_16x8,
1271      Iop_Reverse64_8x16, Iop_Reverse64_16x8, Iop_Reverse64_32x4,
1272
1273      /* PERMUTING -- copy src bytes to dst,
1274         as indexed by control vector bytes:
1275            for i in 0 .. 15 . result[i] = argL[ argR[i] ]
1276         argR[i] values may only be in the range 0 .. 15, else behaviour
1277         is undefined. */
1278      Iop_Perm8x16,
1279
      /* Vector Reciprocal Estimate and Vector Reciprocal Square Root Estimate.
         See floating-point equivalents for details. */
1282      Iop_Recip32x4, Iop_Rsqrte32x4
1283   }
1284   IROp;
1285
/* Pretty-print an op (for debugging / IR dumps). */
extern void ppIROp ( IROp );
1288
1289
1290/* Encoding of IEEE754-specified rounding modes.  This is the same as
1291   the encoding used by Intel IA32 to indicate x87 rounding mode.
1292   Note, various front and back ends rely on the actual numerical
1293   values of these, so do not change them. */
/* NOTE: front and back ends depend on these exact numerical values
   (they match the x87 rounding-control encoding) -- never renumber. */
typedef enum {
   Irrm_NEAREST = 0,   /* round to nearest */
   Irrm_NegINF  = 1,   /* round towards minus infinity */
   Irrm_PosINF  = 2,   /* round towards plus infinity */
   Irrm_ZERO    = 3    /* round towards zero (truncate) */
} IRRoundingMode;
1302
1303/* Floating point comparison result values, as created by Iop_CmpF64.
1304   This is also derived from what IA32 does. */
/* NOTE: these exact numerical values are relied upon (they mirror the
   IA32 FP-compare flag packing) -- never renumber. */
typedef enum {
   Ircr_UN = 0x45,   /* unordered (e.g. a NaN operand) */
   Ircr_LT = 0x01,   /* less than */
   Ircr_GT = 0x00,   /* greater than */
   Ircr_EQ = 0x40    /* equal */
} IRCmpF64Result;
1313
/* F32 and F128 comparisons use the same result encoding as F64. */
typedef IRCmpF64Result IRCmpF32Result;
typedef IRCmpF64Result IRCmpF128Result;
1316
1317/* ------------------ Expressions ------------------ */
1318
1319/* The different kinds of expressions.  Their meaning is explained below
1320   in the comments for IRExpr. */
/* Tags discriminating the IRExpr union below; values made explicit so
   the numbering is visible.  Semantics of each kind are documented on
   the corresponding member of struct _IRExpr. */
typedef enum {
   Iex_Binder = 0x15000,  /* Vex-internal pattern-matching binder */
   Iex_Get    = 0x15001,  /* read guest register, fixed offset */
   Iex_GetI   = 0x15002,  /* read guest register, computed (circular) offset */
   Iex_RdTmp  = 0x15003,  /* value of a temporary */
   Iex_Qop    = 0x15004,  /* quaternary operation */
   Iex_Triop  = 0x15005,  /* ternary operation */
   Iex_Binop  = 0x15006,  /* binary operation */
   Iex_Unop   = 0x15007,  /* unary operation */
   Iex_Load   = 0x15008,  /* load from memory */
   Iex_Const  = 0x15009,  /* constant-valued expression */
   Iex_Mux0X  = 0x1500A,  /* strict if-then-else */
   Iex_CCall  = 0x1500B   /* call to pure helper C function */
} IRExprTag;
1337
1338/* An expression.  Stored as a tagged union.  'tag' indicates what kind
1339   of expression this is.  'Iex' is the union that holds the fields.  If
1340   an IRExpr 'e' has e.tag equal to Iex_Load, then it's a load
1341   expression, and the fields can be accessed with
1342   'e.Iex.Load.<fieldname>'.
1343
1344   For each kind of expression, we show what it looks like when
1345   pretty-printed with ppIRExpr().
1346*/
/* Forward typedef: IRExpr is recursive (sub-expressions are held as
   IRExpr* fields inside the struct), so the name must be introduced
   before the struct body below. */
typedef
   struct _IRExpr
   IRExpr;
1350
struct _IRExpr {
   IRExprTag tag;   /* Discriminant: selects which member of 'Iex' is valid */
   union {
      /* Used only in pattern matching within Vex.  Should not be seen
         outside of Vex. */
      struct {
         Int binder;
      } Binder;

      /* Read a guest register, at a fixed offset in the guest state.
         ppIRExpr output: GET:<ty>(<offset>), eg. GET:I32(0)
      */
      struct {
         Int    offset;    /* Offset into the guest state */
         IRType ty;        /* Type of the value being read */
      } Get;

      /* Read a guest register at a non-fixed offset in the guest
         state.  This allows circular indexing into parts of the guest
         state, which is essential for modelling situations where the
         identity of guest registers is not known until run time.  One
         example is the x87 FP register stack.

         The part of the guest state to be treated as a circular array
         is described in the IRRegArray 'descr' field.  It holds the
         offset of the first element in the array, the type of each
         element, and the number of elements.

         The array index is indicated rather indirectly, in a way
         which makes optimisation easy: as the sum of variable part
         (the 'ix' field) and a constant offset (the 'bias' field).

         Since the indexing is circular, the actual array index to use
         is computed as (ix + bias) % num-of-elems-in-the-array.

         Here's an example.  The description

            (96:8xF64)[t39,-7]

         describes an array of 8 F64-typed values, the
         guest-state-offset of the first being 96.  This array is
         being indexed at (t39 - 7) % 8.

         It is important to get the array size/type exactly correct
         since IR optimisation looks closely at such info in order to
         establish aliasing/non-aliasing between separate GetI and
         PutI events, which is used to establish when they can be
         reordered, etc.  Putting incorrect info in will lead to
         obscure IR optimisation bugs.

            ppIRExpr output: GETI<descr>[<ix>,<bias>]
                         eg. GETI(128:8xI8)[t1,0]
      */
      struct {
         IRRegArray* descr; /* Part of guest state treated as circular */
         IRExpr*     ix;    /* Variable part of index into array */
         Int         bias;  /* Constant offset part of index into array */
      } GetI;

      /* The value held by a temporary.
         ppIRExpr output: t<tmp>, eg. t1
      */
      struct {
         IRTemp tmp;       /* The temporary number */
      } RdTmp;

      /* A quaternary operation.
         ppIRExpr output: <op>(<arg1>, <arg2>, <arg3>, <arg4>),
                      eg. MAddF64r32(t1, t2, t3, t4)
      */
      struct {
         IROp op;          /* op-code   */
         IRExpr* arg1;     /* operand 1 */
         IRExpr* arg2;     /* operand 2 */
         IRExpr* arg3;     /* operand 3 */
         IRExpr* arg4;     /* operand 4 */
      } Qop;

      /* A ternary operation.
         ppIRExpr output: <op>(<arg1>, <arg2>, <arg3>),
                      eg. MulF64(1, 2.0, 3.0)
      */
      struct {
         IROp op;          /* op-code   */
         IRExpr* arg1;     /* operand 1 */
         IRExpr* arg2;     /* operand 2 */
         IRExpr* arg3;     /* operand 3 */
      } Triop;

      /* A binary operation.
         ppIRExpr output: <op>(<arg1>, <arg2>), eg. Add32(t1,t2)
      */
      struct {
         IROp op;          /* op-code   */
         IRExpr* arg1;     /* operand 1 */
         IRExpr* arg2;     /* operand 2 */
      } Binop;

      /* A unary operation.
         ppIRExpr output: <op>(<arg>), eg. Neg8(t1)
      */
      struct {
         IROp    op;       /* op-code */
         IRExpr* arg;      /* operand */
      } Unop;

      /* A load from memory -- a normal load, not a load-linked.
         Load-Linkeds (and Store-Conditionals) are instead represented
         by IRStmt.LLSC since Load-Linkeds have side effects and so
         are not semantically valid IRExpr's.
         ppIRExpr output: LD<end>:<ty>(<addr>), eg. LDle:I32(t1)
      */
      struct {
         IREndness end;    /* Endian-ness of the load */
         IRType    ty;     /* Type of the loaded value */
         IRExpr*   addr;   /* Address being loaded from */
      } Load;

      /* A constant-valued expression.
         ppIRExpr output: <con>, eg. 0x4:I32
      */
      struct {
         IRConst* con;     /* The constant itself */
      } Const;

      /* A call to a pure (no side-effects) helper C function.

         With the 'cee' field, 'name' is the function's name.  It is
         only used for pretty-printing purposes.  The address to call
         (host address, of course) is stored in the 'addr' field
         inside 'cee'.

         The 'args' field is a NULL-terminated array of arguments.
         The stated return IRType, and the implied argument types,
         must match that of the function being called well enough so
         that the back end can actually generate correct code for the
         call.

         The called function **must** satisfy the following:

         * no side effects -- must be a pure function, the result of
           which depends only on the passed parameters.

         * it may not look at, nor modify, any of the guest state
           since that would hide guest state transitions from
           instrumenters

         * it may not access guest memory, since that would hide
           guest memory transactions from the instrumenters

         This is restrictive, but makes the semantics clean, and does
         not interfere with IR optimisation.

         If you want to call a helper which can mess with guest state
         and/or memory, instead use Ist_Dirty.  This is a lot more
         flexible, but you have to give a bunch of details about what
         the helper does (and you better be telling the truth,
         otherwise any derived instrumentation will be wrong).  Also
         Ist_Dirty inhibits various IR optimisations and so can cause
         quite poor code to be generated.  Try to avoid it.

         ppIRExpr output: <cee>(<args>):<retty>
                      eg. foo{0x80489304}(t1, t2):I32
      */
      struct {
         IRCallee* cee;    /* Function to call. */
         IRType    retty;  /* Type of return value. */
         IRExpr**  args;   /* Vector of argument expressions. */
      }  CCall;

      /* A ternary if-then-else operator.  It returns expr0 if cond is
         zero, exprX otherwise.  Note that it is STRICT, ie. both
         expr0 and exprX are evaluated in all cases.

         ppIRExpr output: Mux0X(<cond>,<expr0>,<exprX>),
                         eg. Mux0X(t6,t7,t8)
      */
      struct {
         IRExpr* cond;     /* Condition */
         IRExpr* expr0;    /* Value returned when cond is zero */
         IRExpr* exprX;    /* Value returned when cond is nonzero */
      } Mux0X;
   } Iex;
};
1535
/* Expression constructors, one per IRExpr kind (see the IRExpr
   definition above for the meaning of each field). */
extern IRExpr* IRExpr_Binder ( Int binder );
extern IRExpr* IRExpr_Get    ( Int off, IRType ty );
extern IRExpr* IRExpr_GetI   ( IRRegArray* descr, IRExpr* ix, Int bias );
extern IRExpr* IRExpr_RdTmp  ( IRTemp tmp );
extern IRExpr* IRExpr_Qop    ( IROp op, IRExpr* arg1, IRExpr* arg2,
                                        IRExpr* arg3, IRExpr* arg4 );
extern IRExpr* IRExpr_Triop  ( IROp op, IRExpr* arg1,
                                        IRExpr* arg2, IRExpr* arg3 );
extern IRExpr* IRExpr_Binop  ( IROp op, IRExpr* arg1, IRExpr* arg2 );
extern IRExpr* IRExpr_Unop   ( IROp op, IRExpr* arg );
extern IRExpr* IRExpr_Load   ( IREndness end, IRType ty, IRExpr* addr );
extern IRExpr* IRExpr_Const  ( IRConst* con );
extern IRExpr* IRExpr_CCall  ( IRCallee* cee, IRType retty, IRExpr** args );
extern IRExpr* IRExpr_Mux0X  ( IRExpr* cond, IRExpr* expr0, IRExpr* exprX );

/* Deep-copy an IRExpr. */
extern IRExpr* deepCopyIRExpr ( IRExpr* );

/* Pretty-print an IRExpr. */
extern void ppIRExpr ( IRExpr* );

/* NULL-terminated IRExpr vector constructors, suitable for
   use as arg lists in clean/dirty helper calls. */
extern IRExpr** mkIRExprVec_0 ( void );
extern IRExpr** mkIRExprVec_1 ( IRExpr* );
extern IRExpr** mkIRExprVec_2 ( IRExpr*, IRExpr* );
extern IRExpr** mkIRExprVec_3 ( IRExpr*, IRExpr*, IRExpr* );
extern IRExpr** mkIRExprVec_4 ( IRExpr*, IRExpr*, IRExpr*, IRExpr* );
extern IRExpr** mkIRExprVec_5 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
                                IRExpr* );
extern IRExpr** mkIRExprVec_6 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
                                IRExpr*, IRExpr* );
extern IRExpr** mkIRExprVec_7 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
                                IRExpr*, IRExpr* );
extern IRExpr** mkIRExprVec_8 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
                                IRExpr*, IRExpr*, IRExpr*, IRExpr*);

/* IRExpr vector copiers:
   - shallowCopy: shallow-copy (ie. create a new vector that shares the
     elements with the original).
   - deepCopy: deep-copy (ie. create a completely new vector). */
extern IRExpr** shallowCopyIRExprVec ( IRExpr** );
extern IRExpr** deepCopyIRExprVec ( IRExpr** );

/* Make a constant expression from the given host word taking into
   account (of course) the host word size. */
extern IRExpr* mkIRExpr_HWord ( HWord );

/* Convenience function for constructing clean helper calls. */
extern
IRExpr* mkIRExprCCall ( IRType retty,
                        Int regparms, HChar* name, void* addr,
                        IRExpr** args );
1590
1591
/* Convenience functions for atoms (IRExprs which are either Iex_RdTmp
 * or Iex_Const). */
1594static inline Bool isIRAtom ( IRExpr* e ) {
1595   return toBool(e->tag == Iex_RdTmp || e->tag == Iex_Const);
1596}
1597
1598/* Are these two IR atoms identical?  Causes an assertion
1599   failure if they are passed non-atoms. */
1600extern Bool eqIRAtom ( IRExpr*, IRExpr* );
1601
1602
1603/* ------------------ Jump kinds ------------------ */
1604
1605/* This describes hints which can be passed to the dispatcher at guest
1606   control-flow transfer points.
1607
1608   Re Ijk_TInval: the guest state _must_ have two pseudo-registers,
1609   guest_TISTART and guest_TILEN, which specify the start and length
1610   of the region to be invalidated.  These are both the size of a
1611   guest word.  It is the responsibility of the relevant toIR.c to
1612   ensure that these are filled in with suitable values before issuing
1613   a jump of kind Ijk_TInval.
1614
1615   Re Ijk_EmWarn and Ijk_EmFail: the guest state must have a
1616   pseudo-register guest_EMWARN, which is 32-bits regardless of the
1617   host or guest word size.  That register should be made to hold an
1618   EmWarn_* value to indicate the reason for the exit.
1619
1620   In the case of Ijk_EmFail, the exit is fatal (Vex-generated code
1621   cannot continue) and so the jump destination can be anything.
1622
1623   Re Ijk_Sys_ (syscall jumps): the guest state must have a
1624   pseudo-register guest_IP_AT_SYSCALL, which is the size of a guest
1625   word.  Front ends should set this to be the IP at the most recently
1626   executed kernel-entering (system call) instruction.  This makes it
1627   very much easier (viz, actually possible at all) to back up the
1628   guest to restart a syscall that has been interrupted by a signal.
1629*/
typedef
   enum {
      Ijk_Boring=0x16000, /* not interesting; just goto next */
      Ijk_Call,           /* guest is doing a call */
      Ijk_Ret,            /* guest is doing a return */
      Ijk_ClientReq,      /* do guest client req before continuing */
      Ijk_Yield,          /* client is yielding to thread scheduler */
      Ijk_EmWarn,         /* report emulation warning before continuing */
      Ijk_EmFail,         /* emulation critical (FATAL) error; give up */
      Ijk_NoDecode,       /* next instruction cannot be decoded */
      Ijk_MapFail,        /* Vex-provided address translation failed */
      Ijk_TInval,         /* Invalidate translations before continuing;
                             requires guest_TISTART/guest_TILEN (see above) */
      Ijk_NoRedir,        /* Jump to un-redirected guest addr */
      Ijk_SigTRAP,        /* current instruction synths SIGTRAP */
      Ijk_SigSEGV,        /* current instruction synths SIGSEGV */
      Ijk_SigBUS,         /* current instruction synths SIGBUS */
      /* Unfortunately, various guest-dependent syscall kinds.  They
         all mean: do a syscall before continuing. */
      Ijk_Sys_syscall,    /* amd64 'syscall', ppc 'sc', arm 'svc #0' */
      Ijk_Sys_int32,      /* amd64/x86 'int $0x20' */
      Ijk_Sys_int128,     /* amd64/x86 'int $0x80' */
      Ijk_Sys_int129,     /* amd64/x86 'int $0x81' */
      Ijk_Sys_int130,     /* amd64/x86 'int $0x82' */
      Ijk_Sys_sysenter    /* x86 'sysenter'.  guest_EIP becomes
                             invalid at the point this happens. */
   }
   IRJumpKind;
1657
1658extern void ppIRJumpKind ( IRJumpKind );
1659
1660
1661/* ------------------ Dirty helper calls ------------------ */
1662
1663/* A dirty call is a flexible mechanism for calling (possibly
1664   conditionally) a helper function or procedure.  The helper function
1665   may read, write or modify client memory, and may read, write or
1666   modify client state.  It can take arguments and optionally return a
1667   value.  It may return different results and/or do different things
1668   when called repeatedly with the same arguments, by means of storing
1669   private state.
1670
1671   If a value is returned, it is assigned to the nominated return
1672   temporary.
1673
1674   Dirty calls are statements rather than expressions for obvious
1675   reasons.  If a dirty call is marked as writing guest state, any
1676   values derived from the written parts of the guest state are
1677   invalid.  Similarly, if the dirty call is stated as writing
1678   memory, any loaded values are invalidated by it.
1679
1680   In order that instrumentation is possible, the call must state, and
1681   state correctly:
1682
1683   * whether it reads, writes or modifies memory, and if so where
1684     (only one chunk can be stated)
1685
1686   * whether it reads, writes or modifies guest state, and if so which
1687     pieces (several pieces may be stated, and currently their extents
1688     must be known at translation-time).
1689
1690   Normally, code is generated to pass just the args to the helper.
1691   However, if .needsBBP is set, then an extra first argument is
1692   passed, which is the baseblock pointer, so that the callee can
1693   access the guest state.  It is invalid for .nFxState to be zero
1694   but .needsBBP to be True, since .nFxState==0 is a claim that the
1695   call does not access guest state.
1696
1697   IMPORTANT NOTE re GUARDS: Dirty calls are strict, very strict.  The
1698   arguments are evaluated REGARDLESS of the guard value.  It is
1699   unspecified the relative order of arg evaluation and guard
1700   evaluation.
1701*/
1702
1703#define VEX_N_FXSTATE  7   /* enough for FXSAVE/FXRSTOR on x86 */
1704
1705/* Effects on resources (eg. registers, memory locations) */
typedef
   enum {
      Ifx_None = 0x17000,   /* no effect (invalid inside
                               IRDirty.fxState -- see below) */
      Ifx_Read,             /* reads the resource */
      Ifx_Write,            /* writes the resource */
      Ifx_Modify,           /* modifies the resource */
   }
   IREffect;
1714
1715/* Pretty-print an IREffect */
1716extern void ppIREffect ( IREffect );
1717
1718
typedef
   struct {
      /* What to call, and details of args/results */
      IRCallee* cee;    /* where to call */
      IRExpr*   guard;  /* :: Ity_Bit.  Controls whether call happens.
                           NOTE: args are evaluated REGARDLESS of the
                           guard value (see comment above). */
      IRExpr**  args;   /* arg list, ends in NULL */
      IRTemp    tmp;    /* to assign result to, or IRTemp_INVALID if none */

      /* Mem effects; we allow only one R/W/M region to be stated */
      IREffect  mFx;    /* indicates memory effects, if any */
      IRExpr*   mAddr;  /* of access, or NULL if mFx==Ifx_None */
      Int       mSize;  /* of access, or zero if mFx==Ifx_None */

      /* Guest state effects; up to N allowed */
      Bool needsBBP; /* True => also pass guest state ptr to callee.
                        Invalid if nFxState is zero (see above). */
      Int  nFxState; /* must be 0 .. VEX_N_FXSTATE */
      struct {
         IREffect fx;   /* read, write or modify?  Ifx_None is invalid. */
         Int      offset;  /* guest state offset of the affected piece */
         Int      size;    /* size in bytes of the affected piece */
      } fxState[VEX_N_FXSTATE];
   }
   IRDirty;
1742
1743/* Pretty-print a dirty call */
1744extern void     ppIRDirty ( IRDirty* );
1745
1746/* Allocate an uninitialised dirty call */
1747extern IRDirty* emptyIRDirty ( void );
1748
1749/* Deep-copy a dirty call */
1750extern IRDirty* deepCopyIRDirty ( IRDirty* );
1751
1752/* A handy function which takes some of the tedium out of constructing
1753   dirty helper calls.  The called function impliedly does not return
1754   any value and has a constant-True guard.  The call is marked as
1755   accessing neither guest state nor memory (hence the "unsafe"
1756   designation) -- you can change this marking later if need be.  A
1757   suitable IRCallee is constructed from the supplied bits. */
1758extern
1759IRDirty* unsafeIRDirty_0_N ( Int regparms, HChar* name, void* addr,
1760                             IRExpr** args );
1761
1762/* Similarly, make a zero-annotation dirty call which returns a value,
1763   and assign that to the given temp. */
1764extern
1765IRDirty* unsafeIRDirty_1_N ( IRTemp dst,
1766                             Int regparms, HChar* name, void* addr,
1767                             IRExpr** args );
1768
1769
1770/* --------------- Memory Bus Events --------------- */
1771
typedef
   enum {
      Imbe_Fence=0x18000,  /* Memory fence: IR optimisation will not move
                              memory references across it (see Ist_MBE) */
   }
   IRMBusEvent;
1777
1778extern void ppIRMBusEvent ( IRMBusEvent );
1779
1780
1781/* --------------- Compare and Swap --------------- */
1782
1783/* This denotes an atomic compare and swap operation, either
1784   a single-element one or a double-element one.
1785
1786   In the single-element case:
1787
1788     .addr is the memory address.
1789     .end  is the endianness with which memory is accessed
1790
1791     If .addr contains the same value as .expdLo, then .dataLo is
1792     written there, else there is no write.  In both cases, the
1793     original value at .addr is copied into .oldLo.
1794
1795     Types: .expdLo, .dataLo and .oldLo must all have the same type.
1796     It may be any integral type, viz: I8, I16, I32 or, for 64-bit
1797     guests, I64.
1798
1799     .oldHi must be IRTemp_INVALID, and .expdHi and .dataHi must
1800     be NULL.
1801
1802   In the double-element case:
1803
1804     .addr is the memory address.
1805     .end  is the endianness with which memory is accessed
1806
1807     The operation is the same:
1808
1809     If .addr contains the same value as .expdHi:.expdLo, then
1810     .dataHi:.dataLo is written there, else there is no write.  In
1811     both cases the original value at .addr is copied into
1812     .oldHi:.oldLo.
1813
1814     Types: .expdHi, .expdLo, .dataHi, .dataLo, .oldHi, .oldLo must
1815     all have the same type, which may be any integral type, viz: I8,
1816     I16, I32 or, for 64-bit guests, I64.
1817
1818     The double-element case is complicated by the issue of
1819     endianness.  In all cases, the two elements are understood to be
1820     located adjacently in memory, starting at the address .addr.
1821
1822       If .end is Iend_LE, then the .xxxLo component is at the lower
1823       address and the .xxxHi component is at the higher address, and
1824       each component is itself stored little-endianly.
1825
1826       If .end is Iend_BE, then the .xxxHi component is at the lower
1827       address and the .xxxLo component is at the higher address, and
1828       each component is itself stored big-endianly.
1829
1830   This allows representing more cases than most architectures can
1831   handle.  For example, x86 cannot do DCAS on 8- or 16-bit elements.
1832
1833   How to know if the CAS succeeded?
1834
1835   * if .oldLo == .expdLo (resp. .oldHi:.oldLo == .expdHi:.expdLo),
1836     then the CAS succeeded, .dataLo (resp. .dataHi:.dataLo) is now
1837     stored at .addr, and the original value there was .oldLo (resp
1838     .oldHi:.oldLo).
1839
1840   * if .oldLo != .expdLo (resp. .oldHi:.oldLo != .expdHi:.expdLo),
1841     then the CAS failed, and the original value at .addr was .oldLo
1842     (resp. .oldHi:.oldLo).
1843
1844   Hence it is easy to know whether or not the CAS succeeded.
1845*/
typedef
   struct {
      IRTemp    oldHi;  /* old value of *addr is written here;
                           IRTemp_INVALID for a single-element CAS */
      IRTemp    oldLo;
      IREndness end;    /* endianness of the data in memory */
      IRExpr*   addr;   /* store address */
      IRExpr*   expdHi; /* expected old value at *addr;
                           NULL for a single-element CAS */
      IRExpr*   expdLo;
      IRExpr*   dataHi; /* new value for *addr;
                           NULL for a single-element CAS */
      IRExpr*   dataLo;
   }
   IRCAS;
1858
1859extern void ppIRCAS ( IRCAS* cas );
1860
1861extern IRCAS* mkIRCAS ( IRTemp oldHi, IRTemp oldLo,
1862                        IREndness end, IRExpr* addr,
1863                        IRExpr* expdHi, IRExpr* expdLo,
1864                        IRExpr* dataHi, IRExpr* dataLo );
1865
1866extern IRCAS* deepCopyIRCAS ( IRCAS* );
1867
1868/* ------------------ Statements ------------------ */
1869
1870/* The different kinds of statements.  Their meaning is explained
1871   below in the comments for IRStmt.
1872
1873   Those marked META do not represent code, but rather extra
1874   information about the code.  These statements can be removed
1875   without affecting the functional behaviour of the code, however
1876   they are required by some IR consumers such as tools that
1877   instrument the code.
1878*/
1879
typedef
   enum {
      Ist_NoOp=0x19000,  /* no-op; can be omitted without effect */
      Ist_IMark,     /* META: instruction mark */
      Ist_AbiHint,   /* META: platform ABI hint */
      Ist_Put,       /* write guest register, fixed offset */
      Ist_PutI,      /* write guest register, non-fixed offset */
      Ist_WrTmp,     /* assign to a temporary */
      Ist_Store,     /* normal store to memory */
      Ist_CAS,       /* atomic compare-and-swap */
      Ist_LLSC,      /* load-linked / store-conditional */
      Ist_Dirty,     /* call a side-effecting ("dirty") helper */
      Ist_MBE,       /* META (maybe): memory bus event */
      Ist_Exit       /* conditional exit from the IRSB */
   }
   IRStmtTag;
1896
/* A statement.  Stored as a tagged union.  'tag' indicates what kind
   of statement this is.  'Ist' is the union that holds the fields.
   If an IRStmt 'st' has st.tag equal to Ist_Store, then it's a store
   statement, and the fields can be accessed with
   'st.Ist.Store.<fieldname>'.

   For each kind of statement, we show what it looks like when
   pretty-printed with ppIRStmt().
*/
typedef
   struct _IRStmt {
      IRStmtTag tag;    /* discriminates the union below */
      union {
         /* A no-op (usually resulting from IR optimisation).  Can be
            omitted without any effect.

            ppIRStmt output: IR-NoOp
         */
         struct {
         } NoOp;

         /* META: instruction mark.  Marks the start of the statements
            that represent a single machine instruction (the end of
            those statements is marked by the next IMark or the end of
            the IRSB).  Contains the address and length of the
            instruction.

            It also contains a delta value.  The delta must be
            subtracted from a guest program counter value before
            attempting to establish, by comparison with the address
            and length values, whether or not that program counter
            value refers to this instruction.  For x86, amd64, ppc32,
            ppc64 and arm, the delta value is zero.  For Thumb
            instructions, the delta value is one.  This is because, on
            Thumb, guest PC values (guest_R15T) are encoded using the
            top 31 bits of the instruction address and a 1 in the lsb;
            hence they appear to be (numerically) 1 past the start of
            the instruction they refer to.  IOW, guest_R15T on ARM
            holds a standard ARM interworking address.

            ppIRStmt output: ------ IMark(<addr>, <len>, <delta>) ------,
                         eg. ------ IMark(0x4000792, 5, 0) ------,
         */
         struct {
            Addr64 addr;   /* instruction address */
            Int    len;    /* instruction length */
            UChar  delta;  /* addr = program counter as encoded in guest state
                                     - delta */
         } IMark;

         /* META: An ABI hint, which says something about this
            platform's ABI.

            At the moment, the only AbiHint is one which indicates
            that a given chunk of address space, [base .. base+len-1],
            has become undefined.  This is used on amd64-linux and
            some ppc variants to pass stack-redzoning hints to whoever
            wants to see them.  It also indicates the address of the
            next (dynamic) instruction that will be executed.  This is
            to help Memcheck with origin tracking.

            ppIRStmt output: ====== AbiHint(<base>, <len>, <nia>) ======
                         eg. ====== AbiHint(t1, 16, t2) ======
         */
         struct {
            IRExpr* base;     /* Start  of undefined chunk */
            Int     len;      /* Length of undefined chunk */
            IRExpr* nia;      /* Address of next (guest) insn */
         } AbiHint;

         /* Write a guest register, at a fixed offset in the guest state.
            ppIRStmt output: PUT(<offset>) = <data>, eg. PUT(60) = t1
         */
         struct {
            Int     offset;   /* Offset into the guest state */
            IRExpr* data;     /* The value to write */
         } Put;

         /* Write a guest register, at a non-fixed offset in the guest
            state.  See the comment for GetI expressions for more
            information.

            ppIRStmt output: PUTI<descr>[<ix>,<bias>] = <data>,
                         eg. PUTI(64:8xF64)[t5,0] = t1
         */
         struct {
            IRRegArray* descr; /* Part of guest state treated as circular */
            IRExpr*     ix;    /* Variable part of index into array */
            Int         bias;  /* Constant offset part of index into array */
            IRExpr*     data;  /* The value to write */
         } PutI;

         /* Assign a value to a temporary.  Note that SSA rules require
            each tmp is only assigned to once.  IR sanity checking will
            reject any block containing a temporary which is not assigned
            to exactly once.

            ppIRStmt output: t<tmp> = <data>, eg. t1 = 3
         */
         struct {
            IRTemp  tmp;   /* Temporary  (LHS of assignment) */
            IRExpr* data;  /* Expression (RHS of assignment) */
         } WrTmp;

         /* Write a value to memory.  This is a normal store, not a
            Store-Conditional.  To represent a Store-Conditional,
            instead use IRStmt.LLSC.
            ppIRStmt output: ST<end>(<addr>) = <data>, eg. STle(t1) = t2
         */
         struct {
            IREndness end;    /* Endianness of the store */
            IRExpr*   addr;   /* store address */
            IRExpr*   data;   /* value to write */
         } Store;

         /* Do an atomic compare-and-swap operation.  Semantics are
            described above on a comment at the definition of IRCAS.

            ppIRStmt output:
               t<tmp> = CAS<end>(<addr> :: <expected> -> <new>)
            eg
               t1 = CASle(t2 :: t3->Add32(t3,1))
               which denotes a 32-bit atomic increment
               of a value at address t2

            A double-element CAS may also be denoted, in which case <tmp>,
            <expected> and <new> are all pairs of items, separated by
            commas.
         */
         struct {
            IRCAS* details;   /* See the IRCAS definition above */
         } CAS;

         /* Either Load-Linked or Store-Conditional, depending on
            STOREDATA.

            If STOREDATA is NULL then this is a Load-Linked, meaning
            that data is loaded from memory as normal, but a
            'reservation' for the address is also lodged in the
            hardware.

               result = Load-Linked(addr, end)

            The data transfer type is the type of RESULT (I32, I64,
            etc).  ppIRStmt output:

               result = LD<end>-Linked(<addr>), eg. LDbe-Linked(t1)

            If STOREDATA is not NULL then this is a Store-Conditional,
            hence:

               result = Store-Conditional(addr, storedata, end)

            The data transfer type is the type of STOREDATA and RESULT
            has type Ity_I1. The store may fail or succeed depending
            on the state of a previously lodged reservation on this
            address.  RESULT is written 1 if the store succeeds and 0
            if it fails.  eg ppIRStmt output:

               result = ( ST<end>-Cond(<addr>) = <storedata> )
               eg t3 = ( STbe-Cond(t1, t2) )

            In all cases, the address must be naturally aligned for
            the transfer type -- any misaligned addresses should be
            caught by a dominating IR check and side exit.  This
            alignment restriction exists because on at least some
            LL/SC platforms (ppc), stwcx. etc will trap w/ SIGBUS on
            misaligned addresses, and we have to actually generate
            stwcx. on the host, and we don't want it trapping on the
            host.

            Summary of rules for transfer type:
              STOREDATA == NULL (LL):
                transfer type = type of RESULT
              STOREDATA != NULL (SC):
                transfer type = type of STOREDATA, and RESULT :: Ity_I1
         */
         struct {
            IREndness end;       /* Endianness of the transfer */
            IRTemp    result;    /* LL: the loaded value.
                                    SC: success/fail bit :: Ity_I1 */
            IRExpr*   addr;      /* Address of the transfer */
            IRExpr*   storedata; /* NULL => LL, non-NULL => SC */
         } LLSC;

         /* Call (possibly conditionally) a C function that has side
            effects (ie. is "dirty").  See the comments above the
            IRDirty type declaration for more information.

            ppIRStmt output:
               t<tmp> = DIRTY <guard> <effects>
                  ::: <callee>(<args>)
            eg.
               t1 = DIRTY t27 RdFX-gst(16,4) RdFX-gst(60,4)
                     ::: foo{0x380035f4}(t2)
         */
         struct {
            IRDirty* details;   /* See the IRDirty definition above */
         } Dirty;

         /* A memory bus event - a fence across which IR optimisation
            will not move any memory references.  (Imbe_Fence is
            currently the only event kind; see IRMBusEvent.)
            ppIRStmt output: MBusEvent-Fence
         */
         struct {
            IRMBusEvent event;
         } MBE;

         /* Conditional exit from the middle of an IRSB.
            ppIRStmt output: if (<guard>) goto {<jk>} <dst>
                         eg. if (t69) goto {Boring} 0x4000AAA:I32
         */
         struct {
            IRExpr*    guard;    /* Conditional expression */
            IRJumpKind jk;       /* Jump kind */
            IRConst*   dst;      /* Jump target (constant only) */
         } Exit;
      } Ist;
   }
   IRStmt;
2118
2119/* Statement constructors. */
2120extern IRStmt* IRStmt_NoOp    ( void );
2121extern IRStmt* IRStmt_IMark   ( Addr64 addr, Int len, UChar delta );
2122extern IRStmt* IRStmt_AbiHint ( IRExpr* base, Int len, IRExpr* nia );
2123extern IRStmt* IRStmt_Put     ( Int off, IRExpr* data );
2124extern IRStmt* IRStmt_PutI    ( IRRegArray* descr, IRExpr* ix, Int bias,
2125                                IRExpr* data );
2126extern IRStmt* IRStmt_WrTmp   ( IRTemp tmp, IRExpr* data );
2127extern IRStmt* IRStmt_Store   ( IREndness end, IRExpr* addr, IRExpr* data );
2128extern IRStmt* IRStmt_CAS     ( IRCAS* details );
2129extern IRStmt* IRStmt_LLSC    ( IREndness end, IRTemp result,
2130                                IRExpr* addr, IRExpr* storedata );
2131extern IRStmt* IRStmt_Dirty   ( IRDirty* details );
2132extern IRStmt* IRStmt_MBE     ( IRMBusEvent event );
2133extern IRStmt* IRStmt_Exit    ( IRExpr* guard, IRJumpKind jk, IRConst* dst );
2134
2135/* Deep-copy an IRStmt. */
2136extern IRStmt* deepCopyIRStmt ( IRStmt* );
2137
2138/* Pretty-print an IRStmt. */
2139extern void ppIRStmt ( IRStmt* );
2140
2141
2142/* ------------------ Basic Blocks ------------------ */
2143
2144/* Type environments: a bunch of statements, expressions, etc, are
2145   incomplete without an environment indicating the type of each
2146   IRTemp.  So this provides one.  IR temporaries are really just
2147   unsigned ints and so this provides an array, 0 .. n_types_used-1 of
2148   them.
2149*/
typedef
   struct {
      IRType* types;       /* Array of types, indexed by IRTemp */
      Int     types_size;  /* Allocated size of the array */
      Int     types_used;  /* Number of entries in use (0 .. types_used-1) */
   }
   IRTypeEnv;
2157
2158/* Obtain a new IRTemp */
2159extern IRTemp newIRTemp ( IRTypeEnv*, IRType );
2160
2161/* Deep-copy a type environment */
2162extern IRTypeEnv* deepCopyIRTypeEnv ( IRTypeEnv* );
2163
2164/* Pretty-print a type environment */
2165extern void ppIRTypeEnv ( IRTypeEnv* );
2166
2167
2168/* Code blocks, which in proper compiler terminology are superblocks
2169   (single entry, multiple exit code sequences) contain:
2170
2171   - A table giving a type for each temp (the "type environment")
2172   - An expandable array of statements
2173   - An expression of type 32 or 64 bits, depending on the
2174     guest's word size, indicating the next destination if the block
2175     executes all the way to the end, without a side exit
2176   - An indication of any special actions (JumpKind) needed
2177     for this final jump.
2178
2179   "IRSB" stands for "IR Super Block".
2180*/
typedef
   struct {
      IRTypeEnv* tyenv;       /* Type environment: type of each IRTemp */
      IRStmt**   stmts;       /* Expandable array of statements */
      Int        stmts_size;  /* Allocated size of stmts */
      Int        stmts_used;  /* Number of entries in use */
      IRExpr*    next;        /* Next destination if the block runs to the
                                 end; 32 or 64 bits per guest word size */
      IRJumpKind jumpkind;    /* Special actions needed for the final jump */
   }
   IRSB;
2191
2192/* Allocate a new, uninitialised IRSB */
2193extern IRSB* emptyIRSB ( void );
2194
2195/* Deep-copy an IRSB */
2196extern IRSB* deepCopyIRSB ( IRSB* );
2197
2198/* Deep-copy an IRSB, except for the statements list, which set to be
2199   a new, empty, list of statements. */
2200extern IRSB* deepCopyIRSBExceptStmts ( IRSB* );
2201
2202/* Pretty-print an IRSB */
2203extern void ppIRSB ( IRSB* );
2204
2205/* Append an IRStmt to an IRSB */
2206extern void addStmtToIRSB ( IRSB*, IRStmt* );
2207
2208
2209/*---------------------------------------------------------------*/
2210/*--- Helper functions for the IR                             ---*/
2211/*---------------------------------------------------------------*/
2212
2213/* For messing with IR type environments */
2214extern IRTypeEnv* emptyIRTypeEnv  ( void );
2215
2216/* What is the type of this expression? */
2217extern IRType typeOfIRConst ( IRConst* );
2218extern IRType typeOfIRTemp  ( IRTypeEnv*, IRTemp );
2219extern IRType typeOfIRExpr  ( IRTypeEnv*, IRExpr* );
2220
2221/* Sanity check a BB of IR */
2222extern void sanityCheckIRSB ( IRSB*  bb,
2223                              HChar* caller,
2224                              Bool   require_flatness,
2225                              IRType guest_word_size );
2226extern Bool isFlatIRStmt ( IRStmt* );
2227
2228/* Is this any value actually in the enumeration 'IRType' ? */
2229extern Bool isPlausibleIRType ( IRType ty );
2230
2231#endif /* ndef __LIBVEX_IR_H */
2232
2233
2234/*---------------------------------------------------------------*/
2235/*---                                             libvex_ir.h ---*/
2236/*---------------------------------------------------------------*/
2237