libvex_ir.h revision 74142b8c8d5d3b3db17d744f5d5fb80f548bcf74
1
2/*---------------------------------------------------------------*/
3/*--- begin                                       libvex_ir.h ---*/
4/*---------------------------------------------------------------*/
5
6/*
7   This file is part of Valgrind, a dynamic binary instrumentation
8   framework.
9
10   Copyright (C) 2004-2012 OpenWorks LLP
11      info@open-works.net
12
13   This program is free software; you can redistribute it and/or
14   modify it under the terms of the GNU General Public License as
15   published by the Free Software Foundation; either version 2 of the
16   License, or (at your option) any later version.
17
18   This program is distributed in the hope that it will be useful, but
19   WITHOUT ANY WARRANTY; without even the implied warranty of
20   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21   General Public License for more details.
22
23   You should have received a copy of the GNU General Public License
24   along with this program; if not, write to the Free Software
25   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26   02110-1301, USA.
27
28   The GNU General Public License is contained in the file COPYING.
29
30   Neither the names of the U.S. Department of Energy nor the
31   University of California nor the names of its contributors may be
32   used to endorse or promote products derived from this software
33   without prior written permission.
34*/
35
36#ifndef __LIBVEX_IR_H
37#define __LIBVEX_IR_H
38
39#include "libvex_basictypes.h"
40
41
42/*---------------------------------------------------------------*/
43/*--- High-level IR description                               ---*/
44/*---------------------------------------------------------------*/
45
46/* Vex IR is an architecture-neutral intermediate representation.
47   Unlike some IRs in systems similar to Vex, it is not like assembly
48   language (ie. a list of instructions).  Rather, it is more like the
49   IR that might be used in a compiler.
50
51   Code blocks
52   ~~~~~~~~~~~
53   The code is broken into small code blocks ("superblocks", type:
54   'IRSB').  Each code block typically represents from 1 to perhaps 50
55   instructions.  IRSBs are single-entry, multiple-exit code blocks.
56   Each IRSB contains three things:
57   - a type environment, which indicates the type of each temporary
58     value present in the IRSB
59   - a list of statements, which represent code
   - a jump that exits from the end of the IRSB
61   Because the blocks are multiple-exit, there can be additional
62   conditional exit statements that cause control to leave the IRSB
63   before the final exit.  Also because of this, IRSBs can cover
64   multiple non-consecutive sequences of code (up to 3).  These are
65   recorded in the type VexGuestExtents (see libvex.h).
66
67   Statements and expressions
68   ~~~~~~~~~~~~~~~~~~~~~~~~~~
69   Statements (type 'IRStmt') represent operations with side-effects,
70   eg.  guest register writes, stores, and assignments to temporaries.
71   Expressions (type 'IRExpr') represent operations without
72   side-effects, eg. arithmetic operations, loads, constants.
73   Expressions can contain sub-expressions, forming expression trees,
   eg. (3 + (4 * load(addr1))).
75
76   Storage of guest state
77   ~~~~~~~~~~~~~~~~~~~~~~
78   The "guest state" contains the guest registers of the guest machine
79   (ie.  the machine that we are simulating).  It is stored by default
80   in a block of memory supplied by the user of the VEX library,
81   generally referred to as the guest state (area).  To operate on
82   these registers, one must first read ("Get") them from the guest
83   state into a temporary value.  Afterwards, one can write ("Put")
84   them back into the guest state.
85
86   Get and Put are characterised by a byte offset into the guest
87   state, a small integer which effectively gives the identity of the
88   referenced guest register, and a type, which indicates the size of
89   the value to be transferred.
90
91   The basic "Get" and "Put" operations are sufficient to model normal
92   fixed registers on the guest.  Selected areas of the guest state
93   can be treated as a circular array of registers (type:
94   'IRRegArray'), which can be indexed at run-time.  This is done with
95   the "GetI" and "PutI" primitives.  This is necessary to describe
96   rotating register files, for example the x87 FPU stack, SPARC
97   register windows, and the Itanium register files.
98
99   Examples, and flattened vs. unflattened code
100   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
101   For example, consider this x86 instruction:
102
103     addl %eax, %ebx
104
105   One Vex IR translation for this code would be this:
106
     ------ IMark(0x24F275, 7, 0) ------
     t3 = GET:I32(0)             # get %eax, a 32-bit integer
     t2 = GET:I32(12)            # get %ebx, a 32-bit integer
     t1 = Add32(t3,t2)           # addl
     PUT(12) = t1                # put %ebx

   (For simplicity, this ignores the effects on the condition codes, and
   the update of the instruction pointer.)

   The "IMark" is an IR statement that doesn't represent actual code.
   Instead it indicates the address and length of the original
   instruction.  The numbers 0 and 12 are offsets into the guest state
   for %eax and %ebx.  The full list of offsets for an architecture
   <ARCH> can be found in the type VexGuest<ARCH>State in the file
   VEX/pub/libvex_guest_<ARCH>.h.

   The five statements in this example are:
   - the IMark
   - three assignments to temporaries
   - one register write (put)

   The six expressions in this example are:
   - two register reads (gets)
   - one arithmetic (add) operation
   - three temporaries (two nested within the Add32, one in the PUT)

   The above IR is "flattened", ie. all sub-expressions are "atoms",
   either constants or temporaries.  An equivalent, unflattened version
   would be:

     PUT(12) = Add32(GET:I32(0), GET:I32(12))
138
139   IR is guaranteed to be flattened at instrumentation-time.  This makes
140   instrumentation easier.  Equivalent flattened and unflattened IR
141   typically results in the same generated code.
142
143   Another example, this one showing loads and stores:
144
145     addl %edx,4(%eax)
146
147   This becomes (again ignoring condition code and instruction pointer
148   updates):
149
150     ------ IMark(0x4000ABA, 3, 0) ------
151     t3 = Add32(GET:I32(0),0x4:I32)
152     t2 = LDle:I32(t3)
153     t1 = GET:I32(8)
154     t0 = Add32(t2,t1)
155     STle(t3) = t0
156
157   The "le" in "LDle" and "STle" is short for "little-endian".
158
159   No need for deallocations
160   ~~~~~~~~~~~~~~~~~~~~~~~~~
161   Although there are allocation functions for various data structures
162   in this file, there are no deallocation functions.  This is because
163   Vex uses a memory allocation scheme that automatically reclaims the
164   memory used by allocated structures once translation is completed.
   This makes things easier for tools that instrument/transform code
166   blocks.
167
168   SSAness and typing
169   ~~~~~~~~~~~~~~~~~~
170   The IR is fully typed.  For every IRSB (IR block) it is possible to
171   say unambiguously whether or not it is correctly typed.
   Incorrectly typed IR has no meaning and VEX will refuse to
173   process it.  At various points during processing VEX typechecks the
174   IR and aborts if any violations are found.  This seems overkill but
175   makes it a great deal easier to build a reliable JIT.
176
177   IR also has the SSA property.  SSA stands for Static Single
178   Assignment, and what it means is that each IR temporary may be
179   assigned to only once.  This idea became widely used in compiler
180   construction in the mid to late 90s.  It makes many IR-level
181   transformations/code improvements easier, simpler and faster.
182   Whenever it typechecks an IR block, VEX also checks the SSA
183   property holds, and will abort if not so.  So SSAness is
184   mechanically and rigidly enforced.
185*/
186
187/*---------------------------------------------------------------*/
188/*--- Type definitions for the IR                             ---*/
189/*---------------------------------------------------------------*/
190
191/* General comments about naming schemes:
192
   All publicly visible functions contain the name of the primary
194   type on which they operate (IRFoo, IRBar, etc).  Hence you should
195   be able to identify these functions by grepping for "IR[A-Z]".
196
197   For some type 'IRFoo':
198
199   - ppIRFoo is the printing method for IRFoo, printing it to the
200     output channel specified in the LibVEX_Initialise call.
201
202   - eqIRFoo is a structural equality predicate for IRFoos.
203
204   - deepCopyIRFoo is a deep copy constructor for IRFoos.
205     It recursively traverses the entire argument tree and
206     produces a complete new tree.  All types have a deep copy
207     constructor.
208
209   - shallowCopyIRFoo is the shallow copy constructor for IRFoos.
210     It creates a new top-level copy of the supplied object,
211     but does not copy any sub-objects.  Only some types have a
212     shallow copy constructor.
213*/
214
215/* ------------------ Types ------------------ */
216
/* IRType: the type of an IR value.  A type indicates the size of a
   value, and whether it's an integer, a float, or a vector (SIMD)
   value.

   Only Ity_INVALID has an explicit value (0x1100); every following
   enumerator implicitly takes the next consecutive value. */
typedef
   enum {
      Ity_INVALID=0x1100,
      /* Integer types, named by their width in bits. */
      Ity_I1,
      Ity_I8,
      Ity_I16,
      Ity_I32,
      Ity_I64,
      Ity_I128,  /* 128-bit scalar */
      /* Binary and decimal floating point types. */
      Ity_F32,   /* IEEE 754 float */
      Ity_F64,   /* IEEE 754 double */
      Ity_D32,   /* 32-bit Decimal floating point */
      Ity_D64,   /* 64-bit Decimal floating point */
      Ity_D128,  /* 128-bit Decimal floating point */
      Ity_F128,  /* 128-bit floating point; implementation defined */
      /* Vector (SIMD) types. */
      Ity_V128,  /* 128-bit SIMD */
      Ity_V256   /* 256-bit SIMD */
   }
   IRType;
238
239/* Pretty-print an IRType */
240extern void ppIRType ( IRType );
241
242/* Get the size (in bytes) of an IRType */
243extern Int sizeofIRType ( IRType );
244
245
246/* ------------------ Endianness ------------------ */
247
/* IREndness is used in load IRExprs and store IRStmts, to say in
   which byte order the value is read from or written to memory.
   Iend_LE is pinned to 0x1200; Iend_BE implicitly follows it. */
typedef
   enum {
      Iend_LE=0x1200, /* little endian */
      Iend_BE         /* big endian */
   }
   IREndness;
255
256
257/* ------------------ Constants ------------------ */
258
/* IRConsts are used within 'Const' IRExprs and 'Exit' IRStmts. */
260
/* The various kinds of constant.  Ico_U1 is pinned to 0x1300; every
   following tag implicitly takes the next consecutive value. */
typedef
   enum {
      Ico_U1=0x1300,
      Ico_U8,
      Ico_U16,
      Ico_U32,
      Ico_U64,
      Ico_F32,   /* 32-bit IEEE754 floating */
      Ico_F32i,  /* 32-bit unsigned int to be interpreted literally
                    as a IEEE754 single value. */
      Ico_F64,   /* 64-bit IEEE754 floating */
      Ico_F64i,  /* 64-bit unsigned int to be interpreted literally
                    as a IEEE754 double value. */
      Ico_V128,  /* 128-bit restricted vector constant, with 1 bit
                    (repeated 8 times) for each of the 16 x 1-byte lanes */
      Ico_V256   /* 256-bit restricted vector constant, with 1 bit
                    (repeated 8 times) for each of the 32 x 1-byte lanes */
   }
   IRConstTag;
281
282/* A constant.  Stored as a tagged union.  'tag' indicates what kind of
283   constant this is.  'Ico' is the union that holds the fields.  If an
284   IRConst 'c' has c.tag equal to Ico_U32, then it's a 32-bit constant,
285   and its value can be accessed with 'c.Ico.U32'. */
286typedef
287   struct _IRConst {
288      IRConstTag tag;
289      union {
290         Bool   U1;
291         UChar  U8;
292         UShort U16;
293         UInt   U32;
294         ULong  U64;
295         Float  F32;
296         UInt   F32i;
297         Double F64;
298         ULong  F64i;
299         UShort V128;   /* 16-bit value; see Ico_V128 comment above */
300         UInt   V256;   /* 32-bit value; see Ico_V256 comment above */
301      } Ico;
302   }
303   IRConst;
304
305/* IRConst constructors */
306extern IRConst* IRConst_U1   ( Bool );
307extern IRConst* IRConst_U8   ( UChar );
308extern IRConst* IRConst_U16  ( UShort );
309extern IRConst* IRConst_U32  ( UInt );
310extern IRConst* IRConst_U64  ( ULong );
311extern IRConst* IRConst_F32  ( Float );
312extern IRConst* IRConst_F32i ( UInt );
313extern IRConst* IRConst_F64  ( Double );
314extern IRConst* IRConst_F64i ( ULong );
315extern IRConst* IRConst_V128 ( UShort );
316extern IRConst* IRConst_V256 ( UInt );
317
318/* Deep-copy an IRConst */
319extern IRConst* deepCopyIRConst ( IRConst* );
320
321/* Pretty-print an IRConst */
322extern void ppIRConst ( IRConst* );
323
324/* Compare two IRConsts for equality */
325extern Bool eqIRConst ( IRConst*, IRConst* );
326
327
328/* ------------------ Call targets ------------------ */
329
330/* Describes a helper function to call.  The name part is purely for
331   pretty printing and not actually used.  regparms=n tells the back
332   end that the callee has been declared
333   "__attribute__((regparm(n)))", although indirectly using the
334   VEX_REGPARM(n) macro.  On some targets (x86) the back end will need
335   to construct a non-standard sequence to call a function declared
336   like this.
337
338   mcx_mask is a sop to Memcheck.  It indicates which args should be
339   considered 'always defined' when lazily computing definedness of
340   the result.  Bit 0 of mcx_mask corresponds to args[0], bit 1 to
341   args[1], etc.  If a bit is set, the corresponding arg is excluded
342   (hence "x" in "mcx") from definedness checking.
343*/
344
345typedef
346   struct {
347      Int          regparms;
348      const HChar* name;
349      void*        addr;
350      UInt         mcx_mask;
351   }
352   IRCallee;
353
354/* Create an IRCallee. */
355extern IRCallee* mkIRCallee ( Int regparms, const HChar* name, void* addr );
356
357/* Deep-copy an IRCallee. */
358extern IRCallee* deepCopyIRCallee ( IRCallee* );
359
360/* Pretty-print an IRCallee. */
361extern void ppIRCallee ( IRCallee* );
362
363
364/* ------------------ Guest state arrays ------------------ */
365
366/* This describes a section of the guest state that we want to
367   be able to index at run time, so as to be able to describe
368   indexed or rotating register files on the guest. */
369typedef
370   struct {
371      Int    base;   /* guest state offset of start of indexed area */
372      IRType elemTy; /* type of each element in the indexed area */
373      Int    nElems; /* number of elements in the indexed area */
374   }
375   IRRegArray;
376
377extern IRRegArray* mkIRRegArray ( Int, IRType, Int );
378
379extern IRRegArray* deepCopyIRRegArray ( IRRegArray* );
380
381extern void ppIRRegArray ( IRRegArray* );
382extern Bool eqIRRegArray ( IRRegArray*, IRRegArray* );
383
384
385/* ------------------ Temporaries ------------------ */
386
387/* This represents a temporary, eg. t1.  The IR optimiser relies on the
388   fact that IRTemps are 32-bit ints.  Do not change them to be ints of
389   any other size. */
390typedef UInt IRTemp;
391
392/* Pretty-print an IRTemp. */
393extern void ppIRTemp ( IRTemp );
394
395#define IRTemp_INVALID ((IRTemp)0xFFFFFFFF)
396
397
398/* --------------- Primops (arity 1,2,3 and 4) --------------- */
399
400/* Primitive operations that are used in Unop, Binop, Triop and Qop
401   IRExprs.  Once we take into account integer, floating point and SIMD
402   operations of all the different sizes, there are quite a lot of them.
403   Most instructions supported by the architectures that Vex supports
404   (x86, PPC, etc) are represented.  Some more obscure ones (eg. cpuid)
405   are not;  they are instead handled with dirty helpers that emulate
406   their functionality.  Such obscure ones are thus not directly visible
407   in the IR, but their effects on guest state (memory and registers)
408   are made visible via the annotations in IRDirty structures.
409*/
410typedef
411   enum {
      /* -- Do not change this ordering.  The IR generators rely on
            (eg) Iop_Add64 == Iop_Add8 + 3. -- */
414
415      Iop_INVALID=0x1400,
416      Iop_Add8,  Iop_Add16,  Iop_Add32,  Iop_Add64,
417      Iop_Sub8,  Iop_Sub16,  Iop_Sub32,  Iop_Sub64,
418      /* Signless mul.  MullS/MullU is elsewhere. */
419      Iop_Mul8,  Iop_Mul16,  Iop_Mul32,  Iop_Mul64,
420      Iop_Or8,   Iop_Or16,   Iop_Or32,   Iop_Or64,
421      Iop_And8,  Iop_And16,  Iop_And32,  Iop_And64,
422      Iop_Xor8,  Iop_Xor16,  Iop_Xor32,  Iop_Xor64,
423      Iop_Shl8,  Iop_Shl16,  Iop_Shl32,  Iop_Shl64,
424      Iop_Shr8,  Iop_Shr16,  Iop_Shr32,  Iop_Shr64,
425      Iop_Sar8,  Iop_Sar16,  Iop_Sar32,  Iop_Sar64,
426      /* Integer comparisons. */
427      Iop_CmpEQ8,  Iop_CmpEQ16,  Iop_CmpEQ32,  Iop_CmpEQ64,
428      Iop_CmpNE8,  Iop_CmpNE16,  Iop_CmpNE32,  Iop_CmpNE64,
429      /* Tags for unary ops */
430      Iop_Not8,  Iop_Not16,  Iop_Not32,  Iop_Not64,
431
432      /* Exactly like CmpEQ8/16/32/64, but carrying the additional
433         hint that these compute the success/failure of a CAS
434         operation, and hence are almost certainly applied to two
435         copies of the same value, which in turn has implications for
436         Memcheck's instrumentation. */
437      Iop_CasCmpEQ8, Iop_CasCmpEQ16, Iop_CasCmpEQ32, Iop_CasCmpEQ64,
438      Iop_CasCmpNE8, Iop_CasCmpNE16, Iop_CasCmpNE32, Iop_CasCmpNE64,
439
      /* Exactly like CmpNE8/16/32/64, but carrying the additional
         hint that these need expensive definedness tracking. */
442      Iop_ExpCmpNE8, Iop_ExpCmpNE16, Iop_ExpCmpNE32, Iop_ExpCmpNE64,
443
444      /* -- Ordering not important after here. -- */
445
446      /* Widening multiplies */
447      Iop_MullS8, Iop_MullS16, Iop_MullS32, Iop_MullS64,
448      Iop_MullU8, Iop_MullU16, Iop_MullU32, Iop_MullU64,
449
      /* Weirdo integer stuff */
451      Iop_Clz64, Iop_Clz32,   /* count leading zeroes */
452      Iop_Ctz64, Iop_Ctz32,   /* count trailing zeros */
453      /* Ctz64/Ctz32/Clz64/Clz32 are UNDEFINED when given arguments of
454         zero.  You must ensure they are never given a zero argument.
455      */
456
457      /* Standard integer comparisons */
458      Iop_CmpLT32S, Iop_CmpLT64S,
459      Iop_CmpLE32S, Iop_CmpLE64S,
460      Iop_CmpLT32U, Iop_CmpLT64U,
461      Iop_CmpLE32U, Iop_CmpLE64U,
462
463      /* As a sop to Valgrind-Memcheck, the following are useful. */
464      Iop_CmpNEZ8, Iop_CmpNEZ16,  Iop_CmpNEZ32,  Iop_CmpNEZ64,
      Iop_CmpwNEZ32, Iop_CmpwNEZ64, /* all-0s -> all-0s; other -> all-1s */
466      Iop_Left8, Iop_Left16, Iop_Left32, Iop_Left64, /*  \x -> x | -x */
467      Iop_Max32U, /* unsigned max */
468
469      /* PowerPC-style 3-way integer comparisons.  Without them it is
470         difficult to simulate PPC efficiently.
471         op(x,y) | x < y  = 0x8 else
472                 | x > y  = 0x4 else
473                 | x == y = 0x2
474      */
475      Iop_CmpORD32U, Iop_CmpORD64U,
476      Iop_CmpORD32S, Iop_CmpORD64S,
477
478      /* Division */
479      /* TODO: clarify semantics wrt rounding, negative values, whatever */
480      Iop_DivU32,   // :: I32,I32 -> I32 (simple div, no mod)
481      Iop_DivS32,   // ditto, signed
482      Iop_DivU64,   // :: I64,I64 -> I64 (simple div, no mod)
483      Iop_DivS64,   // ditto, signed
484      Iop_DivU64E,  // :: I64,I64 -> I64 (dividend is 64-bit arg (hi) concat with 64 0's (low))
485      Iop_DivS64E,  // ditto, signed
486      Iop_DivU32E,  // :: I32,I32 -> I32 (dividend is 32-bit arg (hi) concat with 32 0's (low))
487      Iop_DivS32E,  // ditto, signed
488
489      Iop_DivModU64to32, // :: I64,I32 -> I64
490                         // of which lo half is div and hi half is mod
491      Iop_DivModS64to32, // ditto, signed
492
493      Iop_DivModU128to64, // :: V128,I64 -> V128
494                          // of which lo half is div and hi half is mod
495      Iop_DivModS128to64, // ditto, signed
496
497      Iop_DivModS64to64, // :: I64,I64 -> I128
498                         // of which lo half is div and hi half is mod
499
500      /* Integer conversions.  Some of these are redundant (eg
501         Iop_64to8 is the same as Iop_64to32 and then Iop_32to8), but
502         having a complete set reduces the typical dynamic size of IR
503         and makes the instruction selectors easier to write. */
504
505      /* Widening conversions */
506      Iop_8Uto16, Iop_8Uto32,  Iop_8Uto64,
507                  Iop_16Uto32, Iop_16Uto64,
508                               Iop_32Uto64,
509      Iop_8Sto16, Iop_8Sto32,  Iop_8Sto64,
510                  Iop_16Sto32, Iop_16Sto64,
511                               Iop_32Sto64,
512
513      /* Narrowing conversions */
514      Iop_64to8, Iop_32to8, Iop_64to16,
515      /* 8 <-> 16 bit conversions */
516      Iop_16to8,      // :: I16 -> I8, low half
517      Iop_16HIto8,    // :: I16 -> I8, high half
518      Iop_8HLto16,    // :: (I8,I8) -> I16
519      /* 16 <-> 32 bit conversions */
520      Iop_32to16,     // :: I32 -> I16, low half
521      Iop_32HIto16,   // :: I32 -> I16, high half
522      Iop_16HLto32,   // :: (I16,I16) -> I32
523      /* 32 <-> 64 bit conversions */
524      Iop_64to32,     // :: I64 -> I32, low half
525      Iop_64HIto32,   // :: I64 -> I32, high half
526      Iop_32HLto64,   // :: (I32,I32) -> I64
527      /* 64 <-> 128 bit conversions */
528      Iop_128to64,    // :: I128 -> I64, low half
529      Iop_128HIto64,  // :: I128 -> I64, high half
530      Iop_64HLto128,  // :: (I64,I64) -> I128
531      /* 1-bit stuff */
532      Iop_Not1,   /* :: Ity_Bit -> Ity_Bit */
533      Iop_32to1,  /* :: Ity_I32 -> Ity_Bit, just select bit[0] */
534      Iop_64to1,  /* :: Ity_I64 -> Ity_Bit, just select bit[0] */
535      Iop_1Uto8,  /* :: Ity_Bit -> Ity_I8,  unsigned widen */
536      Iop_1Uto32, /* :: Ity_Bit -> Ity_I32, unsigned widen */
537      Iop_1Uto64, /* :: Ity_Bit -> Ity_I64, unsigned widen */
538      Iop_1Sto8,  /* :: Ity_Bit -> Ity_I8,  signed widen */
539      Iop_1Sto16, /* :: Ity_Bit -> Ity_I16, signed widen */
540      Iop_1Sto32, /* :: Ity_Bit -> Ity_I32, signed widen */
541      Iop_1Sto64, /* :: Ity_Bit -> Ity_I64, signed widen */
542
543      /* ------ Floating point.  We try to be IEEE754 compliant. ------ */
544
545      /* --- Simple stuff as mandated by 754. --- */
546
547      /* Binary operations, with rounding. */
548      /* :: IRRoundingMode(I32) x F64 x F64 -> F64 */
549      Iop_AddF64, Iop_SubF64, Iop_MulF64, Iop_DivF64,
550
551      /* :: IRRoundingMode(I32) x F32 x F32 -> F32 */
552      Iop_AddF32, Iop_SubF32, Iop_MulF32, Iop_DivF32,
553
554      /* Variants of the above which produce a 64-bit result but which
555         round their result to a IEEE float range first. */
556      /* :: IRRoundingMode(I32) x F64 x F64 -> F64 */
557      Iop_AddF64r32, Iop_SubF64r32, Iop_MulF64r32, Iop_DivF64r32,
558
559      /* Unary operations, without rounding. */
560      /* :: F64 -> F64 */
561      Iop_NegF64, Iop_AbsF64,
562
563      /* :: F32 -> F32 */
564      Iop_NegF32, Iop_AbsF32,
565
566      /* Unary operations, with rounding. */
567      /* :: IRRoundingMode(I32) x F64 -> F64 */
568      Iop_SqrtF64,
569
570      /* :: IRRoundingMode(I32) x F32 -> F32 */
571      Iop_SqrtF32,
572
573      /* Comparison, yielding GT/LT/EQ/UN(ordered), as per the following:
574            0x45 Unordered
575            0x01 LT
576            0x00 GT
577            0x40 EQ
578         This just happens to be the Intel encoding.  The values
579         are recorded in the type IRCmpF64Result.
580      */
581      /* :: F64 x F64 -> IRCmpF64Result(I32) */
582      Iop_CmpF64,
583      Iop_CmpF32,
584      Iop_CmpF128,
585
586      /* --- Int to/from FP conversions. --- */
587
588      /* For the most part, these take a first argument :: Ity_I32 (as
589         IRRoundingMode) which is an indication of the rounding mode
590         to use, as per the following encoding ("the standard
591         encoding"):
592            00b  to nearest (the default)
593            01b  to -infinity
594            10b  to +infinity
595            11b  to zero
596         This just happens to be the Intel encoding.  For reference only,
597         the PPC encoding is:
598            00b  to nearest (the default)
599            01b  to zero
600            10b  to +infinity
601            11b  to -infinity
602         Any PPC -> IR front end will have to translate these PPC
603         encodings, as encoded in the guest state, to the standard
604         encodings, to pass to the primops.
605         For reference only, the ARM VFP encoding is:
606            00b  to nearest
607            01b  to +infinity
608            10b  to -infinity
609            11b  to zero
610         Again, this will have to be converted to the standard encoding
611         to pass to primops.
612
613         If one of these conversions gets an out-of-range condition,
614         or a NaN, as an argument, the result is host-defined.  On x86
615         the "integer indefinite" value 0x80..00 is produced.  On PPC
616         it is either 0x80..00 or 0x7F..FF depending on the sign of
617         the argument.
618
619         On ARMvfp, when converting to a signed integer result, the
620         overflow result is 0x80..00 for negative args and 0x7F..FF
621         for positive args.  For unsigned integer results it is
622         0x00..00 and 0xFF..FF respectively.
623
624         Rounding is required whenever the destination type cannot
625         represent exactly all values of the source type.
626      */
627      Iop_F64toI16S, /* IRRoundingMode(I32) x F64 -> signed I16 */
628      Iop_F64toI32S, /* IRRoundingMode(I32) x F64 -> signed I32 */
629      Iop_F64toI64S, /* IRRoundingMode(I32) x F64 -> signed I64 */
630      Iop_F64toI64U, /* IRRoundingMode(I32) x F64 -> unsigned I64 */
631
632      Iop_F64toI32U, /* IRRoundingMode(I32) x F64 -> unsigned I32 */
633
634      Iop_I32StoF64, /*                       signed I32 -> F64 */
635      Iop_I64StoF64, /* IRRoundingMode(I32) x signed I64 -> F64 */
636      Iop_I64UtoF64, /* IRRoundingMode(I32) x unsigned I64 -> F64 */
637      Iop_I64UtoF32, /* IRRoundingMode(I32) x unsigned I64 -> F32 */
638
639      Iop_I32UtoF32, /* IRRoundingMode(I32) x unsigned I32 -> F32 */
640      Iop_I32UtoF64, /*                       unsigned I32 -> F64 */
641
642      Iop_F32toI32S, /* IRRoundingMode(I32) x F32 -> signed I32 */
643      Iop_F32toI64S, /* IRRoundingMode(I32) x F32 -> signed I64 */
644      Iop_F32toI32U, /* IRRoundingMode(I32) x F32 -> unsigned I32 */
645      Iop_F32toI64U, /* IRRoundingMode(I32) x F32 -> unsigned I64 */
646
647      Iop_I32StoF32, /* IRRoundingMode(I32) x signed I32 -> F32 */
648      Iop_I64StoF32, /* IRRoundingMode(I32) x signed I64 -> F32 */
649
650      /* Conversion between floating point formats */
651      Iop_F32toF64,  /*                       F32 -> F64 */
652      Iop_F64toF32,  /* IRRoundingMode(I32) x F64 -> F32 */
653
654      /* Reinterpretation.  Take an F64 and produce an I64 with
655         the same bit pattern, or vice versa. */
656      Iop_ReinterpF64asI64, Iop_ReinterpI64asF64,
657      Iop_ReinterpF32asI32, Iop_ReinterpI32asF32,
658
659      /* Support for 128-bit floating point */
660      Iop_F64HLtoF128,/* (high half of F128,low half of F128) -> F128 */
661      Iop_F128HItoF64,/* F128 -> high half of F128 into a F64 register */
662      Iop_F128LOtoF64,/* F128 -> low  half of F128 into a F64 register */
663
664      /* :: IRRoundingMode(I32) x F128 x F128 -> F128 */
665      Iop_AddF128, Iop_SubF128, Iop_MulF128, Iop_DivF128,
666
667      /* :: F128 -> F128 */
668      Iop_NegF128, Iop_AbsF128,
669
670      /* :: IRRoundingMode(I32) x F128 -> F128 */
671      Iop_SqrtF128,
672
673      Iop_I32StoF128, /*                signed I32  -> F128 */
674      Iop_I64StoF128, /*                signed I64  -> F128 */
675      Iop_I32UtoF128, /*              unsigned I32  -> F128 */
676      Iop_I64UtoF128, /*              unsigned I64  -> F128 */
677      Iop_F32toF128,  /*                       F32  -> F128 */
678      Iop_F64toF128,  /*                       F64  -> F128 */
679
680      Iop_F128toI32S, /* IRRoundingMode(I32) x F128 -> signed I32  */
681      Iop_F128toI64S, /* IRRoundingMode(I32) x F128 -> signed I64  */
682      Iop_F128toI32U, /* IRRoundingMode(I32) x F128 -> unsigned I32  */
683      Iop_F128toI64U, /* IRRoundingMode(I32) x F128 -> unsigned I64  */
684      Iop_F128toF64,  /* IRRoundingMode(I32) x F128 -> F64         */
685      Iop_F128toF32,  /* IRRoundingMode(I32) x F128 -> F32         */
686
687      /* --- guest x86/amd64 specifics, not mandated by 754. --- */
688
689      /* Binary ops, with rounding. */
690      /* :: IRRoundingMode(I32) x F64 x F64 -> F64 */
691      Iop_AtanF64,       /* FPATAN,  arctan(arg1/arg2)       */
692      Iop_Yl2xF64,       /* FYL2X,   arg1 * log2(arg2)       */
693      Iop_Yl2xp1F64,     /* FYL2XP1, arg1 * log2(arg2+1.0)   */
694      Iop_PRemF64,       /* FPREM,   non-IEEE remainder(arg1/arg2)    */
695      Iop_PRemC3210F64,  /* C3210 flags resulting from FPREM, :: I32 */
696      Iop_PRem1F64,      /* FPREM1,  IEEE remainder(arg1/arg2)    */
697      Iop_PRem1C3210F64, /* C3210 flags resulting from FPREM1, :: I32 */
698      Iop_ScaleF64,      /* FSCALE,  arg1 * (2^RoundTowardsZero(arg2)) */
699      /* Note that on x86 guest, PRem1{C3210} has the same behaviour
700         as the IEEE mandated RemF64, except it is limited in the
701         range of its operand.  Hence the partialness. */
702
703      /* Unary ops, with rounding. */
704      /* :: IRRoundingMode(I32) x F64 -> F64 */
705      Iop_SinF64,    /* FSIN */
706      Iop_CosF64,    /* FCOS */
707      Iop_TanF64,    /* FTAN */
708      Iop_2xm1F64,   /* (2^arg - 1.0) */
709      Iop_RoundF64toInt, /* F64 value to nearest integral value (still
710                            as F64) */
711      Iop_RoundF32toInt, /* F32 value to nearest integral value (still
712                            as F32) */
713
714      /* --- guest s390 specifics, not mandated by 754. --- */
715
716      /* Fused multiply-add/sub */
717      /* :: IRRoundingMode(I32) x F32 x F32 x F32 -> F32
718            (computes arg2 * arg3 +/- arg4) */
719      Iop_MAddF32, Iop_MSubF32,
720
721      /* --- guest ppc32/64 specifics, not mandated by 754. --- */
722
723      /* Ternary operations, with rounding. */
724      /* Fused multiply-add/sub, with 112-bit intermediate
725         precision for ppc.
726         Also used to implement fused multiply-add/sub for s390. */
727      /* :: IRRoundingMode(I32) x F64 x F64 x F64 -> F64
728            (computes arg2 * arg3 +/- arg4) */
729      Iop_MAddF64, Iop_MSubF64,
730
731      /* Variants of the above which produce a 64-bit result but which
732         round their result to a IEEE float range first. */
733      /* :: IRRoundingMode(I32) x F64 x F64 x F64 -> F64 */
734      Iop_MAddF64r32, Iop_MSubF64r32,
735
736      /* :: F64 -> F64 */
737      Iop_Est5FRSqrt,    /* reciprocal square root estimate, 5 good bits */
738      Iop_RoundF64toF64_NEAREST, /* frin */
739      Iop_RoundF64toF64_NegINF,  /* frim */
740      Iop_RoundF64toF64_PosINF,  /* frip */
741      Iop_RoundF64toF64_ZERO,    /* friz */
742
743      /* :: F64 -> F32 */
744      Iop_TruncF64asF32, /* do F64->F32 truncation as per 'fsts' */
745
746      /* :: IRRoundingMode(I32) x F64 -> F64 */
747      Iop_RoundF64toF32, /* round F64 to nearest F32 value (still as F64) */
748      /* NB: pretty much the same as Iop_F64toF32, except no change
749         of type. */
750
751      /* ------------------ 32-bit SIMD Integer ------------------ */
752
753      /* 32x1 saturating add/sub (ok, well, not really SIMD :) */
754      Iop_QAdd32S,
755      Iop_QSub32S,
756
757      /* 16x2 add/sub, also signed/unsigned saturating variants */
758      Iop_Add16x2, Iop_Sub16x2,
759      Iop_QAdd16Sx2, Iop_QAdd16Ux2,
760      Iop_QSub16Sx2, Iop_QSub16Ux2,
761
762      /* 16x2 signed/unsigned halving add/sub.  For each lane, these
763         compute bits 16:1 of (eg) sx(argL) + sx(argR),
764         or zx(argL) - zx(argR) etc. */
765      Iop_HAdd16Ux2, Iop_HAdd16Sx2,
766      Iop_HSub16Ux2, Iop_HSub16Sx2,
767
768      /* 8x4 add/sub, also signed/unsigned saturating variants */
769      Iop_Add8x4, Iop_Sub8x4,
770      Iop_QAdd8Sx4, Iop_QAdd8Ux4,
771      Iop_QSub8Sx4, Iop_QSub8Ux4,
772
773      /* 8x4 signed/unsigned halving add/sub.  For each lane, these
774         compute bits 8:1 of (eg) sx(argL) + sx(argR),
775         or zx(argL) - zx(argR) etc. */
776      Iop_HAdd8Ux4, Iop_HAdd8Sx4,
777      Iop_HSub8Ux4, Iop_HSub8Sx4,
778
779      /* 8x4 sum of absolute unsigned differences. */
780      Iop_Sad8Ux4,
781
782      /* MISC (vector integer cmp != 0) */
783      Iop_CmpNEZ16x2, Iop_CmpNEZ8x4,
784
785      /* ------------------ 64-bit SIMD FP ------------------------ */
786
787      /* Conversion to/from int */
788      Iop_I32UtoFx2,  Iop_I32StoFx2,    /* I32x2 -> F32x2 */
789      Iop_FtoI32Ux2_RZ,  Iop_FtoI32Sx2_RZ,    /* F32x2 -> I32x2 */
790      /* Fixed32 format is floating-point number with fixed number of fraction
791         bits. The number of fraction bits is passed as a second argument of
792         type I8. */
793      Iop_F32ToFixed32Ux2_RZ, Iop_F32ToFixed32Sx2_RZ, /* fp -> fixed-point */
794      Iop_Fixed32UToF32x2_RN, Iop_Fixed32SToF32x2_RN, /* fixed-point -> fp */
795
796      /* Binary operations */
797      Iop_Max32Fx2,      Iop_Min32Fx2,
798      /* Pairwise Min and Max. See integer pairwise operations for more
799         details. */
800      Iop_PwMax32Fx2,    Iop_PwMin32Fx2,
801      /* Note: For the following compares, the arm front-end assumes a
802         nan in a lane of either argument returns zero for that lane. */
803      Iop_CmpEQ32Fx2, Iop_CmpGT32Fx2, Iop_CmpGE32Fx2,
804
805      /* Vector Reciprocal Estimate finds an approximate reciprocal of each
806      element in the operand vector, and places the results in the destination
807      vector.  */
808      Iop_Recip32Fx2,
809
810      /* Vector Reciprocal Step computes (2.0 - arg1 * arg2).
811         Note, that if one of the arguments is zero and another one is infinity
812         of arbitrary sign the result of the operation is 2.0. */
813      Iop_Recps32Fx2,
814
815      /* Vector Reciprocal Square Root Estimate finds an approximate reciprocal
816         square root of each element in the operand vector. */
817      Iop_Rsqrte32Fx2,
818
819      /* Vector Reciprocal Square Root Step computes (3.0 - arg1 * arg2) / 2.0.
820         Note, that if one of the arguments is zero and another one is infinity
821         of arbitrary sign the result of the operation is 1.5. */
822      Iop_Rsqrts32Fx2,
823
824      /* Unary */
825      Iop_Neg32Fx2, Iop_Abs32Fx2,
826
827      /* ------------------ 64-bit SIMD Integer. ------------------ */
828
829      /* MISC (vector integer cmp != 0) */
830      Iop_CmpNEZ8x8, Iop_CmpNEZ16x4, Iop_CmpNEZ32x2,
831
832      /* ADDITION (normal / unsigned sat / signed sat) */
833      Iop_Add8x8,   Iop_Add16x4,   Iop_Add32x2,
834      Iop_QAdd8Ux8, Iop_QAdd16Ux4, Iop_QAdd32Ux2, Iop_QAdd64Ux1,
835      Iop_QAdd8Sx8, Iop_QAdd16Sx4, Iop_QAdd32Sx2, Iop_QAdd64Sx1,
836
837      /* PAIRWISE operations */
838      /* Iop_PwFoo16x4( [a,b,c,d], [e,f,g,h] ) =
839            [Foo16(a,b), Foo16(c,d), Foo16(e,f), Foo16(g,h)] */
840      Iop_PwAdd8x8,  Iop_PwAdd16x4,  Iop_PwAdd32x2,
841      Iop_PwMax8Sx8, Iop_PwMax16Sx4, Iop_PwMax32Sx2,
842      Iop_PwMax8Ux8, Iop_PwMax16Ux4, Iop_PwMax32Ux2,
843      Iop_PwMin8Sx8, Iop_PwMin16Sx4, Iop_PwMin32Sx2,
844      Iop_PwMin8Ux8, Iop_PwMin16Ux4, Iop_PwMin32Ux2,
845      /* Longening variant is unary. The resulting vector contains half as
846         many elements as the operand, but each is twice as wide.
847         Example:
848            Iop_PwAddL16Ux4( [a,b,c,d] ) = [a+b,c+d]
849               where a+b and c+d are unsigned 32-bit values. */
850      Iop_PwAddL8Ux8, Iop_PwAddL16Ux4, Iop_PwAddL32Ux2,
851      Iop_PwAddL8Sx8, Iop_PwAddL16Sx4, Iop_PwAddL32Sx2,
852
853      /* SUBTRACTION (normal / unsigned sat / signed sat) */
854      Iop_Sub8x8,   Iop_Sub16x4,   Iop_Sub32x2,
855      Iop_QSub8Ux8, Iop_QSub16Ux4, Iop_QSub32Ux2, Iop_QSub64Ux1,
856      Iop_QSub8Sx8, Iop_QSub16Sx4, Iop_QSub32Sx2, Iop_QSub64Sx1,
857
858      /* ABSOLUTE VALUE */
859      Iop_Abs8x8, Iop_Abs16x4, Iop_Abs32x2,
860
861      /* MULTIPLICATION (normal / high half of signed/unsigned / polynomial ) */
862      Iop_Mul8x8, Iop_Mul16x4, Iop_Mul32x2,
863      Iop_Mul32Fx2,
864      Iop_MulHi16Ux4,
865      Iop_MulHi16Sx4,
866      /* Polynomial multiplication treats its arguments as coefficients of
867         polynomials over {0, 1}. */
868      Iop_PolynomialMul8x8,
869
870      /* Vector Saturating Doubling Multiply Returning High Half and
871         Vector Saturating Rounding Doubling Multiply Returning High Half */
872      /* These IROp's multiply corresponding elements in two vectors, double
873         the results, and place the most significant half of the final results
874         in the destination vector. The results are truncated or rounded. If
875         any of the results overflow, they are saturated. */
876      Iop_QDMulHi16Sx4, Iop_QDMulHi32Sx2,
877      Iop_QRDMulHi16Sx4, Iop_QRDMulHi32Sx2,
878
879      /* AVERAGING: note: (arg1 + arg2 + 1) >>u 1 */
880      Iop_Avg8Ux8,
881      Iop_Avg16Ux4,
882
883      /* MIN/MAX */
884      Iop_Max8Sx8, Iop_Max16Sx4, Iop_Max32Sx2,
885      Iop_Max8Ux8, Iop_Max16Ux4, Iop_Max32Ux2,
886      Iop_Min8Sx8, Iop_Min16Sx4, Iop_Min32Sx2,
887      Iop_Min8Ux8, Iop_Min16Ux4, Iop_Min32Ux2,
888
889      /* COMPARISON */
890      Iop_CmpEQ8x8,  Iop_CmpEQ16x4,  Iop_CmpEQ32x2,
891      Iop_CmpGT8Ux8, Iop_CmpGT16Ux4, Iop_CmpGT32Ux2,
892      Iop_CmpGT8Sx8, Iop_CmpGT16Sx4, Iop_CmpGT32Sx2,
893
894      /* COUNT ones / leading zeroes / leading sign bits (not including topmost
895         bit) */
896      Iop_Cnt8x8,
897      Iop_Clz8Sx8, Iop_Clz16Sx4, Iop_Clz32Sx2,
898      Iop_Cls8Sx8, Iop_Cls16Sx4, Iop_Cls32Sx2,
899
900      /* VECTOR x VECTOR SHIFT / ROTATE */
901      Iop_Shl8x8, Iop_Shl16x4, Iop_Shl32x2,
902      Iop_Shr8x8, Iop_Shr16x4, Iop_Shr32x2,
903      Iop_Sar8x8, Iop_Sar16x4, Iop_Sar32x2,
904      Iop_Sal8x8, Iop_Sal16x4, Iop_Sal32x2, Iop_Sal64x1,
905
906      /* VECTOR x SCALAR SHIFT (shift amt :: Ity_I8) */
907      Iop_ShlN8x8, Iop_ShlN16x4, Iop_ShlN32x2,
908      Iop_ShrN8x8, Iop_ShrN16x4, Iop_ShrN32x2,
909      Iop_SarN8x8, Iop_SarN16x4, Iop_SarN32x2,
910
911      /* VECTOR x VECTOR SATURATING SHIFT */
912      Iop_QShl8x8, Iop_QShl16x4, Iop_QShl32x2, Iop_QShl64x1,
913      Iop_QSal8x8, Iop_QSal16x4, Iop_QSal32x2, Iop_QSal64x1,
914      /* VECTOR x INTEGER SATURATING SHIFT */
915      Iop_QShlN8Sx8, Iop_QShlN16Sx4, Iop_QShlN32Sx2, Iop_QShlN64Sx1,
916      Iop_QShlN8x8, Iop_QShlN16x4, Iop_QShlN32x2, Iop_QShlN64x1,
917      Iop_QSalN8x8, Iop_QSalN16x4, Iop_QSalN32x2, Iop_QSalN64x1,
918
919      /* NARROWING (binary)
920         -- narrow 2xI64 into 1xI64, hi half from left arg */
921      /* For saturated narrowing, I believe there are 4 variants of
922         the basic arithmetic operation, depending on the signedness
923         of argument and result.  Here are examples that exemplify
924         what I mean:
925
926         QNarrow16Uto8U ( UShort x )  if (x >u 255) x = 255;
927                                      return x[7:0];
928
929         QNarrow16Sto8S ( Short x )   if (x <s -128) x = -128;
930                                      if (x >s  127) x = 127;
931                                      return x[7:0];
932
933         QNarrow16Uto8S ( UShort x )  if (x >u 127) x = 127;
934                                      return x[7:0];
935
936         QNarrow16Sto8U ( Short x )   if (x <s 0)   x = 0;
937                                      if (x >s 255) x = 255;
938                                      return x[7:0];
939      */
940      Iop_QNarrowBin16Sto8Ux8,
941      Iop_QNarrowBin16Sto8Sx8, Iop_QNarrowBin32Sto16Sx4,
942      Iop_NarrowBin16to8x8,    Iop_NarrowBin32to16x4,
943
944      /* INTERLEAVING */
945      /* Interleave lanes from low or high halves of
946         operands.  Most-significant result lane is from the left
947         arg. */
948      Iop_InterleaveHI8x8, Iop_InterleaveHI16x4, Iop_InterleaveHI32x2,
949      Iop_InterleaveLO8x8, Iop_InterleaveLO16x4, Iop_InterleaveLO32x2,
950      /* Interleave odd/even lanes of operands.  Most-significant result lane
951         is from the left arg.  Note that Interleave{Odd,Even}Lanes32x2 are
952         identical to Interleave{HI,LO}32x2 and so are omitted.*/
953      Iop_InterleaveOddLanes8x8, Iop_InterleaveEvenLanes8x8,
954      Iop_InterleaveOddLanes16x4, Iop_InterleaveEvenLanes16x4,
955
956      /* CONCATENATION -- build a new value by concatenating either
957         the even or odd lanes of both operands.  Note that
958         Cat{Odd,Even}Lanes32x2 are identical to Interleave{HI,LO}32x2
959         and so are omitted. */
960      Iop_CatOddLanes8x8, Iop_CatOddLanes16x4,
961      Iop_CatEvenLanes8x8, Iop_CatEvenLanes16x4,
962
963      /* GET / SET elements of VECTOR
964         GET is binop (I64, I8) -> I<elem_size>
965         SET is triop (I64, I8, I<elem_size>) -> I64 */
966      /* Note: the arm back-end handles only constant second argument */
967      Iop_GetElem8x8, Iop_GetElem16x4, Iop_GetElem32x2,
968      Iop_SetElem8x8, Iop_SetElem16x4, Iop_SetElem32x2,
969
970      /* DUPLICATING -- copy value to all lanes */
971      Iop_Dup8x8,   Iop_Dup16x4,   Iop_Dup32x2,
972
973      /* EXTRACT -- copy 8-arg3 highest bytes from arg1 to 8-arg3 lowest bytes
974         of result and arg3 lowest bytes of arg2 to arg3 highest bytes of
975         result.
976         It is a triop: (I64, I64, I8) -> I64 */
977      /* Note: the arm back-end handles only constant third argument. */
978      Iop_Extract64,
979
980      /* REVERSE the order of elements in each Half-words, Words,
981         Double-words */
982      /* Examples:
983            Reverse16_8x8([a,b,c,d,e,f,g,h]) = [b,a,d,c,f,e,h,g]
984            Reverse32_8x8([a,b,c,d,e,f,g,h]) = [d,c,b,a,h,g,f,e]
985            Reverse64_8x8([a,b,c,d,e,f,g,h]) = [h,g,f,e,d,c,b,a] */
986      Iop_Reverse16_8x8,
987      Iop_Reverse32_8x8, Iop_Reverse32_16x4,
988      Iop_Reverse64_8x8, Iop_Reverse64_16x4, Iop_Reverse64_32x2,
989
990      /* PERMUTING -- copy src bytes to dst,
991         as indexed by control vector bytes:
992            for i in 0 .. 7 . result[i] = argL[ argR[i] ]
993         argR[i] values may only be in the range 0 .. 7, else behaviour
994         is undefined. */
995      Iop_Perm8x8,
996
997      /* MISC CONVERSION -- get high bits of each byte lane, a la
998         x86/amd64 pmovmskb */
999      Iop_GetMSBs8x8, /* I64 -> I8 */
1000
1001      /* Vector Reciprocal Estimate and Vector Reciprocal Square Root Estimate
1002         See floating-point equivalents for details. */
1003      Iop_Recip32x2, Iop_Rsqrte32x2,
1004
1005      /* ------------------ Decimal Floating Point ------------------ */
1006
1007      /* ARITHMETIC INSTRUCTIONS   64-bit
1008	 ----------------------------------
1009	 IRRoundingMode(I32) X D64 X D64 -> D64
1010      */
1011      Iop_AddD64, Iop_SubD64, Iop_MulD64, Iop_DivD64,
1012
1013      /* ARITHMETIC INSTRUCTIONS  128-bit
1014	 ----------------------------------
1015	 IRRoundingMode(I32) X D128 X D128 -> D128
1016      */
1017      Iop_AddD128, Iop_SubD128, Iop_MulD128, Iop_DivD128,
1018
1019      /* SHIFT SIGNIFICAND INSTRUCTIONS
1020       *    The DFP significand is shifted by the number of digits specified
1021       *    by the U8 operand.  Digits shifted out of the leftmost digit are
1022       *    lost. Zeros are supplied to the vacated positions on the right.
1023       *    The sign of the result is the same as the sign of the original
1024       *    operand.
1025       *
1026       * D64 x U8  -> D64    left shift and right shift respectively */
1027      Iop_ShlD64, Iop_ShrD64,
1028
1029      /* D128 x U8  -> D128  left shift and right shift respectively */
1030      Iop_ShlD128, Iop_ShrD128,
1031
1032
1033      /* FORMAT CONVERSION INSTRUCTIONS
1034       *   D32 -> D64
1035       */
1036      Iop_D32toD64,
1037
1038      /*   D64 -> D128 */
1039      Iop_D64toD128,
1040
1041      /*   I32S -> D128 */
1042      Iop_I32StoD128,
1043
1044      /*   I32U -> D128 */
1045      Iop_I32UtoD128,
1046
1047      /*   I64S -> D128 */
1048      Iop_I64StoD128,
1049
1050      /*   I64U -> D128 */
1051      Iop_I64UtoD128,
1052
1053      /*   IRRoundingMode(I32) x D64 -> D32 */
1054      Iop_D64toD32,
1055
1056      /*   IRRoundingMode(I32) x D128 -> D64 */
1057      Iop_D128toD64,
1058
1059      /*   I32S -> D64 */
1060      Iop_I32StoD64,
1061
1062      /*   I32U -> D64 */
1063      Iop_I32UtoD64,
1064
1065      /*   IRRoundingMode(I32) x I64 -> D64 */
1066      Iop_I64StoD64,
1067
1068      /*   IRRoundingMode(I32) x I64 -> D64 */
1069      Iop_I64UtoD64,
1070
1071      /*   IRRoundingMode(I32) x D64 -> I32 */
1072      Iop_D64toI32S,
1073
1074      /*   IRRoundingMode(I32) x D64 -> I32 */
1075      Iop_D64toI32U,
1076
1077      /*   IRRoundingMode(I32) x D64 -> I64 */
1078      Iop_D64toI64S,
1079
1080      /*   IRRoundingMode(I32) x D64 -> I64 */
1081      Iop_D64toI64U,
1082
1083      /*   IRRoundingMode(I32) x D128 -> I32 */
1084      Iop_D128toI32S,
1085
1086      /*   IRRoundingMode(I32) x D128 -> I32 */
1087      Iop_D128toI32U,
1088
1089      /*   IRRoundingMode(I32) x D128 -> I64 */
1090      Iop_D128toI64S,
1091
1092      /*   IRRoundingMode(I32) x D128 -> I64 */
1093      Iop_D128toI64U,
1094
1095      /*   IRRoundingMode(I32) x F32 -> D32 */
1096      Iop_F32toD32,
1097
1098      /*   IRRoundingMode(I32) x F32 -> D64 */
1099      Iop_F32toD64,
1100
1101      /*   IRRoundingMode(I32) x F32 -> D128 */
1102      Iop_F32toD128,
1103
1104      /*   IRRoundingMode(I32) x F64 -> D32 */
1105      Iop_F64toD32,
1106
1107      /*   IRRoundingMode(I32) x F64 -> D64 */
1108      Iop_F64toD64,
1109
1110      /*   IRRoundingMode(I32) x F64 -> D128 */
1111      Iop_F64toD128,
1112
1113      /*   IRRoundingMode(I32) x F128 -> D32 */
1114      Iop_F128toD32,
1115
1116      /*   IRRoundingMode(I32) x F128 -> D64 */
1117      Iop_F128toD64,
1118
1119      /*   IRRoundingMode(I32) x F128 -> D128 */
1120      Iop_F128toD128,
1121
1122      /*   IRRoundingMode(I32) x D32 -> F32 */
1123      Iop_D32toF32,
1124
1125      /*   IRRoundingMode(I32) x D32 -> F64 */
1126      Iop_D32toF64,
1127
1128      /*   IRRoundingMode(I32) x D32 -> F128 */
1129      Iop_D32toF128,
1130
1131      /*   IRRoundingMode(I32) x D64 -> F32 */
1132      Iop_D64toF32,
1133
1134      /*   IRRoundingMode(I32) x D64 -> F64 */
1135      Iop_D64toF64,
1136
1137      /*   IRRoundingMode(I32) x D64 -> F128 */
1138      Iop_D64toF128,
1139
1140      /*   IRRoundingMode(I32) x D128 -> F32 */
1141      Iop_D128toF32,
1142
1143      /*   IRRoundingMode(I32) x D128 -> F64 */
1144      Iop_D128toF64,
1145
1146      /*   IRRoundingMode(I32) x D128 -> F128 */
1147      Iop_D128toF128,
1148
1149      /* ROUNDING INSTRUCTIONS
1150       * IRRoundingMode(I32) x D64 -> D64
1151       * The D64 operand, if a finite number, is rounded to a
1152       * floating point integer value, i.e. no fractional part.
1153       */
1154      Iop_RoundD64toInt,
1155
1156      /* IRRoundingMode(I32) x D128 -> D128 */
1157      Iop_RoundD128toInt,
1158
1159      /* COMPARE INSTRUCTIONS
1160       * D64 x D64 -> IRCmpD64Result(I32) */
1161      Iop_CmpD64,
1162
1163      /* D128 x D128 -> IRCmpD128Result(I32) */
1164      Iop_CmpD128,
1165
1166      /* COMPARE BIASED EXPONENT INSTRUCTIONS
1167       * D64 x D64 -> IRCmpD64Result(I32) */
1168      Iop_CmpExpD64,
1169
1170      /* D128 x D128 -> IRCmpD128Result(I32) */
1171      Iop_CmpExpD128,
1172
1173      /* QUANTIZE AND ROUND INSTRUCTIONS
1174       * The source operand is converted and rounded to the form with the
1175       * immediate exponent specified by the rounding and exponent parameter.
1176       *
1177       * The second operand is converted and rounded to the form
1178       * of the first operand's exponent and then rounded based on the specified
1179       * rounding mode parameter.
1180       *
1181       * IRRoundingMode(I32) x D64 x D64-> D64 */
1182      Iop_QuantizeD64,
1183
1184      /* IRRoundingMode(I32) x D128 x D128 -> D128 */
1185      Iop_QuantizeD128,
1186
1187      /* IRRoundingMode(I32) x I8 x D64 -> D64
1188       *    The Decimal Floating point operand is rounded to the requested
1189       *    significance given by the I8 operand as specified by the rounding
1190       *    mode.
1191       */
1192      Iop_SignificanceRoundD64,
1193
1194      /* IRRoundingMode(I32) x I8 x D128 -> D128 */
1195      Iop_SignificanceRoundD128,
1196
1197      /* EXTRACT AND INSERT INSTRUCTIONS
1198       * D64 -> I64
1199       *    The exponent of the D32 or D64 operand is extracted.  The
1200       *    extracted exponent is converted to a 64-bit signed binary integer.
1201       */
1202      Iop_ExtractExpD64,
1203
1204      /* D128 -> I64 */
1205      Iop_ExtractExpD128,
1206
1207      /* D64 -> I64
1208       * The number of significand digits of the D64 operand is extracted.
1209       * The number is stored as a 64-bit signed binary integer.
1210       */
1211      Iop_ExtractSigD64,
1212
1213      /* D128 -> I64 */
1214      Iop_ExtractSigD128,
1215
1216      /* I64 x D64  -> D64
1217       *    The exponent is specified by the first I64 operand; the signed
1218       *    significand is given by the second I64 value.  The result is a D64
1219       *    value consisting of the specified significand and exponent whose
1220       *    sign is that of the specified significand.
1221       */
1222      Iop_InsertExpD64,
1223
1224      /* I64 x D128 -> D128 */
1225      Iop_InsertExpD128,
1226
1227      /* Support for 128-bit DFP type */
1228      Iop_D64HLtoD128, Iop_D128HItoD64, Iop_D128LOtoD64,
1229
1230      /*  I64 -> I64
1231       *     Convert 50-bit densely packed BCD string to 60 bit BCD string
1232       */
1233      Iop_DPBtoBCD,
1234
1235      /* I64 -> I64
1236       *     Convert 60 bit BCD string to 50-bit densely packed BCD string
1237       */
1238      Iop_BCDtoDPB,
1239
1240      /* Conversion I64 -> D64 */
1241      Iop_ReinterpI64asD64,
1242
1243      /* Conversion D64 -> I64 */
1244      Iop_ReinterpD64asI64,
1245
1246      /* ------------------ 128-bit SIMD FP. ------------------ */
1247
1248      /* --- 32x4 vector FP --- */
1249
1250      /* binary */
1251      Iop_Add32Fx4, Iop_Sub32Fx4, Iop_Mul32Fx4, Iop_Div32Fx4,
1252      Iop_Max32Fx4, Iop_Min32Fx4,
1253      Iop_Add32Fx2, Iop_Sub32Fx2,
1254      /* Note: For the following compares, the ppc and arm front-ends assume a
1255         nan in a lane of either argument returns zero for that lane. */
1256      Iop_CmpEQ32Fx4, Iop_CmpLT32Fx4, Iop_CmpLE32Fx4, Iop_CmpUN32Fx4,
1257      Iop_CmpGT32Fx4, Iop_CmpGE32Fx4,
1258
1259      /* Vector Absolute */
1260      Iop_Abs32Fx4,
1261
1262      /* Pairwise Max and Min. See integer pairwise operations for details. */
1263      Iop_PwMax32Fx4, Iop_PwMin32Fx4,
1264
1265      /* unary */
1266      Iop_Sqrt32Fx4, Iop_RSqrt32Fx4,
1267      Iop_Neg32Fx4,
1268
1269      /* Vector Reciprocal Estimate finds an approximate reciprocal of each
1270      element in the operand vector, and places the results in the destination
1271      vector.  */
1272      Iop_Recip32Fx4,
1273
1274      /* Vector Reciprocal Step computes (2.0 - arg1 * arg2).
1275         Note, that if one of the arguments is zero and another one is infinity
1276         of arbitrary sign the result of the operation is 2.0. */
1277      Iop_Recps32Fx4,
1278
1279      /* Vector Reciprocal Square Root Estimate finds an approximate reciprocal
1280         square root of each element in the operand vector. */
1281      Iop_Rsqrte32Fx4,
1282
1283      /* Vector Reciprocal Square Root Step computes (3.0 - arg1 * arg2) / 2.0.
1284         Note, that if one of the arguments is zero and another one is infinity
1285         of arbitrary sign the result of the operation is 1.5. */
1286      Iop_Rsqrts32Fx4,
1287
1288      /* --- Int to/from FP conversion --- */
1289      /* Unlike the standard fp conversions, these irops take no
1290         rounding mode argument. Instead the irop trailers _R{M,P,N,Z}
1291         indicate the mode: {-inf, +inf, nearest, zero} respectively. */
1292      Iop_I32UtoFx4,  Iop_I32StoFx4,       /* I32x4 -> F32x4       */
1293      Iop_FtoI32Ux4_RZ,  Iop_FtoI32Sx4_RZ,    /* F32x4 -> I32x4       */
1294      Iop_QFtoI32Ux4_RZ, Iop_QFtoI32Sx4_RZ,   /* F32x4 -> I32x4 (with saturation) */
1295      Iop_RoundF32x4_RM, Iop_RoundF32x4_RP,   /* round to fp integer  */
1296      Iop_RoundF32x4_RN, Iop_RoundF32x4_RZ,   /* round to fp integer  */
1297      /* Fixed32 format is floating-point number with fixed number of fraction
1298         bits. The number of fraction bits is passed as a second argument of
1299         type I8. */
1300      Iop_F32ToFixed32Ux4_RZ, Iop_F32ToFixed32Sx4_RZ, /* fp -> fixed-point */
1301      Iop_Fixed32UToF32x4_RN, Iop_Fixed32SToF32x4_RN, /* fixed-point -> fp */
1302
1303      /* --- Single to/from half conversion --- */
1304      /* FIXME: what kind of rounding in F32x4 -> F16x4 case? */
1305      Iop_F32toF16x4, Iop_F16toF32x4,         /* F32x4 <-> F16x4      */
1306
1307      /* --- 32x4 lowest-lane-only scalar FP --- */
1308
1309      /* In binary cases, upper 3/4 is copied from first operand.  In
1310         unary cases, upper 3/4 is copied from the operand. */
1311
1312      /* binary */
1313      Iop_Add32F0x4, Iop_Sub32F0x4, Iop_Mul32F0x4, Iop_Div32F0x4,
1314      Iop_Max32F0x4, Iop_Min32F0x4,
1315      Iop_CmpEQ32F0x4, Iop_CmpLT32F0x4, Iop_CmpLE32F0x4, Iop_CmpUN32F0x4,
1316
1317      /* unary */
1318      Iop_Recip32F0x4, Iop_Sqrt32F0x4, Iop_RSqrt32F0x4,
1319
1320      /* --- 64x2 vector FP --- */
1321
1322      /* binary */
1323      Iop_Add64Fx2, Iop_Sub64Fx2, Iop_Mul64Fx2, Iop_Div64Fx2,
1324      Iop_Max64Fx2, Iop_Min64Fx2,
1325      Iop_CmpEQ64Fx2, Iop_CmpLT64Fx2, Iop_CmpLE64Fx2, Iop_CmpUN64Fx2,
1326
1327      /* unary */
1328      Iop_Recip64Fx2, Iop_Sqrt64Fx2, Iop_RSqrt64Fx2,
1329
1330      /* --- 64x2 lowest-lane-only scalar FP --- */
1331
1332      /* In binary cases, upper half is copied from first operand.  In
1333         unary cases, upper half is copied from the operand. */
1334
1335      /* binary */
1336      Iop_Add64F0x2, Iop_Sub64F0x2, Iop_Mul64F0x2, Iop_Div64F0x2,
1337      Iop_Max64F0x2, Iop_Min64F0x2,
1338      Iop_CmpEQ64F0x2, Iop_CmpLT64F0x2, Iop_CmpLE64F0x2, Iop_CmpUN64F0x2,
1339
1340      /* unary */
1341      Iop_Recip64F0x2, Iop_Sqrt64F0x2, Iop_RSqrt64F0x2,
1342
1343      /* --- pack / unpack --- */
1344
1345      /* 64 <-> 128 bit vector */
1346      Iop_V128to64,     // :: V128 -> I64, low half
1347      Iop_V128HIto64,   // :: V128 -> I64, high half
1348      Iop_64HLtoV128,   // :: (I64,I64) -> V128
1349
1350      Iop_64UtoV128,
1351      Iop_SetV128lo64,
1352
1353      /* 32 <-> 128 bit vector */
1354      Iop_32UtoV128,
1355      Iop_V128to32,     // :: V128 -> I32, lowest lane
1356      Iop_SetV128lo32,  // :: (V128,I32) -> V128
1357
1358      /* ------------------ 128-bit SIMD Integer. ------------------ */
1359
1360      /* BITWISE OPS */
1361      Iop_NotV128,
1362      Iop_AndV128, Iop_OrV128, Iop_XorV128,
1363
1364      /* VECTOR SHIFT (shift amt :: Ity_I8) */
1365      Iop_ShlV128, Iop_ShrV128,
1366
1367      /* MISC (vector integer cmp != 0) */
1368      Iop_CmpNEZ8x16, Iop_CmpNEZ16x8, Iop_CmpNEZ32x4, Iop_CmpNEZ64x2,
1369
1370      /* ADDITION (normal / unsigned sat / signed sat) */
1371      Iop_Add8x16,   Iop_Add16x8,   Iop_Add32x4,   Iop_Add64x2,
1372      Iop_QAdd8Ux16, Iop_QAdd16Ux8, Iop_QAdd32Ux4, Iop_QAdd64Ux2,
1373      Iop_QAdd8Sx16, Iop_QAdd16Sx8, Iop_QAdd32Sx4, Iop_QAdd64Sx2,
1374
1375      /* SUBTRACTION (normal / unsigned sat / signed sat) */
1376      Iop_Sub8x16,   Iop_Sub16x8,   Iop_Sub32x4,   Iop_Sub64x2,
1377      Iop_QSub8Ux16, Iop_QSub16Ux8, Iop_QSub32Ux4, Iop_QSub64Ux2,
1378      Iop_QSub8Sx16, Iop_QSub16Sx8, Iop_QSub32Sx4, Iop_QSub64Sx2,
1379
1380      /* MULTIPLICATION (normal / high half of signed/unsigned) */
1381      Iop_Mul8x16,  Iop_Mul16x8,    Iop_Mul32x4,
1382                    Iop_MulHi16Ux8, Iop_MulHi32Ux4,
1383                    Iop_MulHi16Sx8, Iop_MulHi32Sx4,
1384      /* (widening signed/unsigned of even lanes, with lowest lane=zero) */
1385      Iop_MullEven8Ux16, Iop_MullEven16Ux8,
1386      Iop_MullEven8Sx16, Iop_MullEven16Sx8,
1387      /* FIXME: document these */
1388      Iop_Mull8Ux8, Iop_Mull8Sx8,
1389      Iop_Mull16Ux4, Iop_Mull16Sx4,
1390      Iop_Mull32Ux2, Iop_Mull32Sx2,
1391      /* Vector Saturating Doubling Multiply Returning High Half and
1392         Vector Saturating Rounding Doubling Multiply Returning High Half */
1393      /* These IROp's multiply corresponding elements in two vectors, double
1394         the results, and place the most significant half of the final results
1395         in the destination vector. The results are truncated or rounded. If
1396         any of the results overflow, they are saturated. */
1397      Iop_QDMulHi16Sx8, Iop_QDMulHi32Sx4,
1398      Iop_QRDMulHi16Sx8, Iop_QRDMulHi32Sx4,
1399      /* Doubling saturating multiplication (long) (I64, I64) -> V128 */
1400      Iop_QDMulLong16Sx4, Iop_QDMulLong32Sx2,
1401      /* Polynomial multiplication treats its arguments as coefficients of
1402         polynomials over {0, 1}. */
1403      Iop_PolynomialMul8x16, /* (V128, V128) -> V128 */
1404      Iop_PolynomialMull8x8, /*   (I64, I64) -> V128 */
1405
1406      /* PAIRWISE operations */
1407      /* Iop_PwFoo16x4( [a,b,c,d], [e,f,g,h] ) =
1408            [Foo16(a,b), Foo16(c,d), Foo16(e,f), Foo16(g,h)] */
1409      Iop_PwAdd8x16, Iop_PwAdd16x8, Iop_PwAdd32x4,
1410      Iop_PwAdd32Fx2,
1411      /* Longening variant is unary. The resulting vector contains half as
1412         many elements as the operand, but each is twice as wide.
1413         Example:
1414            Iop_PwAddL16Ux4( [a,b,c,d] ) = [a+b,c+d]
1415               where a+b and c+d are unsigned 32-bit values. */
1416      Iop_PwAddL8Ux16, Iop_PwAddL16Ux8, Iop_PwAddL32Ux4,
1417      Iop_PwAddL8Sx16, Iop_PwAddL16Sx8, Iop_PwAddL32Sx4,
1418
1419      /* ABSOLUTE VALUE */
1420      Iop_Abs8x16, Iop_Abs16x8, Iop_Abs32x4,
1421
1422      /* AVERAGING: note: (arg1 + arg2 + 1) >>u 1 */
1423      Iop_Avg8Ux16, Iop_Avg16Ux8, Iop_Avg32Ux4,
1424      Iop_Avg8Sx16, Iop_Avg16Sx8, Iop_Avg32Sx4,
1425
1426      /* MIN/MAX */
1427      Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4,
1428      Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4,
1429      Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4,
1430      Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4,
1431
1432      /* COMPARISON */
1433      Iop_CmpEQ8x16,  Iop_CmpEQ16x8,  Iop_CmpEQ32x4,  Iop_CmpEQ64x2,
1434      Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2,
1435      Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4,
1436
1437      /* COUNT ones / leading zeroes / leading sign bits (not including topmost
1438         bit) */
1439      Iop_Cnt8x16,
1440      Iop_Clz8Sx16, Iop_Clz16Sx8, Iop_Clz32Sx4,
1441      Iop_Cls8Sx16, Iop_Cls16Sx8, Iop_Cls32Sx4,
1442
1443      /* VECTOR x SCALAR SHIFT (shift amt :: Ity_I8) */
1444      Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2,
1445      Iop_ShrN8x16, Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2,
1446      Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2,
1447
1448      /* VECTOR x VECTOR SHIFT / ROTATE */
1449      Iop_Shl8x16, Iop_Shl16x8, Iop_Shl32x4, Iop_Shl64x2,
1450      Iop_Shr8x16, Iop_Shr16x8, Iop_Shr32x4, Iop_Shr64x2,
1451      Iop_Sar8x16, Iop_Sar16x8, Iop_Sar32x4, Iop_Sar64x2,
1452      Iop_Sal8x16, Iop_Sal16x8, Iop_Sal32x4, Iop_Sal64x2,
1453      Iop_Rol8x16, Iop_Rol16x8, Iop_Rol32x4,
1454
1455      /* VECTOR x VECTOR SATURATING SHIFT */
1456      Iop_QShl8x16, Iop_QShl16x8, Iop_QShl32x4, Iop_QShl64x2,
1457      Iop_QSal8x16, Iop_QSal16x8, Iop_QSal32x4, Iop_QSal64x2,
1458      /* VECTOR x INTEGER SATURATING SHIFT */
1459      Iop_QShlN8Sx16, Iop_QShlN16Sx8, Iop_QShlN32Sx4, Iop_QShlN64Sx2,
1460      Iop_QShlN8x16, Iop_QShlN16x8, Iop_QShlN32x4, Iop_QShlN64x2,
1461      Iop_QSalN8x16, Iop_QSalN16x8, Iop_QSalN32x4, Iop_QSalN64x2,
1462
1463      /* NARROWING (binary)
1464         -- narrow 2xV128 into 1xV128, hi half from left arg */
1465      /* See comments above w.r.t. U vs S issues in saturated narrowing. */
1466      Iop_QNarrowBin16Sto8Ux16, Iop_QNarrowBin32Sto16Ux8,
1467      Iop_QNarrowBin16Sto8Sx16, Iop_QNarrowBin32Sto16Sx8,
1468      Iop_QNarrowBin16Uto8Ux16, Iop_QNarrowBin32Uto16Ux8,
1469      Iop_NarrowBin16to8x16, Iop_NarrowBin32to16x8,
1470
1471      /* NARROWING (unary) -- narrow V128 into I64 */
1472      Iop_NarrowUn16to8x8, Iop_NarrowUn32to16x4, Iop_NarrowUn64to32x2,
1473      /* Saturating narrowing from signed source to signed/unsigned destination */
1474      Iop_QNarrowUn16Sto8Sx8, Iop_QNarrowUn32Sto16Sx4, Iop_QNarrowUn64Sto32Sx2,
1475      Iop_QNarrowUn16Sto8Ux8, Iop_QNarrowUn32Sto16Ux4, Iop_QNarrowUn64Sto32Ux2,
1476      /* Saturating narrowing from unsigned source to unsigned destination */
1477      Iop_QNarrowUn16Uto8Ux8, Iop_QNarrowUn32Uto16Ux4, Iop_QNarrowUn64Uto32Ux2,
1478
1479      /* WIDENING -- sign or zero extend each element of the argument
1480         vector to twice its original size.  The resulting vector consists of
1481         the same number of elements but each element and the vector itself
1482         are twice as wide.
1483         All operations are I64->V128.
1484         Example
1485            Iop_Widen32Sto64x2( [a, b] ) = [c, d]
1486               where c = Iop_32Sto64(a) and d = Iop_32Sto64(b) */
1487      Iop_Widen8Uto16x8, Iop_Widen16Uto32x4, Iop_Widen32Uto64x2,
1488      Iop_Widen8Sto16x8, Iop_Widen16Sto32x4, Iop_Widen32Sto64x2,
1489
1490      /* INTERLEAVING */
1491      /* Interleave lanes from low or high halves of
1492         operands.  Most-significant result lane is from the left
1493         arg. */
1494      Iop_InterleaveHI8x16, Iop_InterleaveHI16x8,
1495      Iop_InterleaveHI32x4, Iop_InterleaveHI64x2,
1496      Iop_InterleaveLO8x16, Iop_InterleaveLO16x8,
1497      Iop_InterleaveLO32x4, Iop_InterleaveLO64x2,
1498      /* Interleave odd/even lanes of operands.  Most-significant result lane
1499         is from the left arg. */
1500      Iop_InterleaveOddLanes8x16, Iop_InterleaveEvenLanes8x16,
1501      Iop_InterleaveOddLanes16x8, Iop_InterleaveEvenLanes16x8,
1502      Iop_InterleaveOddLanes32x4, Iop_InterleaveEvenLanes32x4,
1503
1504      /* CONCATENATION -- build a new value by concatenating either
1505         the even or odd lanes of both operands. */
1506      Iop_CatOddLanes8x16, Iop_CatOddLanes16x8, Iop_CatOddLanes32x4,
1507      Iop_CatEvenLanes8x16, Iop_CatEvenLanes16x8, Iop_CatEvenLanes32x4,
1508
1509      /* GET elements of VECTOR
1510         GET is binop (V128, I8) -> I<elem_size> */
1511      /* Note: the arm back-end handles only constant second argument. */
1512      Iop_GetElem8x16, Iop_GetElem16x8, Iop_GetElem32x4, Iop_GetElem64x2,
1513
1514      /* DUPLICATING -- copy value to all lanes */
1515      Iop_Dup8x16,   Iop_Dup16x8,   Iop_Dup32x4,
1516
1517      /* EXTRACT -- copy 16-arg3 highest bytes from arg1 to 16-arg3 lowest bytes
1518         of result and arg3 lowest bytes of arg2 to arg3 highest bytes of
1519         result.
1520         It is a triop: (V128, V128, I8) -> V128 */
1521      /* Note: the ARM back end handles only constant arg3 in this operation. */
1522      Iop_ExtractV128,
1523
1524      /* REVERSE the order of elements within each half-word, word
1525         or double-word */
1526      /* Examples:
1527            Reverse32_16x8([a,b,c,d,e,f,g,h]) = [b,a,d,c,f,e,h,g]
1528            Reverse64_16x8([a,b,c,d,e,f,g,h]) = [d,c,b,a,h,g,f,e] */
1529      Iop_Reverse16_8x16,
1530      Iop_Reverse32_8x16, Iop_Reverse32_16x8,
1531      Iop_Reverse64_8x16, Iop_Reverse64_16x8, Iop_Reverse64_32x4,
1532
1533      /* PERMUTING -- copy src bytes to dst,
1534         as indexed by control vector bytes:
1535            for i in 0 .. 15 . result[i] = argL[ argR[i] ]
1536         argR[i] values may only be in the range 0 .. 15, else behaviour
1537         is undefined. */
1538      Iop_Perm8x16,
1539      Iop_Perm32x4, /* ditto, except argR values are restricted to 0 .. 3 */
1540
1541      /* MISC CONVERSION -- get high bits of each byte lane, a la
1542         x86/amd64 pmovmskb */
1543      Iop_GetMSBs8x16, /* V128 -> I16 */
1544
1545      /* Vector Reciprocal Estimate and Vector Reciprocal Square Root Estimate
1546         See floating-point equivalents for details. */
1547      Iop_Recip32x4, Iop_Rsqrte32x4,
1548
1549      /* ------------------ 256-bit SIMD Integer. ------------------ */
1550
1551      /* Pack/unpack */
1552      Iop_V256to64_0,  // V256 -> I64, extract least significant lane
1553      Iop_V256to64_1,
1554      Iop_V256to64_2,
1555      Iop_V256to64_3,  // V256 -> I64, extract most significant lane
1556
1557      Iop_64x4toV256,  // (I64,I64,I64,I64)->V256
1558                       // first arg is most significant lane
1559
1560      Iop_V256toV128_0, // V256 -> V128, less significant lane
1561      Iop_V256toV128_1, // V256 -> V128, more significant lane
1562      Iop_V128HLtoV256, // (V128,V128)->V256, first arg is most signif
1563
1564      Iop_AndV256,
1565      Iop_OrV256,
1566      Iop_XorV256,
1567      Iop_NotV256,
1568
1569      /* MISC (vector integer cmp != 0) */
1570      Iop_CmpNEZ8x32, Iop_CmpNEZ16x16, Iop_CmpNEZ32x8, Iop_CmpNEZ64x4,
1571
1572      Iop_Add8x32,    Iop_Add16x16,    Iop_Add32x8,    Iop_Add64x4,
1573      Iop_Sub8x32,    Iop_Sub16x16,    Iop_Sub32x8,    Iop_Sub64x4,
1574
1575      Iop_CmpEQ8x32,  Iop_CmpEQ16x16,  Iop_CmpEQ32x8,  Iop_CmpEQ64x4,
1576      Iop_CmpGT8Sx32, Iop_CmpGT16Sx16, Iop_CmpGT32Sx8, Iop_CmpGT64Sx4,
1577
1578      Iop_ShlN16x16, Iop_ShlN32x8, Iop_ShlN64x4,
1579      Iop_ShrN16x16, Iop_ShrN32x8, Iop_ShrN64x4,
1580      Iop_SarN16x16, Iop_SarN32x8,
1581
1582      Iop_Max8Sx32, Iop_Max16Sx16, Iop_Max32Sx8,
1583      Iop_Max8Ux32, Iop_Max16Ux16, Iop_Max32Ux8,
1584      Iop_Min8Sx32, Iop_Min16Sx16, Iop_Min32Sx8,
1585      Iop_Min8Ux32, Iop_Min16Ux16, Iop_Min32Ux8,
1586
1587      Iop_Mul16x16, Iop_Mul32x8,
1588      Iop_MulHi16Ux16, Iop_MulHi16Sx16,
1589
1590      Iop_QAdd8Ux32, Iop_QAdd16Ux16,
1591      Iop_QAdd8Sx32, Iop_QAdd16Sx16,
1592      Iop_QSub8Ux32, Iop_QSub16Ux16,
1593      Iop_QSub8Sx32, Iop_QSub16Sx16,
1594
1595      Iop_Avg8Ux32, Iop_Avg16Ux16,
1596
1597      Iop_Perm32x8,
1598
1599      /* ------------------ 256-bit SIMD FP. ------------------ */
1600      Iop_Add64Fx4,
1601      Iop_Sub64Fx4,
1602      Iop_Mul64Fx4,
1603      Iop_Div64Fx4,
1604      Iop_Add32Fx8,
1605      Iop_Sub32Fx8,
1606      Iop_Mul32Fx8,
1607      Iop_Div32Fx8,
1608
1609      Iop_Sqrt32Fx8,
1610      Iop_Sqrt64Fx4,
1611      Iop_RSqrt32Fx8,
1612      Iop_Recip32Fx8,
1613
1614      Iop_Max32Fx8, Iop_Min32Fx8,
1615      Iop_Max64Fx4, Iop_Min64Fx4,
1616      Iop_LAST      /* must be the last enumerator */
1617   }
1618   IROp;
1619
1620/* Pretty-print an op.  Takes the IROp by value and returns nothing. */
1621extern void ppIROp ( IROp );
1622
1623
1624/* Encoding of IEEE754-specified rounding modes.
1625   Note, various front and back ends rely on the actual numerical
1626   values of these, so do not change them.  Every enumerator below
   is given an explicit value (0 .. 7). */
1627typedef
1628   enum {
1629      Irrm_NEAREST              = 0,  // Round to nearest, ties to even
1630      Irrm_NegINF               = 1,  // Round to negative infinity
1631      Irrm_PosINF               = 2,  // Round to positive infinity
1632      Irrm_ZERO                 = 3,  // Round toward zero
1633      Irrm_NEAREST_TIE_AWAY_0   = 4,  // Round to nearest, ties away from 0
1634      Irrm_PREPARE_SHORTER      = 5,  // Round to prepare for shorter
1635                                      // precision
1636      Irrm_AWAY_FROM_ZERO       = 6,  // Round away from 0
1637      Irrm_NEAREST_TIE_TOWARD_0 = 7   // Round to nearest, ties towards 0
1638   }
1639   IRRoundingMode;
1640
1641/* Binary floating point comparison result values.
1642   This is also derived from what IA32 does.
   NOTE(review): the enumerator names suggest UN = unordered,
   LT = less than, GT = greater than, EQ = equal -- confirm against
   the code that produces and consumes these values. */
1643typedef
1644   enum {
1645      Ircr_UN = 0x45,
1646      Ircr_LT = 0x01,
1647      Ircr_GT = 0x00,
1648      Ircr_EQ = 0x40
1649   }
1650   IRCmpFResult;
1651
/* Aliases of IRCmpFResult under F32/F64/F128-specific names.  Being
   plain typedefs, they all share IRCmpFResult's enumerators. */
1652typedef IRCmpFResult IRCmpF32Result;
1653typedef IRCmpFResult IRCmpF64Result;
1654typedef IRCmpFResult IRCmpF128Result;
1655
1656/* Decimal floating point result values.  These are also aliases of
   IRCmpFResult (IRCmpD64Result and IRCmpD128Result via IRCmpDResult). */
1657typedef IRCmpFResult IRCmpDResult;
1658typedef IRCmpDResult IRCmpD64Result;
1659typedef IRCmpDResult IRCmpD128Result;
1660
1661/* ------------------ Expressions ------------------ */
1662
/* struct _IRQop and struct _IRTriop are defined further down, after
   struct _IRExpr.  They are declared here first because struct _IRExpr
   holds pointers to them (Iex.Qop.details and Iex.Triop.details). */
1663typedef struct _IRQop   IRQop;   /* forward declaration */
1664typedef struct _IRTriop IRTriop; /* forward declaration */
1665
1666
1667/* The different kinds of expressions.  Their meaning is explained below
1668   in the comments for IRExpr.  Each tag Iex_<X> corresponds to the
   union member Iex.<X> of struct _IRExpr.  The first tag is pinned to
   0x1900; the remaining tags take consecutive values after it. */
1669typedef
1670   enum {
1671      Iex_Binder=0x1900,
1672      Iex_Get,
1673      Iex_GetI,
1674      Iex_RdTmp,
1675      Iex_Qop,
1676      Iex_Triop,
1677      Iex_Binop,
1678      Iex_Unop,
1679      Iex_Load,
1680      Iex_Const,
1681      Iex_ITE,
1682      Iex_CCall
1683   }
1684   IRExprTag;
1685
1686/* An expression.  Stored as a tagged union.  'tag' indicates what kind
1687   of expression this is.  'Iex' is the union that holds the fields.  If
1688   an IRExpr 'e' has e.tag equal to Iex_Load, then it's a load
1689   expression, and the fields can be accessed with
1690   'e.Iex.Load.<fieldname>'.
1691
1692   For each kind of expression, we show what it looks like when
1693   pretty-printed with ppIRExpr().

   IRExpr is a typedef name for struct _IRExpr, whose definition
   follows.
1694*/
1695typedef
1696   struct _IRExpr
1697   IRExpr;
1698
/* Definition of IRExpr: a tag plus a union ('Iex') with one member per
   kind of expression.  The member to use is the one matching 'tag'. */
1699struct _IRExpr {
1700   IRExprTag tag;       /* which kind of expression this is */
1701   union {
1702      /* Used only in pattern matching within Vex.  Should not be seen
1703         outside of Vex. */
1704      struct {
1705         Int binder;
1706      } Binder;
1707
1708      /* Read a guest register, at a fixed offset in the guest state.
1709         ppIRExpr output: GET:<ty>(<offset>), eg. GET:I32(0)
1710      */
1711      struct {
1712         Int    offset;    /* Offset into the guest state */
1713         IRType ty;        /* Type of the value being read */
1714      } Get;
1715
1716      /* Read a guest register at a non-fixed offset in the guest
1717         state.  This allows circular indexing into parts of the guest
1718         state, which is essential for modelling situations where the
1719         identity of guest registers is not known until run time.  One
1720         example is the x87 FP register stack.
1721
1722         The part of the guest state to be treated as a circular array
1723         is described in the IRRegArray 'descr' field.  It holds the
1724         offset of the first element in the array, the type of each
1725         element, and the number of elements.
1726
1727         The array index is indicated rather indirectly, in a way
1728         which makes optimisation easy: as the sum of variable part
1729         (the 'ix' field) and a constant offset (the 'bias' field).
1730
1731         Since the indexing is circular, the actual array index to use
1732         is computed as (ix + bias) % num-of-elems-in-the-array.
1733
1734         Here's an example.  The description
1735
1736            (96:8xF64)[t39,-7]
1737
1738         describes an array of 8 F64-typed values, the
1739         guest-state-offset of the first being 96.  This array is
1740         being indexed at (t39 - 7) % 8.
1741
1742         It is important to get the array size/type exactly correct
1743         since IR optimisation looks closely at such info in order to
1744         establish aliasing/non-aliasing between separate GetI and
1745         PutI events, which is used to establish when they can be
1746         reordered, etc.  Putting incorrect info in will lead to
1747         obscure IR optimisation bugs.
1748
1749            ppIRExpr output: GETI<descr>[<ix>,<bias>]
1750                         eg. GETI(128:8xI8)[t1,0]
1751      */
1752      struct {
1753         IRRegArray* descr; /* Part of guest state treated as circular */
1754         IRExpr*     ix;    /* Variable part of index into array */
1755         Int         bias;  /* Constant offset part of index into array */
1756      } GetI;
1757
1758      /* The value held by a temporary.
1759         ppIRExpr output: t<tmp>, eg. t1
1760      */
1761      struct {
1762         IRTemp tmp;       /* The temporary number */
1763      } RdTmp;
1764
1765      /* A quaternary operation.
1766         ppIRExpr output: <op>(<arg1>, <arg2>, <arg3>, <arg4>),
1767                      eg. MAddF64r32(t1, t2, t3, t4)
1768      */
1769      struct {
1770        IRQop* details;   /* op-code and four operands (struct _IRQop) */
1771      } Qop;
1772
1773      /* A ternary operation.
1774         ppIRExpr output: <op>(<arg1>, <arg2>, <arg3>),
1775                      eg. MulF64(1, 2.0, 3.0)
1776      */
1777      struct {
1778        IRTriop* details; /* op-code and three operands (struct _IRTriop) */
1779      } Triop;
1780
1781      /* A binary operation.
1782         ppIRExpr output: <op>(<arg1>, <arg2>), eg. Add32(t1,t2)
1783      */
1784      struct {
1785         IROp op;          /* op-code   */
1786         IRExpr* arg1;     /* operand 1 */
1787         IRExpr* arg2;     /* operand 2 */
1788      } Binop;
1789
1790      /* A unary operation.
1791         ppIRExpr output: <op>(<arg>), eg. Neg8(t1)
1792      */
1793      struct {
1794         IROp    op;       /* op-code */
1795         IRExpr* arg;      /* operand */
1796      } Unop;
1797
1798      /* A load from memory -- a normal load, not a load-linked.
1799         Load-Linkeds (and Store-Conditionals) are instead represented
1800         by IRStmt.LLSC since Load-Linkeds have side effects and so
1801         are not semantically valid IRExpr's.
1802         ppIRExpr output: LD<end>:<ty>(<addr>), eg. LDle:I32(t1)
1803      */
1804      struct {
1805         IREndness end;    /* Endian-ness of the load */
1806         IRType    ty;     /* Type of the loaded value */
1807         IRExpr*   addr;   /* Address being loaded from */
1808      } Load;
1809
1810      /* A constant-valued expression.
1811         ppIRExpr output: <con>, eg. 0x4:I32
1812      */
1813      struct {
1814         IRConst* con;     /* The constant itself */
1815      } Const;
1816
1817      /* A call to a pure (no side-effects) helper C function.
1818
1819         With the 'cee' field, 'name' is the function's name.  It is
1820         only used for pretty-printing purposes.  The address to call
1821         (host address, of course) is stored in the 'addr' field
1822         inside 'cee'.
1823
1824         The 'args' field is a NULL-terminated array of arguments.
1825         The stated return IRType, and the implied argument types,
1826         must match that of the function being called well enough so
1827         that the back end can actually generate correct code for the
1828         call.
1829
1830         The called function **must** satisfy the following:
1831
1832         * no side effects -- must be a pure function, the result of
1833           which depends only on the passed parameters.
1834
1835         * it may not look at, nor modify, any of the guest state
1836           since that would hide guest state transitions from
1837           instrumenters
1838
1839         * it may not access guest memory, since that would hide
1840           guest memory transactions from the instrumenters
1841
1842         * it must not assume that arguments are being evaluated in a
1843           particular order. The order of evaluation is unspecified.
1844
1845         This is restrictive, but makes the semantics clean, and does
1846         not interfere with IR optimisation.
1847
1848         If you want to call a helper which can mess with guest state
1849         and/or memory, instead use Ist_Dirty.  This is a lot more
1850         flexible, but you have to give a bunch of details about what
1851         the helper does (and you better be telling the truth,
1852         otherwise any derived instrumentation will be wrong).  Also
1853         Ist_Dirty inhibits various IR optimisations and so can cause
1854         quite poor code to be generated.  Try to avoid it.
1855
1856         In principle it would be allowable to have the arg vector
1857         contain the special value IRExprP__VECRET, although not
1858         IRExprP__BBPTR.  However, at the moment there is no
1859         requirement for clean helper calls to be able to return V128
1860         or V256 values.  Hence this is not allowed.
1861
1862         ppIRExpr output: <cee>(<args>):<retty>
1863                      eg. foo{0x80489304}(t1, t2):I32
1864      */
1865      struct {
1866         IRCallee* cee;    /* Function to call. */
1867         IRType    retty;  /* Type of return value. */
1868         IRExpr**  args;   /* Vector of argument expressions. */
1869      }  CCall;
1870
1871      /* A ternary if-then-else operator.  It returns iftrue if cond is
1872         nonzero, iffalse otherwise.  Note that it is STRICT, ie. both
1873         iftrue and iffalse are evaluated in all cases.
1874
1875         ppIRExpr output: ITE(<cond>,<iftrue>,<iffalse>),
1876                         eg. ITE(t6,t7,t8)
1877      */
1878      struct {
1879         IRExpr* cond;     /* Condition */
1880         IRExpr* iftrue;   /* True expression */
1881         IRExpr* iffalse;  /* False expression */
1882      } ITE;
1883   } Iex;
1884};
1885
1886/* Expression auxiliaries: a ternary expression.  This is what
   IRExpr.Iex.Triop.details points at. */
1887struct _IRTriop {
1888   IROp op;          /* op-code   */
1889   IRExpr* arg1;     /* operand 1 */
1890   IRExpr* arg2;     /* operand 2 */
1891   IRExpr* arg3;     /* operand 3 */
1892};
1893
1894/* Expression auxiliaries: a quaternary expression.  This is what
   IRExpr.Iex.Qop.details points at. */
1895struct _IRQop {
1896   IROp op;          /* op-code   */
1897   IRExpr* arg1;     /* operand 1 */
1898   IRExpr* arg2;     /* operand 2 */
1899   IRExpr* arg3;     /* operand 3 */
1900   IRExpr* arg4;     /* operand 4 */
1901};
1902
1903
1904/* Two special constants of type IRExpr*, which can ONLY be used in
1905   argument lists for dirty helper calls (IRDirty.args) and in NO
1906   OTHER PLACES.  And then only in very limited ways.  These constants
1907   are not pointer-aligned and hence can't be confused with real
1908   IRExpr*s nor with NULL. */
1909
1910/* Denotes an argument which (in the helper) takes a pointer to a
1911   (naturally aligned) V128 or V256, into which the helper is expected
1912   to write its result.  Use of IRExprP__VECRET is strictly
1913   controlled.  If the helper returns a V128 or V256 value then
1914   IRExprP__VECRET must appear exactly once in the arg list, although
1915   it can appear anywhere, and the helper must have a C 'void' return
1916   type.  If the helper returns any other type, IRExprP__VECRET may
1917   not appear in the argument list.

   The value is the integer 9 cast to IRExpr*: a marker to be compared
   against, not an address to dereference. */
1918#define IRExprP__VECRET ((IRExpr*)9)
1919
1920/* Denotes a void* argument which is passed to the helper, which at
1921   run time will point to the thread's guest state area.  This can
1922   only appear at most once in an argument list, and it may not appear
1923   at all in argument lists for clean helper calls.

   The value is the integer 17 cast to IRExpr*: a marker to be compared
   against, not an address to dereference. */
1924#define IRExprP__BBPTR  ((IRExpr*)17)
1925
/* Returns nonzero iff 'e' is one of the two special marker values
   IRExprP__VECRET or IRExprP__BBPTR.  Only the pointer value is
   compared; 'e' is never dereferenced, so this is safe to call on any
   entry of an argument vector. */
1926static inline Bool is_IRExprP__VECRET_or_BBPTR ( IRExpr* e ) {
1927   return e == IRExprP__VECRET || e == IRExprP__BBPTR;
1928}
1929
1930
1931/* Expression constructors.  There is one per IRExprTag, and the
   parameters of each mirror the fields of the corresponding Iex union
   member (for Qop and Triop, the fields of struct _IRQop and
   struct _IRTriop).  Each returns an IRExpr*. */
1932extern IRExpr* IRExpr_Binder ( Int binder );
1933extern IRExpr* IRExpr_Get    ( Int off, IRType ty );
1934extern IRExpr* IRExpr_GetI   ( IRRegArray* descr, IRExpr* ix, Int bias );
1935extern IRExpr* IRExpr_RdTmp  ( IRTemp tmp );
1936extern IRExpr* IRExpr_Qop    ( IROp op, IRExpr* arg1, IRExpr* arg2,
1937                                        IRExpr* arg3, IRExpr* arg4 );
1938extern IRExpr* IRExpr_Triop  ( IROp op, IRExpr* arg1,
1939                                        IRExpr* arg2, IRExpr* arg3 );
1940extern IRExpr* IRExpr_Binop  ( IROp op, IRExpr* arg1, IRExpr* arg2 );
1941extern IRExpr* IRExpr_Unop   ( IROp op, IRExpr* arg );
1942extern IRExpr* IRExpr_Load   ( IREndness end, IRType ty, IRExpr* addr );
1943extern IRExpr* IRExpr_Const  ( IRConst* con );
1944extern IRExpr* IRExpr_CCall  ( IRCallee* cee, IRType retty, IRExpr** args );
1945extern IRExpr* IRExpr_ITE    ( IRExpr* cond, IRExpr* iftrue, IRExpr* iffalse );
1946
1947/* Deep-copy an IRExpr, returning the copy. */
1948extern IRExpr* deepCopyIRExpr ( IRExpr* );
1949
1950/* Pretty-print an IRExpr.  The output format for each kind of
   expression is shown in the comments inside struct _IRExpr. */
1951extern void ppIRExpr ( IRExpr* );
1952
1953/* NULL-terminated IRExpr vector constructors, suitable for
1954   use as arg lists in clean/dirty helper calls.  mkIRExprVec_<N>
   takes exactly N element pointers, for N = 0 .. 8. */
1955extern IRExpr** mkIRExprVec_0 ( void );
1956extern IRExpr** mkIRExprVec_1 ( IRExpr* );
1957extern IRExpr** mkIRExprVec_2 ( IRExpr*, IRExpr* );
1958extern IRExpr** mkIRExprVec_3 ( IRExpr*, IRExpr*, IRExpr* );
1959extern IRExpr** mkIRExprVec_4 ( IRExpr*, IRExpr*, IRExpr*, IRExpr* );
1960extern IRExpr** mkIRExprVec_5 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
1961                                IRExpr* );
1962extern IRExpr** mkIRExprVec_6 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
1963                                IRExpr*, IRExpr* );
1964extern IRExpr** mkIRExprVec_7 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
1965                                IRExpr*, IRExpr*, IRExpr* );
1966extern IRExpr** mkIRExprVec_8 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
1967                                IRExpr*, IRExpr*, IRExpr*, IRExpr*);
1968
1969/* IRExpr vector copiers (both take and return an IRExpr**):
1970   - shallowCopy: shallow-copy (ie. create a new vector that shares the
1971     elements with the original).
1972   - deepCopy: deep-copy (ie. create a completely new vector). */
1973extern IRExpr** shallowCopyIRExprVec ( IRExpr** );
1974extern IRExpr** deepCopyIRExprVec ( IRExpr** );
1975
1976/* Make a constant expression from the given host word taking into
1977   account (of course) the host word size.  The expression is
   returned as an IRExpr*. */
1978extern IRExpr* mkIRExpr_HWord ( HWord );
1979
1980/* Convenience function for constructing clean helper calls.
   'retty' and 'args' have the same names and types as the fields of
   Iex.CCall.  NOTE(review): 'regparms', 'name' and 'addr' presumably
   describe the callee (compare unsafeIRDirty_0_N, which builds an
   IRCallee from the same three parameters) -- confirm against the
   implementation. */
1981extern
1982IRExpr* mkIRExprCCall ( IRType retty,
1983                        Int regparms, const HChar* name, void* addr,
1984                        IRExpr** args );
1985
1986
1987/* Convenience functions for atoms (IRExprs which are either Iex_RdTmp or
1988 * Iex_Const).
 * isIRAtom reads e->tag, so 'e' must be a real IRExpr pointer: not NULL
 * and not one of the special IRExprP__VECRET / IRExprP__BBPTR values. */
1989static inline Bool isIRAtom ( IRExpr* e ) {
1990   return toBool(e->tag == Iex_RdTmp || e->tag == Iex_Const);
1991}
1992
1993/* Are these two IR atoms identical?  Causes an assertion
1994   failure if they are passed non-atoms.  (NOTE(review): "atom" is
   presumably as tested by isIRAtom, ie. Iex_RdTmp or Iex_Const --
   confirm against the implementation.) */
1995extern Bool eqIRAtom ( IRExpr*, IRExpr* );
1996
1997
1998/* ------------------ Jump kinds ------------------ */
1999
2000/* This describes hints which can be passed to the dispatcher at guest
2001   control-flow transfer points.
2002
2003   Re Ijk_TInval: the guest state _must_ have two pseudo-registers,
2004   guest_TISTART and guest_TILEN, which specify the start and length
2005   of the region to be invalidated.  These are both the size of a
2006   guest word.  It is the responsibility of the relevant toIR.c to
2007   ensure that these are filled in with suitable values before issuing
2008   a jump of kind Ijk_TInval.
2009
2010   Re Ijk_EmWarn and Ijk_EmFail: the guest state must have a
2011   pseudo-register guest_EMNOTE, which is 32-bits regardless of the
2012   host or guest word size.  That register should be made to hold a
2013   VexEmNote value to indicate the reason for the exit.
2014
2015   In the case of Ijk_EmFail, the exit is fatal (Vex-generated code
2016   cannot continue) and so the jump destination can be anything.
2017
2018   Re Ijk_Sys_ (syscall jumps): the guest state must have a
2019   pseudo-register guest_IP_AT_SYSCALL, which is the size of a guest
2020   word.  Front ends should set this to be the IP at the most recently
2021   executed kernel-entering (system call) instruction.  This makes it
2022   very much easier (viz, actually possible at all) to back up the
2023   guest to restart a syscall that has been interrupted by a signal.
2024*/
2025typedef
2026   enum {
2027      Ijk_INVALID=0x1A00, /* first value pinned; the rest follow on */
2028      Ijk_Boring,         /* not interesting; just goto next */
2029      Ijk_Call,           /* guest is doing a call */
2030      Ijk_Ret,            /* guest is doing a return */
2031      Ijk_ClientReq,      /* do guest client req before continuing */
2032      Ijk_Yield,          /* client is yielding to thread scheduler */
2033      Ijk_EmWarn,         /* report emulation warning before continuing */
2034      Ijk_EmFail,         /* emulation critical (FATAL) error; give up */
2035      Ijk_NoDecode,       /* current instruction cannot be decoded */
2036      Ijk_MapFail,        /* Vex-provided address translation failed */
2037      Ijk_TInval,         /* Invalidate translations before continuing. */
2038      Ijk_NoRedir,        /* Jump to un-redirected guest addr */
2039      Ijk_SigTRAP,        /* current instruction synths SIGTRAP */
2040      Ijk_SigSEGV,        /* current instruction synths SIGSEGV */
2041      Ijk_SigBUS,         /* current instruction synths SIGBUS */
2042      Ijk_SigFPE_IntDiv,  /* current instruction synths SIGFPE - IntDiv */
2043      Ijk_SigFPE_IntOvf,  /* current instruction synths SIGFPE - IntOvf */
2044      /* Unfortunately, various guest-dependent syscall kinds.  They
2045         all mean: do a syscall before continuing. */
2046      Ijk_Sys_syscall,    /* amd64 'syscall', ppc 'sc', arm 'svc #0' */
2047      Ijk_Sys_int32,      /* amd64/x86 'int $0x20' */
2048      Ijk_Sys_int128,     /* amd64/x86 'int $0x80' */
2049      Ijk_Sys_int129,     /* amd64/x86 'int $0x81' */
2050      Ijk_Sys_int130,     /* amd64/x86 'int $0x82' */
2051      Ijk_Sys_sysenter    /* x86 'sysenter'.  guest_EIP becomes
2052                             invalid at the point this happens. */
2053   }
2054   IRJumpKind;
2055
/* Pretty-print an IRJumpKind (same pp* naming as the other
   pretty-printers declared in this file). */
2056extern void ppIRJumpKind ( IRJumpKind );
2057
2058
2059/* ------------------ Dirty helper calls ------------------ */
2060
2061/* A dirty call is a flexible mechanism for calling (possibly
2062   conditionally) a helper function or procedure.  The helper function
2063   may read, write or modify client memory, and may read, write or
2064   modify client state.  It can take arguments and optionally return a
2065   value.  It may return different results and/or do different things
2066   when called repeatedly with the same arguments, by means of storing
2067   private state.
2068
2069   If a value is returned, it is assigned to the nominated return
2070   temporary.
2071
2072   Dirty calls are statements rather than expressions for obvious
2073   reasons.  If a dirty call is marked as writing guest state, any
2074   pre-existing values derived from the written parts of the guest
2075   state are invalid.  Similarly, if the dirty call is stated as
2076   writing memory, any pre-existing loaded values are invalidated by
2077   it.
2078
2079   In order that instrumentation is possible, the call must state, and
2080   state correctly:
2081
2082   * Whether it reads, writes or modifies memory, and if so where.
2083
2084   * Whether it reads, writes or modifies guest state, and if so which
2085     pieces.  Several pieces may be stated, and their extents must be
2086     known at translation-time.  Each piece is allowed to repeat some
2087     number of times at a fixed interval, if required.
2088
2089   Normally, code is generated to pass just the args to the helper.
2090   However, if IRExprP__BBPTR is present in the argument list (at most
2091   one instance is allowed), then the baseblock pointer is passed for
2092   that arg, so that the callee can access the guest state.  It is
2093   invalid for .nFxState to be zero but IRExprP__BBPTR to be present,
2094   since .nFxState==0 is a claim that the call does not access guest
2095   state.
2096
2097   IMPORTANT NOTE re GUARDS: Dirty calls are strict, very strict.  The
2098   arguments and 'mFx' are evaluated REGARDLESS of the guard value.
2099   The order of argument evaluation is unspecified.  The guard
2100   expression is evaluated AFTER the arguments and 'mFx' have been
2101   evaluated.  'mFx' is expected (by Memcheck) to be a defined value
2102   even if the guard evaluates to false.
2103*/
2104
/* Number of entries in the IRDirty.fxState[] array, and hence the
   upper bound on IRDirty.nFxState. */
2105#define VEX_N_FXSTATE  7   /* enough for FXSAVE/FXRSTOR on x86 */
2106
2107/* Effects on resources (eg. registers, memory locations).  Used by
   IRDirty for both its memory effect (mFx) and its per-piece guest
   state effects (fxState[].fx). */
2108typedef
2109   enum {
2110      Ifx_None=0x1B00,      /* no effect */
2111      Ifx_Read,             /* reads the resource */
2112      Ifx_Write,            /* writes the resource */
2113      Ifx_Modify,           /* modifies the resource (presumably: both
                               reads and writes it -- confirm) */
2114   }
2115   IREffect;
2116
2117/* Pretty-print an IREffect.  Takes the value directly; returns nothing. */
2118extern void ppIREffect ( IREffect );
2119
/* Description of one dirty helper call: what to call and with which
   arguments, the guard, the optional result temporary, and the
   declared effects on memory and on guest state. */
2120typedef
2121   struct _IRDirty {
2122      /* What to call, and details of args/results.  .guard must be
2123         non-NULL.  If .tmp is not IRTemp_INVALID, then the call
2124         returns a result which is placed in .tmp.  If at runtime the
2125         guard evaluates to false, .tmp has a 0x555..555 bit pattern
2126         written to it.  Hence conditional calls that assign .tmp are
2127         allowed. */
2128      IRCallee* cee;    /* where to call */
2129      IRExpr*   guard;  /* :: Ity_Bit.  Controls whether call happens */
      /* NOTE(review): guards of guarded stores are documented in this
         file as :: Ity_I1; "Ity_Bit" above is presumably the same
         1-bit type under an older name -- confirm. */
2130      /* The args vector may contain IRExprP__BBPTR and/or
2131         IRExprP__VECRET, in both cases, at most once. */
2132      IRExpr**  args;   /* arg vector, ends in NULL. */
2133      IRTemp    tmp;    /* to assign result to, or IRTemp_INVALID if none */
2134
2135      /* Mem effects; we allow only one R/W/M region to be stated */
2136      IREffect  mFx;    /* indicates memory effects, if any */
2137      IRExpr*   mAddr;  /* of access, or NULL if mFx==Ifx_None */
2138      Int       mSize;  /* of access, or zero if mFx==Ifx_None */
2139
2140      /* Guest state effects; up to N allowed */
2141      Int  nFxState; /* must be 0 .. VEX_N_FXSTATE */
2142      struct {
2143         IREffect fx:16;   /* read, write or modify?  Ifx_None is invalid.
                              Stored as a 16-bit bit-field. */
2144         UShort   offset;
2145         UShort   size;
2146         UChar    nRepeats;
2147         UChar    repeatLen;
2148      } fxState[VEX_N_FXSTATE];
2149      /* The access can be repeated, as specified by nRepeats and
2150         repeatLen.  To describe only a single access, nRepeats and
2151         repeatLen should be zero.  Otherwise, repeatLen must be a
2152         multiple of size and greater than size. */
2153      /* Overall, the parts of the guest state denoted by (offset,
2154         size, nRepeats, repeatLen) are
2155               [offset, +size)
2156            and, if nRepeats > 0,
2157               for (i = 1; i <= nRepeats; i++)
2158                  [offset + i * repeatLen, +size)
2159         A convenient way to enumerate all segments is therefore
2160            for (i = 0; i < 1 + nRepeats; i++)
2161               [offset + i * repeatLen, +size)
2162      */
2163   }
2164   IRDirty;
2165
2166/* Pretty-print a dirty call (an IRDirty, passed by pointer) */
2167extern void     ppIRDirty ( IRDirty* );
2168
2169/* Allocate an uninitialised dirty call and return a pointer to it */
2170extern IRDirty* emptyIRDirty ( void );
2171
2172/* Deep-copy a dirty call, returning the copy */
2173extern IRDirty* deepCopyIRDirty ( IRDirty* );
2174
2175/* A handy function which takes some of the tedium out of constructing
2176   dirty helper calls.  The called function impliedly does not return
2177   any value and has a constant-True guard.  The call is marked as
2178   accessing neither guest state nor memory (hence the "unsafe"
2179   designation) -- you can change this marking later if need be.  A
2180   suitable IRCallee is constructed from the supplied bits (regparms,
   name and addr).  'args' is the argument vector for the call. */
2181extern
2182IRDirty* unsafeIRDirty_0_N ( Int regparms, const HChar* name, void* addr,
2183                             IRExpr** args );
2184
2185/* Similarly, make a zero-annotation dirty call which returns a value,
2186   and assign that to the given temp ('dst').  The remaining parameters
   are the same as for unsafeIRDirty_0_N. */
2187extern
2188IRDirty* unsafeIRDirty_1_N ( IRTemp dst,
2189                             Int regparms, const HChar* name, void* addr,
2190                             IRExpr** args );
2191
2192
2193/* --------------- Memory Bus Events --------------- */
2194
/* The kinds of memory bus event.  The first value is pinned to 0x1C00
   and the second follows consecutively. */
2195typedef
2196   enum {
2197      Imbe_Fence=0x1C00,   /* NOTE(review): presumably a memory fence;
                              not documented here -- confirm */
2198      /* Needed only on ARM.  It cancels a reservation made by a
2199         preceding Linked-Load, and needs to be handed through to the
2200         back end, just as LL and SC themselves are. */
2201      Imbe_CancelReservation
2202   }
2203   IRMBusEvent;
2204
/* Pretty-print an IRMBusEvent (same pp* naming as the other
   pretty-printers declared in this file). */
2205extern void ppIRMBusEvent ( IRMBusEvent );
2206
2207
2208/* --------------- Compare and Swap --------------- */
2209
2210/* This denotes an atomic compare and swap operation, either
2211   a single-element one or a double-element one.
2212
2213   In the single-element case:
2214
2215     .addr is the memory address.
2216     .end  is the endianness with which memory is accessed
2217
2218     If .addr contains the same value as .expdLo, then .dataLo is
2219     written there, else there is no write.  In both cases, the
2220     original value at .addr is copied into .oldLo.
2221
2222     Types: .expdLo, .dataLo and .oldLo must all have the same type.
2223     It may be any integral type, viz: I8, I16, I32 or, for 64-bit
2224     guests, I64.
2225
2226     .oldHi must be IRTemp_INVALID, and .expdHi and .dataHi must
2227     be NULL.
2228
2229   In the double-element case:
2230
2231     .addr is the memory address.
2232     .end  is the endianness with which memory is accessed
2233
2234     The operation is the same:
2235
2236     If .addr contains the same value as .expdHi:.expdLo, then
2237     .dataHi:.dataLo is written there, else there is no write.  In
2238     both cases the original value at .addr is copied into
2239     .oldHi:.oldLo.
2240
2241     Types: .expdHi, .expdLo, .dataHi, .dataLo, .oldHi, .oldLo must
2242     all have the same type, which may be any integral type, viz: I8,
2243     I16, I32 or, for 64-bit guests, I64.
2244
2245     The double-element case is complicated by the issue of
2246     endianness.  In all cases, the two elements are understood to be
2247     located adjacently in memory, starting at the address .addr.
2248
2249       If .end is Iend_LE, then the .xxxLo component is at the lower
2250       address and the .xxxHi component is at the higher address, and
2251       each component is itself stored little-endianly.
2252
2253       If .end is Iend_BE, then the .xxxHi component is at the lower
2254       address and the .xxxLo component is at the higher address, and
2255       each component is itself stored big-endianly.
2256
2257   This allows representing more cases than most architectures can
2258   handle.  For example, x86 cannot do DCAS on 8- or 16-bit elements.
2259
2260   How to know if the CAS succeeded?
2261
2262   * if .oldLo == .expdLo (resp. .oldHi:.oldLo == .expdHi:.expdLo),
2263     then the CAS succeeded, .dataLo (resp. .dataHi:.dataLo) is now
2264     stored at .addr, and the original value there was .oldLo (resp.
2265     .oldHi:.oldLo).
2266
2267   * if .oldLo != .expdLo (resp. .oldHi:.oldLo != .expdHi:.expdLo),
2268     then the CAS failed, and the original value at .addr was .oldLo
2269     (resp. .oldHi:.oldLo).
2270
2271   Hence it is easy to know whether or not the CAS succeeded.
2272*/
2273typedef
2274   struct {
2275      IRTemp    oldHi;  /* old value of *addr is written here; must be
                              IRTemp_INVALID for a single-element CAS */
2276      IRTemp    oldLo;  /* low element of the old value (the only
                              element in a single-element CAS) */
2277      IREndness end;    /* endianness of the data in memory */
2278      IRExpr*   addr;   /* store address */
2279      IRExpr*   expdHi; /* expected old value at *addr; must be NULL
                              for a single-element CAS */
2280      IRExpr*   expdLo; /* low element of the expected old value */
2281      IRExpr*   dataHi; /* new value for *addr; must be NULL for a
                              single-element CAS */
2282      IRExpr*   dataLo; /* low element of the new value */
2283   }
2284   IRCAS;
2285
/* Pretty-print an IRCAS. */
2286extern void ppIRCAS ( IRCAS* cas );
2287
/* Constructor for IRCAS.  The parameters correspond one-for-one, and
   in the same order, to the fields of IRCAS. */
2288extern IRCAS* mkIRCAS ( IRTemp oldHi, IRTemp oldLo,
2289                        IREndness end, IRExpr* addr,
2290                        IRExpr* expdHi, IRExpr* expdLo,
2291                        IRExpr* dataHi, IRExpr* dataLo );
2292
/* Deep-copy an IRCAS. */
2293extern IRCAS* deepCopyIRCAS ( IRCAS* );
2294
2295
2296/* ------------------ Circular Array Put ------------------ */
2297
/* The fields of a PutI statement (see IRStmt.PutI): a write to the
   guest state at a non-fixed offset.  ppIRStmt shows these as
   PUTI<descr>[<ix>,<bias>] = <data>. */
2298typedef
2299   struct {
2300      IRRegArray* descr; /* Part of guest state treated as circular */
2301      IRExpr*     ix;    /* Variable part of index into array */
2302      Int         bias;  /* Constant offset part of index into array */
2303      IRExpr*     data;  /* The value to write */
2304   } IRPutI;
2305
/* Pretty-print an IRPutI. */
2306extern void ppIRPutI ( IRPutI* puti );
2307
/* Constructor for IRPutI.  The parameters correspond one-for-one, and
   in the same order, to the fields of IRPutI. */
2308extern IRPutI* mkIRPutI ( IRRegArray* descr, IRExpr* ix,
2309                          Int bias, IRExpr* data );
2310
/* Deep-copy an IRPutI. */
2311extern IRPutI* deepCopyIRPutI ( IRPutI* );
2312
2313
2314/* --------------- Guarded loads and stores --------------- */
2315
2316/* Conditional stores are straightforward.  They are the same as
2317   normal stores, with an extra 'guard' field :: Ity_I1 that
2318   determines whether or not the store actually happens.  If not,
2319   memory is unmodified.
2320
2321   The semantics of this is that 'addr' and 'data' are fully evaluated
2322   even in the case where 'guard' evaluates to zero (false).
2323*/
2324typedef
2325   struct {
2326      IREndness end;    /* Endianness of the store */
2327      IRExpr*   addr;   /* store address; evaluated even if guard is false */
2328      IRExpr*   data;   /* value to write; evaluated even if guard is false */
2329      IRExpr*   guard;  /* Guarding value :: Ity_I1; if false, memory is
                              unmodified */
2330   }
2331   IRStoreG;
2332
2333/* Conditional loads are a little more complex.  'addr' is the
2334   address, 'guard' is the guarding condition.  If the load takes
2335   place, the loaded value is placed in 'dst'.  If it does not take
2336   place, 'alt' is copied to 'dst'.  However, the loaded value is not
2337   placed directly in 'dst' -- it is first subjected to the conversion
2338   specified by 'cvt'.
2339
2340   For example, imagine doing a conditional 8-bit load, in which the
2341   loaded value is zero extended to 32 bits.  Hence:
2342   * 'dst' and 'alt' must have type I32
2343   * 'cvt' must be a unary op which converts I8 to I32.  In this
2344     example, it would be ILGop_8Uto32.
2345
2346   There is no explicit indication of the type at which the load is
2347   done, since that is inferrable from the arg type of 'cvt'.  Note
2348   that the types of 'alt' and 'dst' and the result type of 'cvt' must
2349   all be the same.
2350
2351   Semantically, 'addr' is evaluated even in the case where 'guard'
2352   evaluates to zero (false), and 'alt' is evaluated even when 'guard'
2353   evaluates to one (true).  That is, 'addr' and 'alt' are always
2354   evaluated.
2355*/
/* The conversions ('cvt') that a guarded load can apply to the loaded
   value before it is placed in 'dst'.  The argument type of each op
   is the type at which the load is done; every op listed here
   produces a 32-bit result. */
2356typedef
2357   enum {
2358      ILGop_INVALID=0x1D00,
2359      ILGop_Ident32,   /* 32 bit, no conversion */
2360      ILGop_16Uto32,   /* 16 bit load, Z-widen to 32 */
2361      ILGop_16Sto32,   /* 16 bit load, S-widen to 32 */
2362      ILGop_8Uto32,    /* 8 bit load, Z-widen to 32 */
2363      ILGop_8Sto32     /* 8 bit load, S-widen to 32 */
2364   }
2365   IRLoadGOp;
2366
2367typedef
2368   struct {
2369      IREndness end;    /* Endianness of the load */
2370      IRLoadGOp cvt;    /* Conversion to apply to the loaded value; its
                              arg type is the type at which the load is done */
2371      IRTemp    dst;    /* Destination (LHS) of assignment; same type as
                              'alt' and as the result of 'cvt' */
2372      IRExpr*   addr;   /* Address being loaded from; always evaluated */
2373      IRExpr*   alt;    /* Value if load is not done; always evaluated */
2374      IRExpr*   guard;  /* Guarding value; the load is done only if true */
2375   }
2376   IRLoadG;
2377
/* Pretty-print an IRStoreG. */
2378extern void ppIRStoreG ( IRStoreG* sg );
2379
/* Pretty-print a guarded-load conversion op. */
2380extern void ppIRLoadGOp ( IRLoadGOp cvt );
2381
/* Pretty-print an IRLoadG. */
2382extern void ppIRLoadG ( IRLoadG* lg );
2383
/* Constructor for IRStoreG.  The parameters correspond one-for-one,
   and in the same order, to the fields of IRStoreG. */
2384extern IRStoreG* mkIRStoreG ( IREndness end,
2385                              IRExpr* addr, IRExpr* data,
2386                              IRExpr* guard );
2387
/* Constructor for IRLoadG.  The parameters correspond one-for-one,
   and in the same order, to the fields of IRLoadG. */
2388extern IRLoadG* mkIRLoadG ( IREndness end, IRLoadGOp cvt,
2389                            IRTemp dst, IRExpr* addr, IRExpr* alt,
2390                            IRExpr* guard );
2391
2392
2393/* ------------------ Statements ------------------ */
2394
2395/* The different kinds of statements.  Their meaning is explained
2396   below in the comments for IRStmt.
2397
2398   Those marked META do not represent code, but rather extra
2399   information about the code.  These statements can be removed
2400   without affecting the functional behaviour of the code, however
2401   they are required by some IR consumers such as tools that
2402   instrument the code.
2403*/
2404
2405typedef
2406   enum {
2407      Ist_NoOp=0x1E00, /* no-op */
2408      Ist_IMark,     /* META: instruction mark */
2409      Ist_AbiHint,   /* META: ABI hint */
2410      Ist_Put,       /* write guest register at a fixed offset */
2411      Ist_PutI,      /* write guest register at a non-fixed offset */
2412      Ist_WrTmp,     /* assign a value to a temporary */
2413      Ist_Store,     /* normal (not conditional) store to memory */
2414      Ist_LoadG,     /* guarded load */
2415      Ist_StoreG,    /* guarded store */
2416      Ist_CAS,       /* atomic compare-and-swap */
2417      Ist_LLSC,      /* Load-Linked or Store-Conditional */
2418      Ist_Dirty,     /* call to a C function that has side effects */
2419      Ist_MBE,       /* memory bus event */
2420      Ist_Exit       /* conditional exit from the middle of an IRSB */
2421   }
2422   IRStmtTag;
2423
2424/* A statement.  Stored as a tagged union.  'tag' indicates what kind
2425   of statement this is.  'Ist' is the union that holds the fields.
2426   If an IRStmt 'st' has st.tag equal to Ist_Store, then it's a store
2427   statement, and the fields can be accessed with
2428   'st.Ist.Store.<fieldname>'.
2429
2430   For each kind of statement, we show what it looks like when
2431   pretty-printed with ppIRStmt().
2432*/
2433typedef
2434   struct _IRStmt {
2435      IRStmtTag tag;   /* says which member of 'Ist' is in use */
2436      union {
2437         /* A no-op (usually resulting from IR optimisation).  Can be
2438            omitted without any effect.
2439
2440            ppIRStmt output: IR-NoOp
2441         */
2442         struct {   /* no fields; a member-less struct is a GNU C extension */
2443	 } NoOp;
2444
2445         /* META: instruction mark.  Marks the start of the statements
2446            that represent a single machine instruction (the end of
2447            those statements is marked by the next IMark or the end of
2448            the IRSB).  Contains the address and length of the
2449            instruction.
2450
2451            It also contains a delta value.  The delta must be
2452            subtracted from a guest program counter value before
2453            attempting to establish, by comparison with the address
2454            and length values, whether or not that program counter
2455            value refers to this instruction.  For x86, amd64, ppc32,
2456            ppc64 and arm, the delta value is zero.  For Thumb
2457            instructions, the delta value is one.  This is because, on
2458            Thumb, guest PC values (guest_R15T) are encoded using the
2459            top 31 bits of the instruction address and a 1 in the lsb;
2460            hence they appear to be (numerically) 1 past the start of
2461            the instruction they refer to.  IOW, guest_R15T on ARM
2462            holds a standard ARM interworking address.
2463
2464            ppIRStmt output: ------ IMark(<addr>, <len>, <delta>) ------,
2465                         eg. ------ IMark(0x4000792, 5, 0) ------,
2466         */
2467         struct {
2468            Addr64 addr;   /* instruction address */
2469            Int    len;    /* instruction length */
2470            UChar  delta;  /* addr = program counter as encoded in guest state
2471                                     - delta */
2472         } IMark;
2473
2474         /* META: An ABI hint, which says something about this
2475            platform's ABI.
2476
2477            At the moment, the only AbiHint is one which indicates
2478            that a given chunk of address space, [base .. base+len-1],
2479            has become undefined.  This is used on amd64-linux and
2480            some ppc variants to pass stack-redzoning hints to whoever
2481            wants to see them.  It also indicates the address of the
2482            next (dynamic) instruction that will be executed.  This is
2483            to help Memcheck do origin tracking.
2484
2485            ppIRStmt output: ====== AbiHint(<base>, <len>, <nia>) ======
2486                         eg. ====== AbiHint(t1, 16, t2) ======
2487         */
2488         struct {
2489            IRExpr* base;     /* Start  of undefined chunk */
2490            Int     len;      /* Length of undefined chunk */
2491            IRExpr* nia;      /* Address of next (guest) insn */
2492         } AbiHint;
2493
2494         /* Write a guest register, at a fixed offset in the guest state.
2495            ppIRStmt output: PUT(<offset>) = <data>, eg. PUT(60) = t1
2496         */
2497         struct {
2498            Int     offset;   /* Offset into the guest state */
2499            IRExpr* data;     /* The value to write */
2500         } Put;
2501
2502         /* Write a guest register, at a non-fixed offset in the guest
2503            state.  See the comment for GetI expressions for more
2504            information.
2505
2506            ppIRStmt output: PUTI<descr>[<ix>,<bias>] = <data>,
2507                         eg. PUTI(64:8xF64)[t5,0] = t1
2508         */
2509         struct {
2510            IRPutI* details;  /* descr, ix, bias and data: see IRPutI */
2511         } PutI;
2512
2513         /* Assign a value to a temporary.  Note that SSA rules require
2514            each tmp is only assigned to once.  IR sanity checking will
2515            reject any block containing a temporary which is not assigned
2516            to exactly once.
2517
2518            ppIRStmt output: t<tmp> = <data>, eg. t1 = 3
2519         */
2520         struct {
2521            IRTemp  tmp;   /* Temporary  (LHS of assignment) */
2522            IRExpr* data;  /* Expression (RHS of assignment) */
2523         } WrTmp;
2524
2525         /* Write a value to memory.  This is a normal store, not a
2526            Store-Conditional.  To represent a Store-Conditional,
2527            instead use IRStmt.LLSC.
2528            ppIRStmt output: ST<end>(<addr>) = <data>, eg. STle(t1) = t2
2529         */
2530         struct {
2531            IREndness end;    /* Endianness of the store */
2532            IRExpr*   addr;   /* store address */
2533            IRExpr*   data;   /* value to write */
2534         } Store;
2535
2536         /* Guarded store.  Note that this is defined to evaluate all
2537            expression fields (addr, data) even if the guard evaluates
2538            to false.
2539            ppIRStmt output:
2540              if (<guard>) ST<end>(<addr>) = <data> */
2541         struct {
2542            IRStoreG* details;  /* end, addr, data and guard: see IRStoreG */
2543         } StoreG;
2544
2545         /* Guarded load.  Note that this is defined to evaluate all
2546            expression fields (addr, alt) even if the guard evaluates
2547            to false.
2548            ppIRStmt output:
2549              t<tmp> = if (<guard>) <cvt>(LD<end>(<addr>)) else <alt> */
2550         struct {
2551            IRLoadG* details;  /* end, cvt, dst, addr, alt, guard: see IRLoadG */
2552         } LoadG;
2553
2554         /* Do an atomic compare-and-swap operation.  Semantics are
2555            described above on a comment at the definition of IRCAS.
2556
2557            ppIRStmt output:
2558               t<tmp> = CAS<end>(<addr> :: <expected> -> <new>)
2559            eg
2560               t1 = CASle(t2 :: t3->Add32(t3,1))
2561               which denotes a 32-bit atomic increment
2562               of a value at address t2
2563
2564            A double-element CAS may also be denoted, in which case <tmp>,
2565            <expected> and <new> are all pairs of items, separated by
2566            commas.
2567         */
2568         struct {
2569            IRCAS* details;  /* operands of the CAS: see IRCAS */
2570         } CAS;
2571
2572         /* Either Load-Linked or Store-Conditional, depending on
2573            STOREDATA.
2574
2575            If STOREDATA is NULL then this is a Load-Linked, meaning
2576            that data is loaded from memory as normal, but a
2577            'reservation' for the address is also lodged in the
2578            hardware.
2579
2580               result = Load-Linked(addr, end)
2581
2582            The data transfer type is the type of RESULT (I32, I64,
2583            etc).  ppIRStmt output:
2584
2585               result = LD<end>-Linked(<addr>), eg. LDbe-Linked(t1)
2586
2587            If STOREDATA is not NULL then this is a Store-Conditional,
2588            hence:
2589
2590               result = Store-Conditional(addr, storedata, end)
2591
2592            The data transfer type is the type of STOREDATA and RESULT
2593            has type Ity_I1. The store may fail or succeed depending
2594            on the state of a previously lodged reservation on this
2595            address.  RESULT is written 1 if the store succeeds and 0
2596            if it fails.  eg ppIRStmt output:
2597
2598               result = ( ST<end>-Cond(<addr>) = <storedata> )
2599               eg t3 = ( STbe-Cond(t1) = t2 )
2600
2601            In all cases, the address must be naturally aligned for
2602            the transfer type -- any misaligned addresses should be
2603            caught by a dominating IR check and side exit.  This
2604            alignment restriction exists because on at least some
2605            LL/SC platforms (ppc), stwcx. etc will trap w/ SIGBUS on
2606            misaligned addresses, and we have to actually generate
2607            stwcx. on the host, and we don't want it trapping on the
2608            host.
2609
2610            Summary of rules for transfer type:
2611              STOREDATA == NULL (LL):
2612                transfer type = type of RESULT
2613              STOREDATA != NULL (SC):
2614                transfer type = type of STOREDATA, and RESULT :: Ity_I1
2615         */
2616         struct {
2617            IREndness end;       /* Endianness of the access */
2618            IRTemp    result;    /* LL: the loaded value; SC: 1 if the
                                    store succeeded, 0 if it failed */
2619            IRExpr*   addr;      /* Address; must be naturally aligned */
2620            IRExpr*   storedata; /* NULL => LL, non-NULL => SC */
2621         } LLSC;
2622
2623         /* Call (possibly conditionally) a C function that has side
2624            effects (ie. is "dirty").  See the comments above the
2625            IRDirty type declaration for more information.
2626
2627            ppIRStmt output:
2628               t<tmp> = DIRTY <guard> <effects>
2629                  ::: <callee>(<args>)
2630            eg.
2631               t1 = DIRTY t27 RdFX-gst(16,4) RdFX-gst(60,4)
2632                     ::: foo{0x380035f4}(t2)
2633         */
2634         struct {
2635            IRDirty* details;  /* description of the call: see IRDirty */
2636         } Dirty;
2637
2638         /* A memory bus event - a fence, or acquisition/release of the
2639            hardware bus lock.  IR optimisation treats all these as fences
2640            across which no memory references may be moved.
2641            ppIRStmt output: MBusEvent-Fence,
2642                             MBusEvent-BusLock, MBusEvent-BusUnlock.
2643         */
2644         struct {
2645            IRMBusEvent event;  /* which kind of bus event this is */
2646         } MBE;
2647
2648         /* Conditional exit from the middle of an IRSB.
2649            ppIRStmt output: if (<guard>) goto {<jk>} <dst>
2650                         eg. if (t69) goto {Boring} 0x4000AAA:I32
2651            If <guard> is true, the guest state is also updated by
2652            PUT-ing <dst> at <offsIP>.  This is done because a
2653            taken exit must update the guest program counter.
2654         */
2655         struct {
2656            IRExpr*    guard;    /* Conditional expression */
2657            IRConst*   dst;      /* Jump target (constant only) */
2658            IRJumpKind jk;       /* Jump kind */
2659            Int        offsIP;   /* Guest state offset for IP */
2660         } Exit;
2661      } Ist;
2662   }
2663   IRStmt;
2664
2665/* Statement constructors.  There is one per IRStmtTag, and each
   returns an IRStmt*.  Two things to watch for:
   - IRStmt_Exit takes 'jk' before 'dst', which is the reverse of the
     order of those fields in Ist.Exit.
   - IRStmt_StoreG and IRStmt_LoadG take the individual fields of
     IRStoreG / IRLoadG, whereas IRStmt_PutI, IRStmt_CAS and
     IRStmt_Dirty take a ready-made 'details' pointer. */
2666extern IRStmt* IRStmt_NoOp    ( void );
2667extern IRStmt* IRStmt_IMark   ( Addr64 addr, Int len, UChar delta );
2668extern IRStmt* IRStmt_AbiHint ( IRExpr* base, Int len, IRExpr* nia );
2669extern IRStmt* IRStmt_Put     ( Int off, IRExpr* data );
2670extern IRStmt* IRStmt_PutI    ( IRPutI* details );
2671extern IRStmt* IRStmt_WrTmp   ( IRTemp tmp, IRExpr* data );
2672extern IRStmt* IRStmt_Store   ( IREndness end, IRExpr* addr, IRExpr* data );
2673extern IRStmt* IRStmt_StoreG  ( IREndness end, IRExpr* addr, IRExpr* data,
2674                                IRExpr* guard );
2675extern IRStmt* IRStmt_LoadG   ( IREndness end, IRLoadGOp cvt, IRTemp dst,
2676                                IRExpr* addr, IRExpr* alt, IRExpr* guard );
2677extern IRStmt* IRStmt_CAS     ( IRCAS* details );
2678extern IRStmt* IRStmt_LLSC    ( IREndness end, IRTemp result,
2679                                IRExpr* addr, IRExpr* storedata );
2680extern IRStmt* IRStmt_Dirty   ( IRDirty* details );
2681extern IRStmt* IRStmt_MBE     ( IRMBusEvent event );
2682extern IRStmt* IRStmt_Exit    ( IRExpr* guard, IRJumpKind jk, IRConst* dst,
2683                                Int offsIP );
2684
2685/* Deep-copy an IRStmt. */
2686extern IRStmt* deepCopyIRStmt ( IRStmt* );
2687
2688/* Pretty-print an IRStmt.  The output format for each kind of
   statement is shown in the comments inside the IRStmt definition. */
2689extern void ppIRStmt ( IRStmt* );
2690
2691
2692/* ------------------ Basic Blocks ------------------ */
2693
2694/* Type environments: a bunch of statements, expressions, etc, are
2695   incomplete without an environment indicating the type of each
2696   IRTemp.  So this provides one.  IR temporaries are really just
2697   unsigned ints and so this provides an array, 0 .. types_used-1 of
2698   them.
2699*/
2700typedef
2701   struct {
2702      IRType* types;       /* array giving the type of each IRTemp */
2703      Int     types_size;  /* presumably the allocated capacity of
                                 'types' -- TODO confirm */
2704      Int     types_used;  /* number of entries of 'types' in use */
2705   }
2706   IRTypeEnv;
2707
2708/* Obtain a new IRTemp.  Takes a type environment and the type wanted
   for the temporary (presumably the type is recorded in that
   environment -- confirm against the definition). */
2709extern IRTemp newIRTemp ( IRTypeEnv*, IRType );
2710
2711/* Deep-copy a type environment */
2712extern IRTypeEnv* deepCopyIRTypeEnv ( IRTypeEnv* );
2713
2714/* Pretty-print a type environment */
2715extern void ppIRTypeEnv ( IRTypeEnv* );
2716
2717
2718/* Code blocks, which in proper compiler terminology are superblocks
2719   (single entry, multiple exit code sequences) contain:
2720
2721   - A table giving a type for each temp (the "type environment")
2722   - An expandable array of statements
2723   - An expression of type 32 or 64 bits, depending on the
2724     guest's word size, indicating the next destination if the block
2725     executes all the way to the end, without a side exit
2726   - An indication of any special actions (JumpKind) needed
2727     for this final jump.
2728   - Offset of the IP field in the guest state.  This will be
2729     updated before the final jump is done.
2730
2731   "IRSB" stands for "IR Super Block".
2732*/
2733typedef
2734   struct {
2735      IRTypeEnv* tyenv;       /* type of each temp (the type environment) */
2736      IRStmt**   stmts;       /* expandable array of statements */
2737      Int        stmts_size;  /* presumably the allocated capacity of
                                    'stmts' -- TODO confirm */
2738      Int        stmts_used;  /* presumably the number of entries of
                                    'stmts' in use -- TODO confirm */
2739      IRExpr*    next;        /* next destination if the block runs to
                                    the end without a side exit */
2740      IRJumpKind jumpkind;    /* special actions needed for the final jump */
2741      Int        offsIP;      /* offset of the IP field in the guest
                                    state; updated before the final jump */
2742   }
2743   IRSB;
2744
2745/* Allocate a new, uninitialised IRSB */
2746extern IRSB* emptyIRSB ( void );
2747
2748/* Deep-copy an IRSB */
2749extern IRSB* deepCopyIRSB ( IRSB* );
2750
2751/* Deep-copy an IRSB, except for the statements list, which is set to
2752   be a new, empty, list of statements. */
2753extern IRSB* deepCopyIRSBExceptStmts ( IRSB* );
2754
2755/* Pretty-print an IRSB */
2756extern void ppIRSB ( IRSB* );
2757
2758/* Append an IRStmt to an IRSB */
2759extern void addStmtToIRSB ( IRSB*, IRStmt* );
2760
2761
2762/*---------------------------------------------------------------*/
2763/*--- Helper functions for the IR                             ---*/
2764/*---------------------------------------------------------------*/
2765
2766/* For messing with IR type environments.  Returns an IRTypeEnv*
   (presumably a new environment with no temporaries -- confirm). */
2767extern IRTypeEnv* emptyIRTypeEnv  ( void );
2768
2769/* What is the type of this expression?  The temp and expression
   variants also take the type environment. */
2770extern IRType typeOfIRConst ( IRConst* );
2771extern IRType typeOfIRTemp  ( IRTypeEnv*, IRTemp );
2772extern IRType typeOfIRExpr  ( IRTypeEnv*, IRExpr* );
2773
2774/* What are the arg and result type for this IRLoadGOp?  Both are
   returned through the OUT pointers.  Note the parameter order: the
   result type (t_res) comes before the argument type (t_arg). */
2775extern void typeOfIRLoadGOp ( IRLoadGOp cvt,
2776                              /*OUT*/IRType* t_res,
2777                              /*OUT*/IRType* t_arg );
2778
2779/* Sanity check a BB of IR.
   NOTE(review): 'caller' presumably identifies the call site in
   diagnostics, and 'guest_word_size' is passed as an IRType rather
   than a byte count -- confirm both against the definition. */
2780extern void sanityCheckIRSB ( IRSB*  bb,
2781                              const  HChar* caller,
2782                              Bool   require_flatness,
2783                              IRType guest_word_size );
/* Is this statement flat?  ('Flat' is not defined in this part of
   the header; see the definition.) */
2784extern Bool isFlatIRStmt ( IRStmt* );
2785
2786/* Is this any value actually in the enumeration 'IRType' ? */
2787extern Bool isPlausibleIRType ( IRType ty );
2788
2789
2790/*---------------------------------------------------------------*/
2791/*--- IR injection                                            ---*/
2792/*---------------------------------------------------------------*/
/* NOTE(review): what is injected, and where in the IRSB, is not
   documented in this header -- see the definition.  Unlike the other
   prototypes here this one omits 'extern', which makes no difference
   for a function declaration. */
2793void vex_inject_ir(IRSB *, IREndness);
2794
2795
2796#endif /* ndef __LIBVEX_IR_H */
2797
2798
2799/*---------------------------------------------------------------*/
2800/*---                                             libvex_ir.h ---*/
2801/*---------------------------------------------------------------*/
2802