libvex_ir.h revision 051d3b5d6376aa10a65793c3c2267d6ab233b896
1
2/*---------------------------------------------------------------*/
3/*--- begin                                       libvex_ir.h ---*/
4/*---------------------------------------------------------------*/
5
6/*
7   This file is part of Valgrind, a dynamic binary instrumentation
8   framework.
9
10   Copyright (C) 2004-2013 OpenWorks LLP
11      info@open-works.net
12
13   This program is free software; you can redistribute it and/or
14   modify it under the terms of the GNU General Public License as
15   published by the Free Software Foundation; either version 2 of the
16   License, or (at your option) any later version.
17
18   This program is distributed in the hope that it will be useful, but
19   WITHOUT ANY WARRANTY; without even the implied warranty of
20   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21   General Public License for more details.
22
23   You should have received a copy of the GNU General Public License
24   along with this program; if not, write to the Free Software
25   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26   02110-1301, USA.
27
28   The GNU General Public License is contained in the file COPYING.
29
30   Neither the names of the U.S. Department of Energy nor the
31   University of California nor the names of its contributors may be
32   used to endorse or promote products derived from this software
33   without prior written permission.
34*/
35
36#ifndef __LIBVEX_IR_H
37#define __LIBVEX_IR_H
38
39#include "libvex_basictypes.h"
40
41
42/*---------------------------------------------------------------*/
43/*--- High-level IR description                               ---*/
44/*---------------------------------------------------------------*/
45
46/* Vex IR is an architecture-neutral intermediate representation.
47   Unlike some IRs in systems similar to Vex, it is not like assembly
48   language (ie. a list of instructions).  Rather, it is more like the
49   IR that might be used in a compiler.
50
51   Code blocks
52   ~~~~~~~~~~~
53   The code is broken into small code blocks ("superblocks", type:
54   'IRSB').  Each code block typically represents from 1 to perhaps 50
55   instructions.  IRSBs are single-entry, multiple-exit code blocks.
56   Each IRSB contains three things:
57   - a type environment, which indicates the type of each temporary
58     value present in the IRSB
59   - a list of statements, which represent code
   - a jump that exits from the end of the IRSB
61   Because the blocks are multiple-exit, there can be additional
62   conditional exit statements that cause control to leave the IRSB
63   before the final exit.  Also because of this, IRSBs can cover
64   multiple non-consecutive sequences of code (up to 3).  These are
65   recorded in the type VexGuestExtents (see libvex.h).
66
67   Statements and expressions
68   ~~~~~~~~~~~~~~~~~~~~~~~~~~
69   Statements (type 'IRStmt') represent operations with side-effects,
70   eg.  guest register writes, stores, and assignments to temporaries.
71   Expressions (type 'IRExpr') represent operations without
72   side-effects, eg. arithmetic operations, loads, constants.
73   Expressions can contain sub-expressions, forming expression trees,
   eg. (3 + (4 * load(addr1))).
75
76   Storage of guest state
77   ~~~~~~~~~~~~~~~~~~~~~~
78   The "guest state" contains the guest registers of the guest machine
79   (ie.  the machine that we are simulating).  It is stored by default
80   in a block of memory supplied by the user of the VEX library,
81   generally referred to as the guest state (area).  To operate on
82   these registers, one must first read ("Get") them from the guest
83   state into a temporary value.  Afterwards, one can write ("Put")
84   them back into the guest state.
85
86   Get and Put are characterised by a byte offset into the guest
87   state, a small integer which effectively gives the identity of the
88   referenced guest register, and a type, which indicates the size of
89   the value to be transferred.
90
91   The basic "Get" and "Put" operations are sufficient to model normal
92   fixed registers on the guest.  Selected areas of the guest state
93   can be treated as a circular array of registers (type:
94   'IRRegArray'), which can be indexed at run-time.  This is done with
95   the "GetI" and "PutI" primitives.  This is necessary to describe
96   rotating register files, for example the x87 FPU stack, SPARC
97   register windows, and the Itanium register files.
98
99   Examples, and flattened vs. unflattened code
100   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
101   For example, consider this x86 instruction:
102
103     addl %eax, %ebx
104
105   One Vex IR translation for this code would be this:
106
107     ------ IMark(0x24F275, 7, 0) ------
108     t3 = GET:I32(0)             # get %eax, a 32-bit integer
109     t2 = GET:I32(12)            # get %ebx, a 32-bit integer
110     t1 = Add32(t3,t2)           # addl
111     PUT(0) = t1                 # put %eax
112
113   (For simplicity, this ignores the effects on the condition codes, and
114   the update of the instruction pointer.)
115
116   The "IMark" is an IR statement that doesn't represent actual code.
117   Instead it indicates the address and length of the original
118   instruction.  The numbers 0 and 12 are offsets into the guest state
119   for %eax and %ebx.  The full list of offsets for an architecture
120   <ARCH> can be found in the type VexGuest<ARCH>State in the file
121   VEX/pub/libvex_guest_<ARCH>.h.
122
123   The five statements in this example are:
124   - the IMark
125   - three assignments to temporaries
126   - one register write (put)
127
128   The six expressions in this example are:
129   - two register reads (gets)
130   - one arithmetic (add) operation
131   - three temporaries (two nested within the Add32, one in the PUT)
132
133   The above IR is "flattened", ie. all sub-expressions are "atoms",
134   either constants or temporaries.  An equivalent, unflattened version
135   would be:
136
137     PUT(0) = Add32(GET:I32(0), GET:I32(12))
138
139   IR is guaranteed to be flattened at instrumentation-time.  This makes
140   instrumentation easier.  Equivalent flattened and unflattened IR
141   typically results in the same generated code.
142
143   Another example, this one showing loads and stores:
144
145     addl %edx,4(%eax)
146
147   This becomes (again ignoring condition code and instruction pointer
148   updates):
149
150     ------ IMark(0x4000ABA, 3, 0) ------
151     t3 = Add32(GET:I32(0),0x4:I32)
152     t2 = LDle:I32(t3)
153     t1 = GET:I32(8)
154     t0 = Add32(t2,t1)
155     STle(t3) = t0
156
157   The "le" in "LDle" and "STle" is short for "little-endian".
158
159   No need for deallocations
160   ~~~~~~~~~~~~~~~~~~~~~~~~~
161   Although there are allocation functions for various data structures
162   in this file, there are no deallocation functions.  This is because
163   Vex uses a memory allocation scheme that automatically reclaims the
164   memory used by allocated structures once translation is completed.
   This makes things easier for tools that instrument/transform code
166   blocks.
167
168   SSAness and typing
169   ~~~~~~~~~~~~~~~~~~
170   The IR is fully typed.  For every IRSB (IR block) it is possible to
171   say unambiguously whether or not it is correctly typed.
172   Incorrectly typed IR has no meaning and the VEX will refuse to
173   process it.  At various points during processing VEX typechecks the
174   IR and aborts if any violations are found.  This seems overkill but
175   makes it a great deal easier to build a reliable JIT.
176
177   IR also has the SSA property.  SSA stands for Static Single
178   Assignment, and what it means is that each IR temporary may be
179   assigned to only once.  This idea became widely used in compiler
180   construction in the mid to late 90s.  It makes many IR-level
181   transformations/code improvements easier, simpler and faster.
182   Whenever it typechecks an IR block, VEX also checks the SSA
183   property holds, and will abort if not so.  So SSAness is
184   mechanically and rigidly enforced.
185*/
186
187/*---------------------------------------------------------------*/
188/*--- Type definitions for the IR                             ---*/
189/*---------------------------------------------------------------*/
190
191/* General comments about naming schemes:
192
   All publicly visible functions contain the name of the primary
194   type on which they operate (IRFoo, IRBar, etc).  Hence you should
195   be able to identify these functions by grepping for "IR[A-Z]".
196
197   For some type 'IRFoo':
198
199   - ppIRFoo is the printing method for IRFoo, printing it to the
200     output channel specified in the LibVEX_Initialise call.
201
202   - eqIRFoo is a structural equality predicate for IRFoos.
203
204   - deepCopyIRFoo is a deep copy constructor for IRFoos.
205     It recursively traverses the entire argument tree and
206     produces a complete new tree.  All types have a deep copy
207     constructor.
208
209   - shallowCopyIRFoo is the shallow copy constructor for IRFoos.
210     It creates a new top-level copy of the supplied object,
211     but does not copy any sub-objects.  Only some types have a
212     shallow copy constructor.
213*/
214
215/* ------------------ Types ------------------ */
216
/* A type indicates the size of a value, and whether it's an integer, a
   float, or a vector (SIMD) value.

   The tags are numbered consecutively, starting at 0x1100 for
   Ity_INVALID, so the declaration order below fixes the numeric value
   of every tag.  Append new tags at the end if existing values must
   stay stable. */
typedef
   enum {
      Ity_INVALID=0x1100,
      Ity_I1,
      Ity_I8,    /* Ity_I8/I16/I32/I64 are the 1/2/4/8-byte integers */
      Ity_I16,
      Ity_I32,
      Ity_I64,
      Ity_I128,  /* 128-bit scalar */
      Ity_F32,   /* IEEE 754 float */
      Ity_F64,   /* IEEE 754 double */
      Ity_D32,   /* 32-bit Decimal floating point */
      Ity_D64,   /* 64-bit Decimal floating point */
      Ity_D128,  /* 128-bit Decimal floating point */
      Ity_F128,  /* 128-bit floating point; implementation defined */
      Ity_V128,  /* 128-bit SIMD */
      Ity_V256   /* 256-bit SIMD */
   }
   IRType;
238
239/* Pretty-print an IRType */
240extern void ppIRType ( IRType );
241
242/* Get the size (in bytes) of an IRType */
243extern Int sizeofIRType ( IRType );
244
245/* Translate 1/2/4/8 into Ity_I{8,16,32,64} respectively.  Asserts on
246   any other input. */
247extern IRType integerIRTypeOfSize ( Int szB );
248
249
250/* ------------------ Endianness ------------------ */
251
/* IREndness is used in load IRExprs and store IRStmts.  It selects
   the byte order of the memory access.  The tags are numbered
   from 0x1200. */
typedef
   enum {
      Iend_LE=0x1200, /* little endian */
      Iend_BE          /* big endian */
   }
   IREndness;
259
260
261/* ------------------ Constants ------------------ */
262
/* IRConsts are used within 'Const' IRExprs and 'Exit' statements. */

/* The various kinds of constant.  The tags are numbered consecutively
   from 0x1300 in declaration order.  Each tag names the member of the
   IRConst 'Ico' union that holds the value. */
typedef
   enum {
      Ico_U1=0x1300,
      Ico_U8,
      Ico_U16,
      Ico_U32,
      Ico_U64,
      Ico_F32,   /* 32-bit IEEE754 floating */
      Ico_F32i,  /* 32-bit unsigned int to be interpreted literally
                    as a IEEE754 single value. */
      Ico_F64,   /* 64-bit IEEE754 floating */
      Ico_F64i,  /* 64-bit unsigned int to be interpreted literally
                    as a IEEE754 double value. */
      Ico_V128,  /* 128-bit restricted vector constant, with 1 bit
                    (repeated 8 times) for each of the 16 x 1-byte lanes */
      Ico_V256   /* 256-bit restricted vector constant, with 1 bit
                    (repeated 8 times) for each of the 32 x 1-byte lanes */
   }
   IRConstTag;
285
286/* A constant.  Stored as a tagged union.  'tag' indicates what kind of
287   constant this is.  'Ico' is the union that holds the fields.  If an
288   IRConst 'c' has c.tag equal to Ico_U32, then it's a 32-bit constant,
289   and its value can be accessed with 'c.Ico.U32'. */
290typedef
291   struct _IRConst {
292      IRConstTag tag;
293      union {
294         Bool   U1;
295         UChar  U8;
296         UShort U16;
297         UInt   U32;
298         ULong  U64;
299         Float  F32;
300         UInt   F32i;
301         Double F64;
302         ULong  F64i;
303         UShort V128;   /* 16-bit value; see Ico_V128 comment above */
304         UInt   V256;   /* 32-bit value; see Ico_V256 comment above */
305      } Ico;
306   }
307   IRConst;
308
309/* IRConst constructors */
310extern IRConst* IRConst_U1   ( Bool );
311extern IRConst* IRConst_U8   ( UChar );
312extern IRConst* IRConst_U16  ( UShort );
313extern IRConst* IRConst_U32  ( UInt );
314extern IRConst* IRConst_U64  ( ULong );
315extern IRConst* IRConst_F32  ( Float );
316extern IRConst* IRConst_F32i ( UInt );
317extern IRConst* IRConst_F64  ( Double );
318extern IRConst* IRConst_F64i ( ULong );
319extern IRConst* IRConst_V128 ( UShort );
320extern IRConst* IRConst_V256 ( UInt );
321
322/* Deep-copy an IRConst */
323extern IRConst* deepCopyIRConst ( IRConst* );
324
325/* Pretty-print an IRConst */
326extern void ppIRConst ( IRConst* );
327
328/* Compare two IRConsts for equality */
329extern Bool eqIRConst ( IRConst*, IRConst* );
330
331
332/* ------------------ Call targets ------------------ */
333
334/* Describes a helper function to call.  The name part is purely for
335   pretty printing and not actually used.  regparms=n tells the back
336   end that the callee has been declared
337   "__attribute__((regparm(n)))", although indirectly using the
338   VEX_REGPARM(n) macro.  On some targets (x86) the back end will need
339   to construct a non-standard sequence to call a function declared
340   like this.
341
342   mcx_mask is a sop to Memcheck.  It indicates which args should be
343   considered 'always defined' when lazily computing definedness of
344   the result.  Bit 0 of mcx_mask corresponds to args[0], bit 1 to
345   args[1], etc.  If a bit is set, the corresponding arg is excluded
346   (hence "x" in "mcx") from definedness checking.
347*/
348
349typedef
350   struct {
351      Int          regparms;
352      const HChar* name;
353      void*        addr;
354      UInt         mcx_mask;
355   }
356   IRCallee;
357
358/* Create an IRCallee. */
359extern IRCallee* mkIRCallee ( Int regparms, const HChar* name, void* addr );
360
361/* Deep-copy an IRCallee. */
362extern IRCallee* deepCopyIRCallee ( IRCallee* );
363
364/* Pretty-print an IRCallee. */
365extern void ppIRCallee ( IRCallee* );
366
367
368/* ------------------ Guest state arrays ------------------ */
369
370/* This describes a section of the guest state that we want to
371   be able to index at run time, so as to be able to describe
372   indexed or rotating register files on the guest. */
373typedef
374   struct {
375      Int    base;   /* guest state offset of start of indexed area */
376      IRType elemTy; /* type of each element in the indexed area */
377      Int    nElems; /* number of elements in the indexed area */
378   }
379   IRRegArray;
380
381extern IRRegArray* mkIRRegArray ( Int, IRType, Int );
382
383extern IRRegArray* deepCopyIRRegArray ( IRRegArray* );
384
385extern void ppIRRegArray ( IRRegArray* );
386extern Bool eqIRRegArray ( IRRegArray*, IRRegArray* );
387
388
389/* ------------------ Temporaries ------------------ */
390
391/* This represents a temporary, eg. t1.  The IR optimiser relies on the
392   fact that IRTemps are 32-bit ints.  Do not change them to be ints of
393   any other size. */
394typedef UInt IRTemp;
395
396/* Pretty-print an IRTemp. */
397extern void ppIRTemp ( IRTemp );
398
399#define IRTemp_INVALID ((IRTemp)0xFFFFFFFF)
400
401
402/* --------------- Primops (arity 1,2,3 and 4) --------------- */
403
404/* Primitive operations that are used in Unop, Binop, Triop and Qop
405   IRExprs.  Once we take into account integer, floating point and SIMD
406   operations of all the different sizes, there are quite a lot of them.
407   Most instructions supported by the architectures that Vex supports
408   (x86, PPC, etc) are represented.  Some more obscure ones (eg. cpuid)
409   are not;  they are instead handled with dirty helpers that emulate
410   their functionality.  Such obscure ones are thus not directly visible
411   in the IR, but their effects on guest state (memory and registers)
412   are made visible via the annotations in IRDirty structures.
413*/
414typedef
415   enum {
      /* -- Do not change this ordering.  The IR generators rely on
            (eg) Iop_Add64 == Iop_Add8 + 3. -- */
418
419      Iop_INVALID=0x1400,
420      Iop_Add8,  Iop_Add16,  Iop_Add32,  Iop_Add64,
421      Iop_Sub8,  Iop_Sub16,  Iop_Sub32,  Iop_Sub64,
422      /* Signless mul.  MullS/MullU is elsewhere. */
423      Iop_Mul8,  Iop_Mul16,  Iop_Mul32,  Iop_Mul64,
424      Iop_Or8,   Iop_Or16,   Iop_Or32,   Iop_Or64,
425      Iop_And8,  Iop_And16,  Iop_And32,  Iop_And64,
426      Iop_Xor8,  Iop_Xor16,  Iop_Xor32,  Iop_Xor64,
427      Iop_Shl8,  Iop_Shl16,  Iop_Shl32,  Iop_Shl64,
428      Iop_Shr8,  Iop_Shr16,  Iop_Shr32,  Iop_Shr64,
429      Iop_Sar8,  Iop_Sar16,  Iop_Sar32,  Iop_Sar64,
430      /* Integer comparisons. */
431      Iop_CmpEQ8,  Iop_CmpEQ16,  Iop_CmpEQ32,  Iop_CmpEQ64,
432      Iop_CmpNE8,  Iop_CmpNE16,  Iop_CmpNE32,  Iop_CmpNE64,
433      /* Tags for unary ops */
434      Iop_Not8,  Iop_Not16,  Iop_Not32,  Iop_Not64,
435
436      /* Exactly like CmpEQ8/16/32/64, but carrying the additional
437         hint that these compute the success/failure of a CAS
438         operation, and hence are almost certainly applied to two
439         copies of the same value, which in turn has implications for
440         Memcheck's instrumentation. */
441      Iop_CasCmpEQ8, Iop_CasCmpEQ16, Iop_CasCmpEQ32, Iop_CasCmpEQ64,
442      Iop_CasCmpNE8, Iop_CasCmpNE16, Iop_CasCmpNE32, Iop_CasCmpNE64,
443
444      /* Exactly like CmpNE8/16/32/64, but carrying the additional
445         hint that these needs expensive definedness tracking. */
446      Iop_ExpCmpNE8, Iop_ExpCmpNE16, Iop_ExpCmpNE32, Iop_ExpCmpNE64,
447
448      /* -- Ordering not important after here. -- */
449
450      /* Widening multiplies */
451      Iop_MullS8, Iop_MullS16, Iop_MullS32, Iop_MullS64,
452      Iop_MullU8, Iop_MullU16, Iop_MullU32, Iop_MullU64,
453
      /* Weirdo integer stuff */
455      Iop_Clz64, Iop_Clz32,   /* count leading zeroes */
456      Iop_Ctz64, Iop_Ctz32,   /* count trailing zeros */
457      /* Ctz64/Ctz32/Clz64/Clz32 are UNDEFINED when given arguments of
458         zero.  You must ensure they are never given a zero argument.
459      */
460
461      /* Standard integer comparisons */
462      Iop_CmpLT32S, Iop_CmpLT64S,
463      Iop_CmpLE32S, Iop_CmpLE64S,
464      Iop_CmpLT32U, Iop_CmpLT64U,
465      Iop_CmpLE32U, Iop_CmpLE64U,
466
467      /* As a sop to Valgrind-Memcheck, the following are useful. */
468      Iop_CmpNEZ8, Iop_CmpNEZ16,  Iop_CmpNEZ32,  Iop_CmpNEZ64,
469      Iop_CmpwNEZ32, Iop_CmpwNEZ64, /* all-0s -> all-Os; other -> all-1s */
470      Iop_Left8, Iop_Left16, Iop_Left32, Iop_Left64, /*  \x -> x | -x */
471      Iop_Max32U, /* unsigned max */
472
473      /* PowerPC-style 3-way integer comparisons.  Without them it is
474         difficult to simulate PPC efficiently.
475         op(x,y) | x < y  = 0x8 else
476                 | x > y  = 0x4 else
477                 | x == y = 0x2
478      */
479      Iop_CmpORD32U, Iop_CmpORD64U,
480      Iop_CmpORD32S, Iop_CmpORD64S,
481
482      /* Division */
483      /* TODO: clarify semantics wrt rounding, negative values, whatever */
484      Iop_DivU32,   // :: I32,I32 -> I32 (simple div, no mod)
485      Iop_DivS32,   // ditto, signed
486      Iop_DivU64,   // :: I64,I64 -> I64 (simple div, no mod)
487      Iop_DivS64,   // ditto, signed
488      Iop_DivU64E,  // :: I64,I64 -> I64 (dividend is 64-bit arg (hi)
489                    //                    concat with 64 0's (low))
490      Iop_DivS64E,  // ditto, signed
491      Iop_DivU32E,  // :: I32,I32 -> I32 (dividend is 32-bit arg (hi)
492                    // concat with 32 0's (low))
493      Iop_DivS32E,  // ditto, signed
494
495      Iop_DivModU64to32, // :: I64,I32 -> I64
496                         // of which lo half is div and hi half is mod
497      Iop_DivModS64to32, // ditto, signed
498
499      Iop_DivModU128to64, // :: V128,I64 -> V128
500                          // of which lo half is div and hi half is mod
501      Iop_DivModS128to64, // ditto, signed
502
503      Iop_DivModS64to64, // :: I64,I64 -> I128
504                         // of which lo half is div and hi half is mod
505
506      /* Integer conversions.  Some of these are redundant (eg
507         Iop_64to8 is the same as Iop_64to32 and then Iop_32to8), but
508         having a complete set reduces the typical dynamic size of IR
509         and makes the instruction selectors easier to write. */
510
511      /* Widening conversions */
512      Iop_8Uto16, Iop_8Uto32,  Iop_8Uto64,
513                  Iop_16Uto32, Iop_16Uto64,
514                               Iop_32Uto64,
515      Iop_8Sto16, Iop_8Sto32,  Iop_8Sto64,
516                  Iop_16Sto32, Iop_16Sto64,
517                               Iop_32Sto64,
518
519      /* Narrowing conversions */
520      Iop_64to8, Iop_32to8, Iop_64to16,
521      /* 8 <-> 16 bit conversions */
522      Iop_16to8,      // :: I16 -> I8, low half
523      Iop_16HIto8,    // :: I16 -> I8, high half
524      Iop_8HLto16,    // :: (I8,I8) -> I16
525      /* 16 <-> 32 bit conversions */
526      Iop_32to16,     // :: I32 -> I16, low half
527      Iop_32HIto16,   // :: I32 -> I16, high half
528      Iop_16HLto32,   // :: (I16,I16) -> I32
529      /* 32 <-> 64 bit conversions */
530      Iop_64to32,     // :: I64 -> I32, low half
531      Iop_64HIto32,   // :: I64 -> I32, high half
532      Iop_32HLto64,   // :: (I32,I32) -> I64
533      /* 64 <-> 128 bit conversions */
534      Iop_128to64,    // :: I128 -> I64, low half
535      Iop_128HIto64,  // :: I128 -> I64, high half
536      Iop_64HLto128,  // :: (I64,I64) -> I128
537      /* 1-bit stuff */
538      Iop_Not1,   /* :: Ity_Bit -> Ity_Bit */
539      Iop_32to1,  /* :: Ity_I32 -> Ity_Bit, just select bit[0] */
540      Iop_64to1,  /* :: Ity_I64 -> Ity_Bit, just select bit[0] */
541      Iop_1Uto8,  /* :: Ity_Bit -> Ity_I8,  unsigned widen */
542      Iop_1Uto32, /* :: Ity_Bit -> Ity_I32, unsigned widen */
543      Iop_1Uto64, /* :: Ity_Bit -> Ity_I64, unsigned widen */
544      Iop_1Sto8,  /* :: Ity_Bit -> Ity_I8,  signed widen */
545      Iop_1Sto16, /* :: Ity_Bit -> Ity_I16, signed widen */
546      Iop_1Sto32, /* :: Ity_Bit -> Ity_I32, signed widen */
547      Iop_1Sto64, /* :: Ity_Bit -> Ity_I64, signed widen */
548
549      /* ------ Floating point.  We try to be IEEE754 compliant. ------ */
550
551      /* --- Simple stuff as mandated by 754. --- */
552
553      /* Binary operations, with rounding. */
554      /* :: IRRoundingMode(I32) x F64 x F64 -> F64 */
555      Iop_AddF64, Iop_SubF64, Iop_MulF64, Iop_DivF64,
556
557      /* :: IRRoundingMode(I32) x F32 x F32 -> F32 */
558      Iop_AddF32, Iop_SubF32, Iop_MulF32, Iop_DivF32,
559
560      /* Variants of the above which produce a 64-bit result but which
561         round their result to a IEEE float range first. */
562      /* :: IRRoundingMode(I32) x F64 x F64 -> F64 */
563      Iop_AddF64r32, Iop_SubF64r32, Iop_MulF64r32, Iop_DivF64r32,
564
565      /* Unary operations, without rounding. */
566      /* :: F64 -> F64 */
567      Iop_NegF64, Iop_AbsF64,
568
569      /* :: F32 -> F32 */
570      Iop_NegF32, Iop_AbsF32,
571
572      /* Unary operations, with rounding. */
573      /* :: IRRoundingMode(I32) x F64 -> F64 */
574      Iop_SqrtF64,
575
576      /* :: IRRoundingMode(I32) x F32 -> F32 */
577      Iop_SqrtF32,
578
579      /* Comparison, yielding GT/LT/EQ/UN(ordered), as per the following:
580            0x45 Unordered
581            0x01 LT
582            0x00 GT
583            0x40 EQ
584         This just happens to be the Intel encoding.  The values
585         are recorded in the type IRCmpF64Result.
586      */
587      /* :: F64 x F64 -> IRCmpF64Result(I32) */
588      Iop_CmpF64,
589      Iop_CmpF32,
590      Iop_CmpF128,
591
592      /* --- Int to/from FP conversions. --- */
593
594      /* For the most part, these take a first argument :: Ity_I32 (as
595         IRRoundingMode) which is an indication of the rounding mode
596         to use, as per the following encoding ("the standard
597         encoding"):
598            00b  to nearest (the default)
599            01b  to -infinity
600            10b  to +infinity
601            11b  to zero
602         This just happens to be the Intel encoding.  For reference only,
603         the PPC encoding is:
604            00b  to nearest (the default)
605            01b  to zero
606            10b  to +infinity
607            11b  to -infinity
608         Any PPC -> IR front end will have to translate these PPC
609         encodings, as encoded in the guest state, to the standard
610         encodings, to pass to the primops.
611         For reference only, the ARM VFP encoding is:
612            00b  to nearest
613            01b  to +infinity
614            10b  to -infinity
615            11b  to zero
616         Again, this will have to be converted to the standard encoding
617         to pass to primops.
618
619         If one of these conversions gets an out-of-range condition,
620         or a NaN, as an argument, the result is host-defined.  On x86
621         the "integer indefinite" value 0x80..00 is produced.  On PPC
622         it is either 0x80..00 or 0x7F..FF depending on the sign of
623         the argument.
624
625         On ARMvfp, when converting to a signed integer result, the
626         overflow result is 0x80..00 for negative args and 0x7F..FF
627         for positive args.  For unsigned integer results it is
628         0x00..00 and 0xFF..FF respectively.
629
630         Rounding is required whenever the destination type cannot
631         represent exactly all values of the source type.
632      */
633      Iop_F64toI16S, /* IRRoundingMode(I32) x F64 -> signed I16 */
634      Iop_F64toI32S, /* IRRoundingMode(I32) x F64 -> signed I32 */
635      Iop_F64toI64S, /* IRRoundingMode(I32) x F64 -> signed I64 */
636      Iop_F64toI64U, /* IRRoundingMode(I32) x F64 -> unsigned I64 */
637
638      Iop_F64toI32U, /* IRRoundingMode(I32) x F64 -> unsigned I32 */
639
640      Iop_I32StoF64, /*                       signed I32 -> F64 */
641      Iop_I64StoF64, /* IRRoundingMode(I32) x signed I64 -> F64 */
642      Iop_I64UtoF64, /* IRRoundingMode(I32) x unsigned I64 -> F64 */
643      Iop_I64UtoF32, /* IRRoundingMode(I32) x unsigned I64 -> F32 */
644
645      Iop_I32UtoF32, /* IRRoundingMode(I32) x unsigned I32 -> F32 */
646      Iop_I32UtoF64, /*                       unsigned I32 -> F64 */
647
648      Iop_F32toI32S, /* IRRoundingMode(I32) x F32 -> signed I32 */
649      Iop_F32toI64S, /* IRRoundingMode(I32) x F32 -> signed I64 */
650      Iop_F32toI32U, /* IRRoundingMode(I32) x F32 -> unsigned I32 */
651      Iop_F32toI64U, /* IRRoundingMode(I32) x F32 -> unsigned I64 */
652
653      Iop_I32StoF32, /* IRRoundingMode(I32) x signed I32 -> F32 */
654      Iop_I64StoF32, /* IRRoundingMode(I32) x signed I64 -> F32 */
655
656      /* Conversion between floating point formats */
657      Iop_F32toF64,  /*                       F32 -> F64 */
658      Iop_F64toF32,  /* IRRoundingMode(I32) x F64 -> F32 */
659
660      /* Reinterpretation.  Take an F64 and produce an I64 with
661         the same bit pattern, or vice versa. */
662      Iop_ReinterpF64asI64, Iop_ReinterpI64asF64,
663      Iop_ReinterpF32asI32, Iop_ReinterpI32asF32,
664
665      /* Support for 128-bit floating point */
666      Iop_F64HLtoF128,/* (high half of F128,low half of F128) -> F128 */
667      Iop_F128HItoF64,/* F128 -> high half of F128 into a F64 register */
668      Iop_F128LOtoF64,/* F128 -> low  half of F128 into a F64 register */
669
670      /* :: IRRoundingMode(I32) x F128 x F128 -> F128 */
671      Iop_AddF128, Iop_SubF128, Iop_MulF128, Iop_DivF128,
672
673      /* :: F128 -> F128 */
674      Iop_NegF128, Iop_AbsF128,
675
676      /* :: IRRoundingMode(I32) x F128 -> F128 */
677      Iop_SqrtF128,
678
679      Iop_I32StoF128, /*                signed I32  -> F128 */
680      Iop_I64StoF128, /*                signed I64  -> F128 */
681      Iop_I32UtoF128, /*              unsigned I32  -> F128 */
682      Iop_I64UtoF128, /*              unsigned I64  -> F128 */
683      Iop_F32toF128,  /*                       F32  -> F128 */
684      Iop_F64toF128,  /*                       F64  -> F128 */
685
686      Iop_F128toI32S, /* IRRoundingMode(I32) x F128 -> signed I32  */
687      Iop_F128toI64S, /* IRRoundingMode(I32) x F128 -> signed I64  */
688      Iop_F128toI32U, /* IRRoundingMode(I32) x F128 -> unsigned I32  */
689      Iop_F128toI64U, /* IRRoundingMode(I32) x F128 -> unsigned I64  */
690      Iop_F128toF64,  /* IRRoundingMode(I32) x F128 -> F64         */
691      Iop_F128toF32,  /* IRRoundingMode(I32) x F128 -> F32         */
692
693      /* --- guest x86/amd64 specifics, not mandated by 754. --- */
694
695      /* Binary ops, with rounding. */
696      /* :: IRRoundingMode(I32) x F64 x F64 -> F64 */
697      Iop_AtanF64,       /* FPATAN,  arctan(arg1/arg2)       */
698      Iop_Yl2xF64,       /* FYL2X,   arg1 * log2(arg2)       */
699      Iop_Yl2xp1F64,     /* FYL2XP1, arg1 * log2(arg2+1.0)   */
700      Iop_PRemF64,       /* FPREM,   non-IEEE remainder(arg1/arg2)    */
701      Iop_PRemC3210F64,  /* C3210 flags resulting from FPREM, :: I32 */
702      Iop_PRem1F64,      /* FPREM1,  IEEE remainder(arg1/arg2)    */
703      Iop_PRem1C3210F64, /* C3210 flags resulting from FPREM1, :: I32 */
704      Iop_ScaleF64,      /* FSCALE,  arg1 * (2^RoundTowardsZero(arg2)) */
705      /* Note that on x86 guest, PRem1{C3210} has the same behaviour
706         as the IEEE mandated RemF64, except it is limited in the
707         range of its operand.  Hence the partialness. */
708
709      /* Unary ops, with rounding. */
710      /* :: IRRoundingMode(I32) x F64 -> F64 */
711      Iop_SinF64,    /* FSIN */
712      Iop_CosF64,    /* FCOS */
713      Iop_TanF64,    /* FTAN */
714      Iop_2xm1F64,   /* (2^arg - 1.0) */
715      Iop_RoundF64toInt, /* F64 value to nearest integral value (still
716                            as F64) */
717      Iop_RoundF32toInt, /* F32 value to nearest integral value (still
718                            as F32) */
719
720      /* --- guest s390 specifics, not mandated by 754. --- */
721
722      /* Fused multiply-add/sub */
723      /* :: IRRoundingMode(I32) x F32 x F32 x F32 -> F32
724            (computes arg2 * arg3 +/- arg4) */
725      Iop_MAddF32, Iop_MSubF32,
726
727      /* --- guest ppc32/64 specifics, not mandated by 754. --- */
728
729      /* Ternary operations, with rounding. */
730      /* Fused multiply-add/sub, with 112-bit intermediate
731         precision for ppc.
732         Also used to implement fused multiply-add/sub for s390. */
733      /* :: IRRoundingMode(I32) x F64 x F64 x F64 -> F64
734            (computes arg2 * arg3 +/- arg4) */
735      Iop_MAddF64, Iop_MSubF64,
736
737      /* Variants of the above which produce a 64-bit result but which
738         round their result to a IEEE float range first. */
739      /* :: IRRoundingMode(I32) x F64 x F64 x F64 -> F64 */
740      Iop_MAddF64r32, Iop_MSubF64r32,
741
742      /* :: F64 -> F64 */
743      Iop_Est5FRSqrt,    /* reciprocal square root estimate, 5 good bits */
744      Iop_RoundF64toF64_NEAREST, /* frin */
745      Iop_RoundF64toF64_NegINF,  /* frim */
746      Iop_RoundF64toF64_PosINF,  /* frip */
747      Iop_RoundF64toF64_ZERO,    /* friz */
748
749      /* :: F64 -> F32 */
750      Iop_TruncF64asF32, /* do F64->F32 truncation as per 'fsts' */
751
752      /* :: IRRoundingMode(I32) x F64 -> F64 */
753      Iop_RoundF64toF32, /* round F64 to nearest F32 value (still as F64) */
754      /* NB: pretty much the same as Iop_F64toF32, except no change
755         of type. */
756
757      /* ------------------ 32-bit SIMD Integer ------------------ */
758
759      /* 32x1 saturating add/sub (ok, well, not really SIMD :) */
760      Iop_QAdd32S,
761      Iop_QSub32S,
762
763      /* 16x2 add/sub, also signed/unsigned saturating variants */
764      Iop_Add16x2, Iop_Sub16x2,
765      Iop_QAdd16Sx2, Iop_QAdd16Ux2,
766      Iop_QSub16Sx2, Iop_QSub16Ux2,
767
768      /* 16x2 signed/unsigned halving add/sub.  For each lane, these
769         compute bits 16:1 of (eg) sx(argL) + sx(argR),
770         or zx(argL) - zx(argR) etc. */
771      Iop_HAdd16Ux2, Iop_HAdd16Sx2,
772      Iop_HSub16Ux2, Iop_HSub16Sx2,
773
774      /* 8x4 add/sub, also signed/unsigned saturating variants */
775      Iop_Add8x4, Iop_Sub8x4,
776      Iop_QAdd8Sx4, Iop_QAdd8Ux4,
777      Iop_QSub8Sx4, Iop_QSub8Ux4,
778
779      /* 8x4 signed/unsigned halving add/sub.  For each lane, these
780         compute bits 8:1 of (eg) sx(argL) + sx(argR),
781         or zx(argL) - zx(argR) etc. */
782      Iop_HAdd8Ux4, Iop_HAdd8Sx4,
783      Iop_HSub8Ux4, Iop_HSub8Sx4,
784
785      /* 8x4 sum of absolute unsigned differences. */
786      Iop_Sad8Ux4,
787
788      /* MISC (vector integer cmp != 0) */
789      Iop_CmpNEZ16x2, Iop_CmpNEZ8x4,
790
791      /* ------------------ 64-bit SIMD FP ------------------------ */
792
793      /* Conversion to/from int */
794      Iop_I32UtoFx2,  Iop_I32StoFx2,    /* I32x2 -> F32x2 */
795      Iop_FtoI32Ux2_RZ,  Iop_FtoI32Sx2_RZ,    /* F32x2 -> I32x2 */
796      /* Fixed32 format is floating-point number with fixed number of fraction
797         bits. The number of fraction bits is passed as a second argument of
798         type I8. */
799      Iop_F32ToFixed32Ux2_RZ, Iop_F32ToFixed32Sx2_RZ, /* fp -> fixed-point */
800      Iop_Fixed32UToF32x2_RN, Iop_Fixed32SToF32x2_RN, /* fixed-point -> fp */
801
802      /* Binary operations */
803      Iop_Max32Fx2,      Iop_Min32Fx2,
804      /* Pairwise Min and Max. See integer pairwise operations for more
805         details. */
806      Iop_PwMax32Fx2,    Iop_PwMin32Fx2,
807      /* Note: For the following compares, the arm front-end assumes a
808         nan in a lane of either argument returns zero for that lane. */
809      Iop_CmpEQ32Fx2, Iop_CmpGT32Fx2, Iop_CmpGE32Fx2,
810
811      /* Vector Reciprocal Estimate finds an approximate reciprocal of each
812      element in the operand vector, and places the results in the destination
813      vector.  */
814      Iop_Recip32Fx2,
815
816      /* Vector Reciprocal Step computes (2.0 - arg1 * arg2).
817         Note, that if one of the arguments is zero and another one is infinity
818         of arbitrary sign the result of the operation is 2.0. */
819      Iop_Recps32Fx2,
820
821      /* Vector Reciprocal Square Root Estimate finds an approximate reciprocal
822         square root of each element in the operand vector. */
823      Iop_Rsqrte32Fx2,
824
825      /* Vector Reciprocal Square Root Step computes (3.0 - arg1 * arg2) / 2.0.
826         Note, that if one of the arguments is zero and another one is infinity
827         of arbitrary sign the result of the operation is 1.5. */
828      Iop_Rsqrts32Fx2,
829
830      /* Unary */
831      Iop_Neg32Fx2, Iop_Abs32Fx2,
832
833      /* ------------------ 64-bit SIMD Integer. ------------------ */
834
835      /* MISC (vector integer cmp != 0) */
836      Iop_CmpNEZ8x8, Iop_CmpNEZ16x4, Iop_CmpNEZ32x2,
837
838      /* ADDITION (normal / unsigned sat / signed sat) */
839      Iop_Add8x8,   Iop_Add16x4,   Iop_Add32x2,
840      Iop_QAdd8Ux8, Iop_QAdd16Ux4, Iop_QAdd32Ux2, Iop_QAdd64Ux1,
841      Iop_QAdd8Sx8, Iop_QAdd16Sx4, Iop_QAdd32Sx2, Iop_QAdd64Sx1,
842
843      /* PAIRWISE operations */
844      /* Iop_PwFoo16x4( [a,b,c,d], [e,f,g,h] ) =
845            [Foo16(a,b), Foo16(c,d), Foo16(e,f), Foo16(g,h)] */
846      Iop_PwAdd8x8,  Iop_PwAdd16x4,  Iop_PwAdd32x2,
847      Iop_PwMax8Sx8, Iop_PwMax16Sx4, Iop_PwMax32Sx2,
848      Iop_PwMax8Ux8, Iop_PwMax16Ux4, Iop_PwMax32Ux2,
849      Iop_PwMin8Sx8, Iop_PwMin16Sx4, Iop_PwMin32Sx2,
850      Iop_PwMin8Ux8, Iop_PwMin16Ux4, Iop_PwMin32Ux2,
851      /* Longening variant is unary. The resulting vector contains half as
852         many elements as the operand, but they are twice as wide.
853         Example:
854            Iop_PwAddL16Ux4( [a,b,c,d] ) = [a+b,c+d]
855               where a+b and c+d are unsigned 32-bit values. */
856      Iop_PwAddL8Ux8, Iop_PwAddL16Ux4, Iop_PwAddL32Ux2,
857      Iop_PwAddL8Sx8, Iop_PwAddL16Sx4, Iop_PwAddL32Sx2,
858
859      /* SUBTRACTION (normal / unsigned sat / signed sat) */
860      Iop_Sub8x8,   Iop_Sub16x4,   Iop_Sub32x2,
861      Iop_QSub8Ux8, Iop_QSub16Ux4, Iop_QSub32Ux2, Iop_QSub64Ux1,
862      Iop_QSub8Sx8, Iop_QSub16Sx4, Iop_QSub32Sx2, Iop_QSub64Sx1,
863
864      /* ABSOLUTE VALUE */
865      Iop_Abs8x8, Iop_Abs16x4, Iop_Abs32x2,
866
867      /* MULTIPLICATION (normal / high half of signed/unsigned / polynomial ) */
868      Iop_Mul8x8, Iop_Mul16x4, Iop_Mul32x2,
869      Iop_Mul32Fx2,
870      Iop_MulHi16Ux4,
871      Iop_MulHi16Sx4,
872      /* Polynomial multiplication treats its arguments as coefficients of
873         polynomials over {0, 1}. */
874      Iop_PolynomialMul8x8,
875
876      /* Vector Saturating Doubling Multiply Returning High Half and
877         Vector Saturating Rounding Doubling Multiply Returning High Half */
878      /* These IROp's multiply corresponding elements in two vectors, double
879         the results, and place the most significant half of the final results
880         in the destination vector. The results are truncated or rounded. If
881         any of the results overflow, they are saturated. */
882      Iop_QDMulHi16Sx4, Iop_QDMulHi32Sx2,
883      Iop_QRDMulHi16Sx4, Iop_QRDMulHi32Sx2,
884
885      /* AVERAGING: note: (arg1 + arg2 + 1) >>u 1 */
886      Iop_Avg8Ux8,
887      Iop_Avg16Ux4,
888
889      /* MIN/MAX */
890      Iop_Max8Sx8, Iop_Max16Sx4, Iop_Max32Sx2,
891      Iop_Max8Ux8, Iop_Max16Ux4, Iop_Max32Ux2,
892      Iop_Min8Sx8, Iop_Min16Sx4, Iop_Min32Sx2,
893      Iop_Min8Ux8, Iop_Min16Ux4, Iop_Min32Ux2,
894
895      /* COMPARISON */
896      Iop_CmpEQ8x8,  Iop_CmpEQ16x4,  Iop_CmpEQ32x2,
897      Iop_CmpGT8Ux8, Iop_CmpGT16Ux4, Iop_CmpGT32Ux2,
898      Iop_CmpGT8Sx8, Iop_CmpGT16Sx4, Iop_CmpGT32Sx2,
899
900      /* COUNT ones / leading zeroes / leading sign bits (not including topmost
901         bit) */
902      Iop_Cnt8x8,
903      Iop_Clz8Sx8, Iop_Clz16Sx4, Iop_Clz32Sx2,
904      Iop_Cls8Sx8, Iop_Cls16Sx4, Iop_Cls32Sx2,
905      Iop_Clz64x2,
906
907      /* VECTOR x VECTOR SHIFT / ROTATE */
908      Iop_Shl8x8, Iop_Shl16x4, Iop_Shl32x2,
909      Iop_Shr8x8, Iop_Shr16x4, Iop_Shr32x2,
910      Iop_Sar8x8, Iop_Sar16x4, Iop_Sar32x2,
911      Iop_Sal8x8, Iop_Sal16x4, Iop_Sal32x2, Iop_Sal64x1,
912
913      /* VECTOR x SCALAR SHIFT (shift amt :: Ity_I8) */
914      Iop_ShlN8x8, Iop_ShlN16x4, Iop_ShlN32x2,
915      Iop_ShrN8x8, Iop_ShrN16x4, Iop_ShrN32x2,
916      Iop_SarN8x8, Iop_SarN16x4, Iop_SarN32x2,
917
918      /* VECTOR x VECTOR SATURATING SHIFT */
919      Iop_QShl8x8, Iop_QShl16x4, Iop_QShl32x2, Iop_QShl64x1,
920      Iop_QSal8x8, Iop_QSal16x4, Iop_QSal32x2, Iop_QSal64x1,
921      /* VECTOR x INTEGER SATURATING SHIFT */
922      Iop_QShlN8Sx8, Iop_QShlN16Sx4, Iop_QShlN32Sx2, Iop_QShlN64Sx1,
923      Iop_QShlN8x8, Iop_QShlN16x4, Iop_QShlN32x2, Iop_QShlN64x1,
924      Iop_QSalN8x8, Iop_QSalN16x4, Iop_QSalN32x2, Iop_QSalN64x1,
925
926      /* NARROWING (binary)
927         -- narrow 2xI64 into 1xI64, hi half from left arg */
928      /* For saturated narrowing, I believe there are 4 variants of
929         the basic arithmetic operation, depending on the signedness
930         of argument and result.  Here are examples that exemplify
931         what I mean:
932
933         QNarrow16Uto8U ( UShort x )  if (x >u 255) x = 255;
934                                      return x[7:0];
935
936         QNarrow16Sto8S ( Short x )   if (x <s -128) x = -128;
937                                      if (x >s  127) x = 127;
938                                      return x[7:0];
939
940         QNarrow16Uto8S ( UShort x )  if (x >u 127) x = 127;
941                                      return x[7:0];
942
943         QNarrow16Sto8U ( Short x )   if (x <s 0)   x = 0;
944                                      if (x >s 255) x = 255;
945                                      return x[7:0];
946      */
947      Iop_QNarrowBin16Sto8Ux8,
948      Iop_QNarrowBin16Sto8Sx8, Iop_QNarrowBin32Sto16Sx4,
949      Iop_NarrowBin16to8x8,    Iop_NarrowBin32to16x4,
950
951      /* INTERLEAVING */
952      /* Interleave lanes from low or high halves of
953         operands.  Most-significant result lane is from the left
954         arg. */
955      Iop_InterleaveHI8x8, Iop_InterleaveHI16x4, Iop_InterleaveHI32x2,
956      Iop_InterleaveLO8x8, Iop_InterleaveLO16x4, Iop_InterleaveLO32x2,
957      /* Interleave odd/even lanes of operands.  Most-significant result lane
958         is from the left arg.  Note that Interleave{Odd,Even}Lanes32x2 are
959         identical to Interleave{HI,LO}32x2 and so are omitted.*/
960      Iop_InterleaveOddLanes8x8, Iop_InterleaveEvenLanes8x8,
961      Iop_InterleaveOddLanes16x4, Iop_InterleaveEvenLanes16x4,
962
963      /* CONCATENATION -- build a new value by concatenating either
964         the even or odd lanes of both operands.  Note that
965         Cat{Odd,Even}Lanes32x2 are identical to Interleave{HI,LO}32x2
966         and so are omitted. */
967      Iop_CatOddLanes8x8, Iop_CatOddLanes16x4,
968      Iop_CatEvenLanes8x8, Iop_CatEvenLanes16x4,
969
970      /* GET / SET elements of VECTOR
971         GET is binop (I64, I8) -> I<elem_size>
972         SET is triop (I64, I8, I<elem_size>) -> I64 */
973      /* Note: the arm back-end handles only constant second argument */
974      Iop_GetElem8x8, Iop_GetElem16x4, Iop_GetElem32x2,
975      Iop_SetElem8x8, Iop_SetElem16x4, Iop_SetElem32x2,
976
977      /* DUPLICATING -- copy value to all lanes */
978      Iop_Dup8x8,   Iop_Dup16x4,   Iop_Dup32x2,
979
980      /* EXTRACT -- copy 8-arg3 highest bytes from arg1 to 8-arg3 lowest bytes
981         of result and arg3 lowest bytes of arg2 to arg3 highest bytes of
982         result.
983         It is a triop: (I64, I64, I8) -> I64 */
984      /* Note: the arm back-end handles only constant third argument. */
985      Iop_Extract64,
986
987      /* REVERSE the order of elements in each Half-words, Words,
988         Double-words */
989      /* Examples:
990            Reverse16_8x8([a,b,c,d,e,f,g,h]) = [b,a,d,c,f,e,h,g]
991            Reverse32_8x8([a,b,c,d,e,f,g,h]) = [d,c,b,a,h,g,f,e]
992            Reverse64_8x8([a,b,c,d,e,f,g,h]) = [h,g,f,e,d,c,b,a] */
993      Iop_Reverse16_8x8,
994      Iop_Reverse32_8x8, Iop_Reverse32_16x4,
995      Iop_Reverse64_8x8, Iop_Reverse64_16x4, Iop_Reverse64_32x2,
996
997      /* PERMUTING -- copy src bytes to dst,
998         as indexed by control vector bytes:
999            for i in 0 .. 7 . result[i] = argL[ argR[i] ]
1000         argR[i] values may only be in the range 0 .. 7, else behaviour
1001         is undefined. */
1002      Iop_Perm8x8,
1003
1004      /* MISC CONVERSION -- get high bits of each byte lane, a la
1005         x86/amd64 pmovmskb */
1006      Iop_GetMSBs8x8, /* I64 -> I8 */
1007
1008      /* Vector Reciprocal Estimate and Vector Reciprocal Square Root Estimate
1009         See floating-point equivalents for details. */
1010      Iop_Recip32x2, Iop_Rsqrte32x2,
1011
1012      /* ------------------ Decimal Floating Point ------------------ */
1013
1014      /* ARITHMETIC INSTRUCTIONS   64-bit
1015	 ----------------------------------
1016	 IRRoundingMode(I32) X D64 X D64 -> D64
1017      */
1018      Iop_AddD64, Iop_SubD64, Iop_MulD64, Iop_DivD64,
1019
1020      /* ARITHMETIC INSTRUCTIONS  128-bit
1021	 ----------------------------------
1022	 IRRoundingMode(I32) X D128 X D128 -> D128
1023      */
1024      Iop_AddD128, Iop_SubD128, Iop_MulD128, Iop_DivD128,
1025
1026      /* SHIFT SIGNIFICAND INSTRUCTIONS
1027       *    The DFP significand is shifted by the number of digits specified
1028       *    by the U8 operand.  Digits shifted out of the leftmost digit are
1029       *    lost. Zeros are supplied to the vacated positions on the right.
1030       *    The sign of the result is the same as the sign of the original
1031       *    operand.
1032       *
1033       * D64 x U8  -> D64    left shift and right shift respectively */
1034      Iop_ShlD64, Iop_ShrD64,
1035
1036      /* D128 x U8  -> D128  left shift and right shift respectively */
1037      Iop_ShlD128, Iop_ShrD128,
1038
1039
1040      /* FORMAT CONVERSION INSTRUCTIONS
1041       *   D32 -> D64
1042       */
1043      Iop_D32toD64,
1044
1045      /*   D64 -> D128 */
1046      Iop_D64toD128,
1047
1048      /*   I32S -> D128 */
1049      Iop_I32StoD128,
1050
1051      /*   I32U -> D128 */
1052      Iop_I32UtoD128,
1053
1054      /*   I64S -> D128 */
1055      Iop_I64StoD128,
1056
1057      /*   I64U -> D128 */
1058      Iop_I64UtoD128,
1059
1060      /*   IRRoundingMode(I32) x D64 -> D32 */
1061      Iop_D64toD32,
1062
1063      /*   IRRoundingMode(I32) x D128 -> D64 */
1064      Iop_D128toD64,
1065
1066      /*   I32S -> D64 */
1067      Iop_I32StoD64,
1068
1069      /*   I32U -> D64 */
1070      Iop_I32UtoD64,
1071
1072      /*   IRRoundingMode(I32) x I64 -> D64 */
1073      Iop_I64StoD64,
1074
1075      /*   IRRoundingMode(I32) x I64 -> D64 */
1076      Iop_I64UtoD64,
1077
1078      /*   IRRoundingMode(I32) x D64 -> I32 */
1079      Iop_D64toI32S,
1080
1081      /*   IRRoundingMode(I32) x D64 -> I32 */
1082      Iop_D64toI32U,
1083
1084      /*   IRRoundingMode(I32) x D64 -> I64 */
1085      Iop_D64toI64S,
1086
1087      /*   IRRoundingMode(I32) x D64 -> I64 */
1088      Iop_D64toI64U,
1089
1090      /*   IRRoundingMode(I32) x D128 -> I32 */
1091      Iop_D128toI32S,
1092
1093      /*   IRRoundingMode(I32) x D128 -> I32 */
1094      Iop_D128toI32U,
1095
1096      /*   IRRoundingMode(I32) x D128 -> I64 */
1097      Iop_D128toI64S,
1098
1099      /*   IRRoundingMode(I32) x D128 -> I64 */
1100      Iop_D128toI64U,
1101
1102      /*   IRRoundingMode(I32) x F32 -> D32 */
1103      Iop_F32toD32,
1104
1105      /*   IRRoundingMode(I32) x F32 -> D64 */
1106      Iop_F32toD64,
1107
1108      /*   IRRoundingMode(I32) x F32 -> D128 */
1109      Iop_F32toD128,
1110
1111      /*   IRRoundingMode(I32) x F64 -> D32 */
1112      Iop_F64toD32,
1113
1114      /*   IRRoundingMode(I32) x F64 -> D64 */
1115      Iop_F64toD64,
1116
1117      /*   IRRoundingMode(I32) x F64 -> D128 */
1118      Iop_F64toD128,
1119
1120      /*   IRRoundingMode(I32) x F128 -> D32 */
1121      Iop_F128toD32,
1122
1123      /*   IRRoundingMode(I32) x F128 -> D64 */
1124      Iop_F128toD64,
1125
1126      /*   IRRoundingMode(I32) x F128 -> D128 */
1127      Iop_F128toD128,
1128
1129      /*   IRRoundingMode(I32) x D32 -> F32 */
1130      Iop_D32toF32,
1131
1132      /*   IRRoundingMode(I32) x D32 -> F64 */
1133      Iop_D32toF64,
1134
1135      /*   IRRoundingMode(I32) x D32 -> F128 */
1136      Iop_D32toF128,
1137
1138      /*   IRRoundingMode(I32) x D64 -> F32 */
1139      Iop_D64toF32,
1140
1141      /*   IRRoundingMode(I32) x D64 -> F64 */
1142      Iop_D64toF64,
1143
1144      /*   IRRoundingMode(I32) x D64 -> F128 */
1145      Iop_D64toF128,
1146
1147      /*   IRRoundingMode(I32) x D128 -> F32 */
1148      Iop_D128toF32,
1149
1150      /*   IRRoundingMode(I32) x D128 -> F64 */
1151      Iop_D128toF64,
1152
1153      /*   IRRoundingMode(I32) x D128 -> F128 */
1154      Iop_D128toF128,
1155
1156      /* ROUNDING INSTRUCTIONS
1157       * IRRoundingMode(I32) x D64 -> D64
1158       * The D64 operand, if a finite number, is rounded to a
1159       * floating point integer value, i.e. no fractional part.
1160       */
1161      Iop_RoundD64toInt,
1162
1163      /* IRRoundingMode(I32) x D128 -> D128 */
1164      Iop_RoundD128toInt,
1165
1166      /* COMPARE INSTRUCTIONS
1167       * D64 x D64 -> IRCmpD64Result(I32) */
1168      Iop_CmpD64,
1169
1170      /* D128 x D128 -> IRCmpD128Result(I32) */
1171      Iop_CmpD128,
1172
1173      /* COMPARE BIASED EXPONENT INSTRUCTIONS
1174       * D64 x D64 -> IRCmpD64Result(I32) */
1175      Iop_CmpExpD64,
1176
1177      /* D128 x D128 -> IRCmpD128Result(I32) */
1178      Iop_CmpExpD128,
1179
1180      /* QUANTIZE AND ROUND INSTRUCTIONS
1181       * The source operand is converted and rounded to the form with the
1182       * immediate exponent specified by the rounding and exponent parameter.
1183       *
1184       * The second operand is converted and rounded to the form
1185       * of the first operand's exponent and then rounded based on the specified
1186       * rounding mode parameter.
1187       *
1188       * IRRoundingMode(I32) x D64 x D64-> D64 */
1189      Iop_QuantizeD64,
1190
1191      /* IRRoundingMode(I32) x D128 x D128 -> D128 */
1192      Iop_QuantizeD128,
1193
1194      /* IRRoundingMode(I32) x I8 x D64 -> D64
1195       *    The Decimal Floating point operand is rounded to the requested
1196       *    significance given by the I8 operand as specified by the rounding
1197       *    mode.
1198       */
1199      Iop_SignificanceRoundD64,
1200
1201      /* IRRoundingMode(I32) x I8 x D128 -> D128 */
1202      Iop_SignificanceRoundD128,
1203
1204      /* EXTRACT AND INSERT INSTRUCTIONS
1205       * D64 -> I64
1206       *    The exponent of the D32 or D64 operand is extracted.  The
1207       *    extracted exponent is converted to a 64-bit signed binary integer.
1208       */
1209      Iop_ExtractExpD64,
1210
1211      /* D128 -> I64 */
1212      Iop_ExtractExpD128,
1213
1214      /* D64 -> I64
1215       * The number of significand digits of the D64 operand is extracted.
1216       * The number is stored as a 64-bit signed binary integer.
1217       */
1218      Iop_ExtractSigD64,
1219
1220      /* D128 -> I64 */
1221      Iop_ExtractSigD128,
1222
1223      /* I64 x D64  -> D64
1224       *    The exponent is specified by the first I64 operand; the signed
1225       *    significand is given by the second I64 value.  The result is a D64
1226       *    value consisting of the specified significand and exponent whose
1227       *    sign is that of the specified significand.
1228       */
1229      Iop_InsertExpD64,
1230
1231      /* I64 x D128 -> D128 */
1232      Iop_InsertExpD128,
1233
1234      /* Support for 128-bit DFP type */
1235      Iop_D64HLtoD128, Iop_D128HItoD64, Iop_D128LOtoD64,
1236
1237      /*  I64 -> I64
1238       *     Convert 50-bit densely packed BCD string to 60 bit BCD string
1239       */
1240      Iop_DPBtoBCD,
1241
1242      /* I64 -> I64
1243       *     Convert 60 bit BCD string to 50-bit densely packed BCD string
1244       */
1245      Iop_BCDtoDPB,
1246
1247      /* BCD arithmetic instructions, (V128, V128) -> V128
1248       * The BCD format is the same as that used in the BCD<->DPB conversion
1249       * routines, except using 124 digits (vs 60) plus the trailing 4-bit
1250       * signed code. */
1251      Iop_BCDAdd, Iop_BCDSub,
1252
1253      /* Conversion I64 -> D64 */
1254      Iop_ReinterpI64asD64,
1255
1256      /* Conversion D64 -> I64 */
1257      Iop_ReinterpD64asI64,
1258
1259      /* ------------------ 128-bit SIMD FP. ------------------ */
1260
1261      /* --- 32x4 vector FP --- */
1262
1263      /* ternary :: IRRoundingMode(I32) x V128 x V128 -> V128 */
1264      Iop_Add32Fx4, Iop_Sub32Fx4, Iop_Mul32Fx4, Iop_Div32Fx4,
1265
1266      /* binary */
1267      Iop_Max32Fx4, Iop_Min32Fx4,
1268      Iop_Add32Fx2, Iop_Sub32Fx2,
1269      /* Note: For the following compares, the ppc and arm front-ends assume a
1270         nan in a lane of either argument returns zero for that lane. */
1271      Iop_CmpEQ32Fx4, Iop_CmpLT32Fx4, Iop_CmpLE32Fx4, Iop_CmpUN32Fx4,
1272      Iop_CmpGT32Fx4, Iop_CmpGE32Fx4,
1273
1274      /* Pairwise Max and Min. See integer pairwise operations for details. */
1275      Iop_PwMax32Fx4, Iop_PwMin32Fx4,
1276
1277      /* unary */
1278      Iop_Abs32Fx4,
1279      Iop_Sqrt32Fx4, Iop_RSqrt32Fx4,
1280      Iop_Neg32Fx4,
1281
1282      /* Vector Reciprocal Estimate finds an approximate reciprocal of each
1283      element in the operand vector, and places the results in the destination
1284      vector.  */
1285      Iop_Recip32Fx4,
1286
1287      /* Vector Reciprocal Step computes (2.0 - arg1 * arg2).
1288         Note, that if one of the arguments is zero and another one is infinity
1289         of arbitrary sign the result of the operation is 2.0. */
1290      Iop_Recps32Fx4,
1291
1292      /* Vector Reciprocal Square Root Estimate finds an approximate reciprocal
1293         square root of each element in the operand vector. */
1294      Iop_Rsqrte32Fx4,
1295
1296      /* Vector Reciprocal Square Root Step computes (3.0 - arg1 * arg2) / 2.0.
1297         Note, that if one of the arguments is zero and another one is infinity
1298         of arbitrary sign the result of the operation is 1.5. */
1299      Iop_Rsqrts32Fx4,
1300
1301      /* --- Int to/from FP conversion --- */
1302      /* Unlike the standard fp conversions, these irops take no
1303         rounding mode argument. Instead the irop trailers _R{M,P,N,Z}
1304         indicate the mode: {-inf, +inf, nearest, zero} respectively. */
1305      Iop_I32UtoFx4,     Iop_I32StoFx4,       /* I32x4 -> F32x4       */
1306      Iop_FtoI32Ux4_RZ,  Iop_FtoI32Sx4_RZ,    /* F32x4 -> I32x4       */
1307      Iop_QFtoI32Ux4_RZ, Iop_QFtoI32Sx4_RZ,   /* F32x4 -> I32x4 (saturating) */
1308      Iop_RoundF32x4_RM, Iop_RoundF32x4_RP,   /* round to fp integer  */
1309      Iop_RoundF32x4_RN, Iop_RoundF32x4_RZ,   /* round to fp integer  */
1310      /* Fixed32 format is floating-point number with fixed number of fraction
1311         bits. The number of fraction bits is passed as a second argument of
1312         type I8. */
1313      Iop_F32ToFixed32Ux4_RZ, Iop_F32ToFixed32Sx4_RZ, /* fp -> fixed-point */
1314      Iop_Fixed32UToF32x4_RN, Iop_Fixed32SToF32x4_RN, /* fixed-point -> fp */
1315
1316      /* --- Single to/from half conversion --- */
1317      /* FIXME: what kind of rounding in F32x4 -> F16x4 case? */
1318      Iop_F32toF16x4, Iop_F16toF32x4,         /* F32x4 <-> F16x4      */
1319
1320      /* --- 32x4 lowest-lane-only scalar FP --- */
1321
1322      /* In binary cases, upper 3/4 is copied from first operand.  In
1323         unary cases, upper 3/4 is copied from the operand. */
1324
1325      /* binary */
1326      Iop_Add32F0x4, Iop_Sub32F0x4, Iop_Mul32F0x4, Iop_Div32F0x4,
1327      Iop_Max32F0x4, Iop_Min32F0x4,
1328      Iop_CmpEQ32F0x4, Iop_CmpLT32F0x4, Iop_CmpLE32F0x4, Iop_CmpUN32F0x4,
1329
1330      /* unary */
1331      Iop_Recip32F0x4, Iop_Sqrt32F0x4, Iop_RSqrt32F0x4,
1332
1333      /* --- 64x2 vector FP --- */
1334
1335      /* ternary :: IRRoundingMode(I32) x V128 x V128 -> V128 */
1336      Iop_Add64Fx2, Iop_Sub64Fx2, Iop_Mul64Fx2, Iop_Div64Fx2,
1337
1338      /* binary */
1339      Iop_Max64Fx2, Iop_Min64Fx2,
1340      Iop_CmpEQ64Fx2, Iop_CmpLT64Fx2, Iop_CmpLE64Fx2, Iop_CmpUN64Fx2,
1341
1342      /* unary */
1343      Iop_Abs64Fx2,
1344      Iop_Sqrt64Fx2, Iop_RSqrt64Fx2,
1345      Iop_Neg64Fx2,
1346
1347      /* Vector Reciprocal Estimate */
1348      Iop_Recip64Fx2,
1349
1350      /* --- 64x2 lowest-lane-only scalar FP --- */
1351
1352      /* In binary cases, upper half is copied from first operand.  In
1353         unary cases, upper half is copied from the operand. */
1354
1355      /* binary */
1356      Iop_Add64F0x2, Iop_Sub64F0x2, Iop_Mul64F0x2, Iop_Div64F0x2,
1357      Iop_Max64F0x2, Iop_Min64F0x2,
1358      Iop_CmpEQ64F0x2, Iop_CmpLT64F0x2, Iop_CmpLE64F0x2, Iop_CmpUN64F0x2,
1359
1360      /* unary */
1361      Iop_Recip64F0x2, Iop_Sqrt64F0x2, Iop_RSqrt64F0x2,
1362
1363      /* --- pack / unpack --- */
1364
1365      /* 64 <-> 128 bit vector */
1366      Iop_V128to64,     // :: V128 -> I64, low half
1367      Iop_V128HIto64,   // :: V128 -> I64, high half
1368      Iop_64HLtoV128,   // :: (I64,I64) -> V128
1369
1370      Iop_64UtoV128,
1371      Iop_SetV128lo64,
1372
1373      /* Copies lower 64/32/16/8 bits, zeroes out the rest. */
1374      Iop_ZeroHI64ofV128,    // :: V128 -> V128
1375      Iop_ZeroHI96ofV128,    // :: V128 -> V128
1376      Iop_ZeroHI112ofV128,   // :: V128 -> V128
1377      Iop_ZeroHI120ofV128,   // :: V128 -> V128
1378
1379      /* 32 <-> 128 bit vector */
1380      Iop_32UtoV128,
1381      Iop_V128to32,     // :: V128 -> I32, lowest lane
1382      Iop_SetV128lo32,  // :: (V128,I32) -> V128
1383
1384      /* ------------------ 128-bit SIMD Integer. ------------------ */
1385
1386      /* BITWISE OPS */
1387      Iop_NotV128,
1388      Iop_AndV128, Iop_OrV128, Iop_XorV128,
1389
1390      /* VECTOR SHIFT (shift amt :: Ity_I8) */
1391      Iop_ShlV128, Iop_ShrV128,
1392
1393      /* MISC (vector integer cmp != 0) */
1394      Iop_CmpNEZ8x16, Iop_CmpNEZ16x8, Iop_CmpNEZ32x4, Iop_CmpNEZ64x2,
1395
1396      /* ADDITION (normal / unsigned sat / signed sat) */
1397      Iop_Add8x16,   Iop_Add16x8,   Iop_Add32x4,   Iop_Add64x2,
1398      Iop_QAdd8Ux16, Iop_QAdd16Ux8, Iop_QAdd32Ux4, Iop_QAdd64Ux2,
1399      Iop_QAdd8Sx16, Iop_QAdd16Sx8, Iop_QAdd32Sx4, Iop_QAdd64Sx2,
1400
1401      /* SUBTRACTION (normal / unsigned sat / signed sat) */
1402      Iop_Sub8x16,   Iop_Sub16x8,   Iop_Sub32x4,   Iop_Sub64x2,
1403      Iop_QSub8Ux16, Iop_QSub16Ux8, Iop_QSub32Ux4, Iop_QSub64Ux2,
1404      Iop_QSub8Sx16, Iop_QSub16Sx8, Iop_QSub32Sx4, Iop_QSub64Sx2,
1405
1406      /* MULTIPLICATION (normal / high half of signed/unsigned) */
1407      Iop_Mul8x16,  Iop_Mul16x8,    Iop_Mul32x4,
1408                    Iop_MulHi16Ux8, Iop_MulHi32Ux4,
1409                    Iop_MulHi16Sx8, Iop_MulHi32Sx4,
1410      /* (widening signed/unsigned of even lanes, with lowest lane=zero) */
1411      Iop_MullEven8Ux16, Iop_MullEven16Ux8, Iop_MullEven32Ux4,
1412      Iop_MullEven8Sx16, Iop_MullEven16Sx8, Iop_MullEven32Sx4,
1413      /* FIXME: document these */
1414      Iop_Mull8Ux8, Iop_Mull8Sx8,
1415      Iop_Mull16Ux4, Iop_Mull16Sx4,
1416      Iop_Mull32Ux2, Iop_Mull32Sx2,
1417      /* Vector Saturating Doubling Multiply Returning High Half and
1418         Vector Saturating Rounding Doubling Multiply Returning High Half */
1419      /* These IROp's multiply corresponding elements in two vectors, double
1420         the results, and place the most significant half of the final results
1421         in the destination vector. The results are truncated or rounded. If
1422         any of the results overflow, they are saturated. */
1423      Iop_QDMulHi16Sx8, Iop_QDMulHi32Sx4,
1424      Iop_QRDMulHi16Sx8, Iop_QRDMulHi32Sx4,
1425      /* Doubling saturating multiplication (long) (I64, I64) -> V128 */
1426      Iop_QDMulLong16Sx4, Iop_QDMulLong32Sx2,
1427      /* Polynomial multiplication treats its arguments as
1428         coefficients of polynomials over {0, 1}. */
1429      Iop_PolynomialMul8x16, /* (V128, V128) -> V128 */
1430      Iop_PolynomialMull8x8, /*   (I64, I64) -> V128 */
1431
1432      /* Vector Polynomial multiplication add.   (V128, V128) -> V128
1433
1434       *** Below is the algorithm for the instructions. These Iops could
1435           be emulated to get this functionality, but the emulation would
1436           be long and messy.
1437
1438        Example for polynomial multiply add for vector of bytes
1439        do i = 0 to 15
1440            prod[i].bit[0:14] <- 0
1441            srcA <- VR[argL].byte[i]
1442            srcB <- VR[argR].byte[i]
1443            do j = 0 to 7
1444                do k = 0 to j
1445                    gbit <- srcA.bit[k] & srcB.bit[j-k]
1446                    prod[i].bit[j] <- prod[i].bit[j] ^ gbit
1447                end
1448            end
1449
1450            do j = 8 to 14
1451                do k = j-7 to 7
1452                     gbit <- (srcA.bit[k] & srcB.bit[j-k])
1453                     prod[i].bit[j] <- prod[i].bit[j] ^ gbit
1454                end
1455            end
1456        end
1457
1458        do i = 0 to 7
1459            VR[dst].hword[i] <- 0b0 || (prod[2×i] ^ prod[2×i+1])
1460        end
1461      */
1462      Iop_PolynomialMulAdd8x16, Iop_PolynomialMulAdd16x8,
1463      Iop_PolynomialMulAdd32x4, Iop_PolynomialMulAdd64x2,
1464
1465      /* PAIRWISE operations */
1466      /* Iop_PwFoo16x4( [a,b,c,d], [e,f,g,h] ) =
1467            [Foo16(a,b), Foo16(c,d), Foo16(e,f), Foo16(g,h)] */
1468      Iop_PwAdd8x16, Iop_PwAdd16x8, Iop_PwAdd32x4,
1469      Iop_PwAdd32Fx2,
1470      /* Longening variant is unary. The resulting vector contains half as
1471         many elements as the operand, but they are twice as wide.
1472         Example:
1473            Iop_PwAddL16Ux4( [a,b,c,d] ) = [a+b,c+d]
1474               where a+b and c+d are unsigned 32-bit values. */
1475      Iop_PwAddL8Ux16, Iop_PwAddL16Ux8, Iop_PwAddL32Ux4,
1476      Iop_PwAddL8Sx16, Iop_PwAddL16Sx8, Iop_PwAddL32Sx4,
1477
1478      /* Other unary pairwise ops */
1479
1480      /* Vector bit matrix transpose.  (V128) -> V128 */
1481      /* For each doubleword element of the source vector, an 8-bit x 8-bit
1482       * matrix transpose is performed. */
1483      Iop_PwBitMtxXpose64x2,
1484
1485      /* ABSOLUTE VALUE */
1486      Iop_Abs8x16, Iop_Abs16x8, Iop_Abs32x4,
1487
1488      /* AVERAGING: note: (arg1 + arg2 + 1) >>u 1 */
1489      Iop_Avg8Ux16, Iop_Avg16Ux8, Iop_Avg32Ux4,
1490      Iop_Avg8Sx16, Iop_Avg16Sx8, Iop_Avg32Sx4,
1491
1492      /* MIN/MAX */
1493      Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4, Iop_Max64Sx2,
1494      Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4, Iop_Max64Ux2,
1495      Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4, Iop_Min64Sx2,
1496      Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4, Iop_Min64Ux2,
1497
1498      /* COMPARISON */
1499      Iop_CmpEQ8x16,  Iop_CmpEQ16x8,  Iop_CmpEQ32x4,  Iop_CmpEQ64x2,
1500      Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2,
1501      Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4, Iop_CmpGT64Ux2,
1502
1503      /* COUNT ones / leading zeroes / leading sign bits (not including topmost
1504         bit) */
1505      Iop_Cnt8x16,
1506      Iop_Clz8Sx16, Iop_Clz16Sx8, Iop_Clz32Sx4,
1507      Iop_Cls8Sx16, Iop_Cls16Sx8, Iop_Cls32Sx4,
1508
1509      /* Sum vector (signed/unsigned)*/
1510      Iop_AddLV8Ux16, Iop_AddLV16Ux8, Iop_AddLV32Ux4,
1511      Iop_AddLV8Sx16, Iop_AddLV16Sx8, Iop_AddLV32Sx4,
1512
1513      /* VECTOR x SCALAR SHIFT (shift amt :: Ity_I8) */
1514      Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2,
1515      Iop_ShrN8x16, Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2,
1516      Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2,
1517
1518      /* VECTOR x VECTOR SHIFT / ROTATE */
1519      Iop_Shl8x16, Iop_Shl16x8, Iop_Shl32x4, Iop_Shl64x2,
1520      Iop_Shr8x16, Iop_Shr16x8, Iop_Shr32x4, Iop_Shr64x2,
1521      Iop_Sar8x16, Iop_Sar16x8, Iop_Sar32x4, Iop_Sar64x2,
1522      Iop_Sal8x16, Iop_Sal16x8, Iop_Sal32x4, Iop_Sal64x2,
1523      Iop_Rol8x16, Iop_Rol16x8, Iop_Rol32x4, Iop_Rol64x2,
1524
1525      /* VECTOR x VECTOR SATURATING SHIFT */
1526      Iop_QShl8x16, Iop_QShl16x8, Iop_QShl32x4, Iop_QShl64x2,
1527      Iop_QSal8x16, Iop_QSal16x8, Iop_QSal32x4, Iop_QSal64x2,
1528      /* VECTOR x INTEGER SATURATING SHIFT */
1529      Iop_QShlN8Sx16, Iop_QShlN16Sx8, Iop_QShlN32Sx4, Iop_QShlN64Sx2,
1530      Iop_QShlN8x16, Iop_QShlN16x8, Iop_QShlN32x4, Iop_QShlN64x2,
1531      Iop_QSalN8x16, Iop_QSalN16x8, Iop_QSalN32x4, Iop_QSalN64x2,
1532
1533      /* NARROWING (binary)
1534         -- narrow 2xV128 into 1xV128, hi half from left arg */
1535      /* See comments above w.r.t. U vs S issues in saturated narrowing. */
1536      Iop_QNarrowBin16Sto8Ux16, Iop_QNarrowBin32Sto16Ux8,
1537      Iop_QNarrowBin16Sto8Sx16, Iop_QNarrowBin32Sto16Sx8,
1538      Iop_QNarrowBin16Uto8Ux16, Iop_QNarrowBin32Uto16Ux8,
1539      Iop_NarrowBin16to8x16, Iop_NarrowBin32to16x8,
1540      Iop_QNarrowBin64Sto32Sx4, Iop_QNarrowBin64Uto32Ux4,
1541      Iop_NarrowBin64to32x4,
1542
1543      /* NARROWING (unary) -- narrow V128 into I64 */
1544      Iop_NarrowUn16to8x8, Iop_NarrowUn32to16x4, Iop_NarrowUn64to32x2,
1545      /* Saturating narrowing from signed source to signed/unsigned
1546         destination */
1547      Iop_QNarrowUn16Sto8Sx8, Iop_QNarrowUn32Sto16Sx4, Iop_QNarrowUn64Sto32Sx2,
1548      Iop_QNarrowUn16Sto8Ux8, Iop_QNarrowUn32Sto16Ux4, Iop_QNarrowUn64Sto32Ux2,
1549      /* Saturating narrowing from unsigned source to unsigned destination */
1550      Iop_QNarrowUn16Uto8Ux8, Iop_QNarrowUn32Uto16Ux4, Iop_QNarrowUn64Uto32Ux2,
1551
1552      /* WIDENING -- sign or zero extend each element of the argument
1553         vector to the twice original size.  The resulting vector consists of
1554         the same number of elements but each element and the vector itself
1555         are twice as wide.
1556         All operations are I64->V128.
1557         Example
1558            Iop_Widen32Sto64x2( [a, b] ) = [c, d]
1559               where c = Iop_32Sto64(a) and d = Iop_32Sto64(b) */
1560      Iop_Widen8Uto16x8, Iop_Widen16Uto32x4, Iop_Widen32Uto64x2,
1561      Iop_Widen8Sto16x8, Iop_Widen16Sto32x4, Iop_Widen32Sto64x2,
1562
1563      /* INTERLEAVING */
1564      /* Interleave lanes from low or high halves of
1565         operands.  Most-significant result lane is from the left
1566         arg. */
1567      Iop_InterleaveHI8x16, Iop_InterleaveHI16x8,
1568      Iop_InterleaveHI32x4, Iop_InterleaveHI64x2,
1569      Iop_InterleaveLO8x16, Iop_InterleaveLO16x8,
1570      Iop_InterleaveLO32x4, Iop_InterleaveLO64x2,
1571      /* Interleave odd/even lanes of operands.  Most-significant result lane
1572         is from the left arg. */
1573      Iop_InterleaveOddLanes8x16, Iop_InterleaveEvenLanes8x16,
1574      Iop_InterleaveOddLanes16x8, Iop_InterleaveEvenLanes16x8,
1575      Iop_InterleaveOddLanes32x4, Iop_InterleaveEvenLanes32x4,
1576
1577      /* CONCATENATION -- build a new value by concatenating either
1578         the even or odd lanes of both operands. */
1579      Iop_CatOddLanes8x16, Iop_CatOddLanes16x8, Iop_CatOddLanes32x4,
1580      Iop_CatEvenLanes8x16, Iop_CatEvenLanes16x8, Iop_CatEvenLanes32x4,
1581
1582      /* GET elements of VECTOR
1583         GET is binop (V128, I8) -> I<elem_size> */
1584      /* Note: the arm back-end handles only constant second argument. */
1585      Iop_GetElem8x16, Iop_GetElem16x8, Iop_GetElem32x4, Iop_GetElem64x2,
1586
1587      /* DUPLICATING -- copy value to all lanes */
1588      Iop_Dup8x16,   Iop_Dup16x8,   Iop_Dup32x4,
1589
1590      /* EXTRACT -- copy 16-arg3 highest bytes from arg1 to 16-arg3 lowest bytes
1591         of result and arg3 lowest bytes of arg2 to arg3 highest bytes of
1592         result.
1593         It is a triop: (V128, V128, I8) -> V128 */
1594      /* Note: the ARM back end handles only constant arg3 in this operation. */
1595      Iop_ExtractV128,
1596
1597      /* REVERSE the order of elements in each Half-words, Words,
1598         Double-words */
1599      /* Examples:
1600            Reverse32_16x8([a,b,c,d,e,f,g,h]) = [b,a,d,c,f,e,h,g]
1601            Reverse64_16x8([a,b,c,d,e,f,g,h]) = [d,c,b,a,h,g,f,e] */
1602      Iop_Reverse16_8x16,
1603      Iop_Reverse32_8x16, Iop_Reverse32_16x8,
1604      Iop_Reverse64_8x16, Iop_Reverse64_16x8, Iop_Reverse64_32x4,
1605
1606      /* PERMUTING -- copy src bytes to dst,
1607         as indexed by control vector bytes:
1608            for i in 0 .. 15 . result[i] = argL[ argR[i] ]
1609         argR[i] values may only be in the range 0 .. 15, else behaviour
1610         is undefined. */
1611      Iop_Perm8x16,
1612      Iop_Perm32x4, /* ditto, except argR values are restricted to 0 .. 3 */
1613
1614      /* MISC CONVERSION -- get high bits of each byte lane, a la
1615         x86/amd64 pmovmskb */
1616      Iop_GetMSBs8x16, /* V128 -> I16 */
1617
      /* Vector Reciprocal Estimate and Vector Reciprocal Square Root Estimate
         See floating-point equivalents for details. */
1620      Iop_Recip32x4, Iop_Rsqrte32x4,
1621
1622      /* ------------------ 256-bit SIMD Integer. ------------------ */
1623
1624      /* Pack/unpack */
1625      Iop_V256to64_0,  // V256 -> I64, extract least significant lane
1626      Iop_V256to64_1,
1627      Iop_V256to64_2,
1628      Iop_V256to64_3,  // V256 -> I64, extract most significant lane
1629
1630      Iop_64x4toV256,  // (I64,I64,I64,I64)->V256
1631                       // first arg is most significant lane
1632
1633      Iop_V256toV128_0, // V256 -> V128, less significant lane
1634      Iop_V256toV128_1, // V256 -> V128, more significant lane
1635      Iop_V128HLtoV256, // (V128,V128)->V256, first arg is most signif
1636
1637      Iop_AndV256,
1638      Iop_OrV256,
1639      Iop_XorV256,
1640      Iop_NotV256,
1641
1642      /* MISC (vector integer cmp != 0) */
1643      Iop_CmpNEZ8x32, Iop_CmpNEZ16x16, Iop_CmpNEZ32x8, Iop_CmpNEZ64x4,
1644
1645      Iop_Add8x32,    Iop_Add16x16,    Iop_Add32x8,    Iop_Add64x4,
1646      Iop_Sub8x32,    Iop_Sub16x16,    Iop_Sub32x8,    Iop_Sub64x4,
1647
1648      Iop_CmpEQ8x32,  Iop_CmpEQ16x16,  Iop_CmpEQ32x8,  Iop_CmpEQ64x4,
1649      Iop_CmpGT8Sx32, Iop_CmpGT16Sx16, Iop_CmpGT32Sx8, Iop_CmpGT64Sx4,
1650
1651      Iop_ShlN16x16, Iop_ShlN32x8, Iop_ShlN64x4,
1652      Iop_ShrN16x16, Iop_ShrN32x8, Iop_ShrN64x4,
1653      Iop_SarN16x16, Iop_SarN32x8,
1654
1655      Iop_Max8Sx32, Iop_Max16Sx16, Iop_Max32Sx8,
1656      Iop_Max8Ux32, Iop_Max16Ux16, Iop_Max32Ux8,
1657      Iop_Min8Sx32, Iop_Min16Sx16, Iop_Min32Sx8,
1658      Iop_Min8Ux32, Iop_Min16Ux16, Iop_Min32Ux8,
1659
1660      Iop_Mul16x16, Iop_Mul32x8,
1661      Iop_MulHi16Ux16, Iop_MulHi16Sx16,
1662
1663      Iop_QAdd8Ux32, Iop_QAdd16Ux16,
1664      Iop_QAdd8Sx32, Iop_QAdd16Sx16,
1665      Iop_QSub8Ux32, Iop_QSub16Ux16,
1666      Iop_QSub8Sx32, Iop_QSub16Sx16,
1667
1668      Iop_Avg8Ux32, Iop_Avg16Ux16,
1669
1670      Iop_Perm32x8,
1671
1672      /* (V128, V128) -> V128 */
1673      Iop_CipherV128, Iop_CipherLV128, Iop_CipherSV128,
1674      Iop_NCipherV128, Iop_NCipherLV128,
1675
1676      /* Hash instructions, Federal Information Processing Standards
1677       * Publication 180-3 Secure Hash Standard. */
1678      /* (V128, I8) -> V128; The I8 input arg is (ST | SIX), where ST and
1679       * SIX are fields from the insn. See ISA 2.07 description of
1680       * vshasigmad and vshasigmaw insns.*/
1681      Iop_SHA512, Iop_SHA256,
1682
1683      /* ------------------ 256-bit SIMD FP. ------------------ */
1684
1685      /* ternary :: IRRoundingMode(I32) x V256 x V256 -> V256 */
1686      Iop_Add64Fx4, Iop_Sub64Fx4, Iop_Mul64Fx4, Iop_Div64Fx4,
1687      Iop_Add32Fx8, Iop_Sub32Fx8, Iop_Mul32Fx8, Iop_Div32Fx8,
1688
1689      Iop_Sqrt32Fx8,
1690      Iop_Sqrt64Fx4,
1691      Iop_RSqrt32Fx8,
1692      Iop_Recip32Fx8,
1693
1694      Iop_Max32Fx8, Iop_Min32Fx8,
1695      Iop_Max64Fx4, Iop_Min64Fx4,
1696      Iop_LAST      /* must be the last enumerator */
1697   }
1698   IROp;
1699
1700/* Pretty-print an op. */
1701extern void ppIROp ( IROp );
1702
1703
/* IEEE754-specified rounding modes and their encodings.
   Do not renumber: various front and back ends rely on the actual
   numerical values. */
typedef enum {
   Irrm_NEAREST              = 0,  /* to nearest, ties to even */
   Irrm_NegINF               = 1,  /* to negative infinity */
   Irrm_PosINF               = 2,  /* to positive infinity */
   Irrm_ZERO                 = 3,  /* toward zero */
   Irrm_NEAREST_TIE_AWAY_0   = 4,  /* to nearest, ties away from 0 */
   Irrm_PREPARE_SHORTER      = 5,  /* to prepare for shorter precision */
   Irrm_AWAY_FROM_ZERO       = 6,  /* away from 0 */
   Irrm_NEAREST_TIE_TOWARD_0 = 7   /* to nearest, ties towards 0 */
} IRRoundingMode;
1720
/* Result values of a binary floating point comparison.  The encoding
   is derived from what IA32 does. */
typedef enum {
   Ircr_GT = 0x00,   /* greater than */
   Ircr_LT = 0x01,   /* less than */
   Ircr_EQ = 0x40,   /* equal */
   Ircr_UN = 0x45    /* unordered */
} IRCmpFResult;

/* Per-width names for binary floating point comparison results. */
typedef IRCmpFResult IRCmpF32Result, IRCmpF64Result, IRCmpF128Result;

/* Decimal floating point comparisons share the same result values. */
typedef IRCmpFResult IRCmpDResult;
typedef IRCmpDResult IRCmpD64Result, IRCmpD128Result;
1740
1741/* ------------------ Expressions ------------------ */
1742
1743typedef struct _IRQop   IRQop;   /* forward declaration */
1744typedef struct _IRTriop IRTriop; /* forward declaration */
1745
1746
/* Tags for the different kinds of expressions.  Their meaning is
   explained in full in the comments for IRExpr. */
typedef enum {
   Iex_Binder = 0x1900,  /* pattern-matching binder, internal to Vex */
   Iex_Get,              /* read guest register at a fixed offset */
   Iex_GetI,             /* read guest register at a non-fixed offset */
   Iex_RdTmp,            /* value held by a temporary */
   Iex_Qop,              /* quaternary operation */
   Iex_Triop,            /* ternary operation */
   Iex_Binop,            /* binary operation */
   Iex_Unop,             /* unary operation */
   Iex_Load,             /* load from memory */
   Iex_Const,            /* constant-valued expression */
   Iex_ITE,              /* if-then-else */
   Iex_CCall,            /* call to a pure helper C function */
   Iex_VECRET,           /* special marker for dirty helper arg lists */
   Iex_BBPTR             /* special marker for dirty helper arg lists */
} IRExprTag;
1767
1768/* An expression.  Stored as a tagged union.  'tag' indicates what kind
1769   of expression this is.  'Iex' is the union that holds the fields.  If
1770   an IRExpr 'e' has e.tag equal to Iex_Load, then it's a load
1771   expression, and the fields can be accessed with
1772   'e.Iex.Load.<fieldname>'.
1773
1774   For each kind of expression, we show what it looks like when
1775   pretty-printed with ppIRExpr().
1776*/
1777typedef
1778   struct _IRExpr
1779   IRExpr;
1780
1781struct _IRExpr {
1782   IRExprTag tag;
1783   union {
1784      /* Used only in pattern matching within Vex.  Should not be seen
1785         outside of Vex. */
1786      struct {
1787         Int binder;
1788      } Binder;
1789
1790      /* Read a guest register, at a fixed offset in the guest state.
1791         ppIRExpr output: GET:<ty>(<offset>), eg. GET:I32(0)
1792      */
1793      struct {
1794         Int    offset;    /* Offset into the guest state */
1795         IRType ty;        /* Type of the value being read */
1796      } Get;
1797
1798      /* Read a guest register at a non-fixed offset in the guest
1799         state.  This allows circular indexing into parts of the guest
1800         state, which is essential for modelling situations where the
1801         identity of guest registers is not known until run time.  One
1802         example is the x87 FP register stack.
1803
1804         The part of the guest state to be treated as a circular array
1805         is described in the IRRegArray 'descr' field.  It holds the
1806         offset of the first element in the array, the type of each
1807         element, and the number of elements.
1808
1809         The array index is indicated rather indirectly, in a way
1810         which makes optimisation easy: as the sum of variable part
1811         (the 'ix' field) and a constant offset (the 'bias' field).
1812
1813         Since the indexing is circular, the actual array index to use
1814         is computed as (ix + bias) % num-of-elems-in-the-array.
1815
1816         Here's an example.  The description
1817
1818            (96:8xF64)[t39,-7]
1819
1820         describes an array of 8 F64-typed values, the
1821         guest-state-offset of the first being 96.  This array is
1822         being indexed at (t39 - 7) % 8.
1823
1824         It is important to get the array size/type exactly correct
1825         since IR optimisation looks closely at such info in order to
1826         establish aliasing/non-aliasing between seperate GetI and
1827         PutI events, which is used to establish when they can be
1828         reordered, etc.  Putting incorrect info in will lead to
1829         obscure IR optimisation bugs.
1830
1831            ppIRExpr output: GETI<descr>[<ix>,<bias]
1832                         eg. GETI(128:8xI8)[t1,0]
1833      */
1834      struct {
1835         IRRegArray* descr; /* Part of guest state treated as circular */
1836         IRExpr*     ix;    /* Variable part of index into array */
1837         Int         bias;  /* Constant offset part of index into array */
1838      } GetI;
1839
1840      /* The value held by a temporary.
1841         ppIRExpr output: t<tmp>, eg. t1
1842      */
1843      struct {
1844         IRTemp tmp;       /* The temporary number */
1845      } RdTmp;
1846
1847      /* A quaternary operation.
1848         ppIRExpr output: <op>(<arg1>, <arg2>, <arg3>, <arg4>),
1849                      eg. MAddF64r32(t1, t2, t3, t4)
1850      */
1851      struct {
1852        IRQop* details;
1853      } Qop;
1854
1855      /* A ternary operation.
1856         ppIRExpr output: <op>(<arg1>, <arg2>, <arg3>),
1857                      eg. MulF64(1, 2.0, 3.0)
1858      */
1859      struct {
1860        IRTriop* details;
1861      } Triop;
1862
1863      /* A binary operation.
1864         ppIRExpr output: <op>(<arg1>, <arg2>), eg. Add32(t1,t2)
1865      */
1866      struct {
1867         IROp op;          /* op-code   */
1868         IRExpr* arg1;     /* operand 1 */
1869         IRExpr* arg2;     /* operand 2 */
1870      } Binop;
1871
1872      /* A unary operation.
1873         ppIRExpr output: <op>(<arg>), eg. Neg8(t1)
1874      */
1875      struct {
1876         IROp    op;       /* op-code */
1877         IRExpr* arg;      /* operand */
1878      } Unop;
1879
1880      /* A load from memory -- a normal load, not a load-linked.
1881         Load-Linkeds (and Store-Conditionals) are instead represented
1882         by IRStmt.LLSC since Load-Linkeds have side effects and so
1883         are not semantically valid IRExpr's.
1884         ppIRExpr output: LD<end>:<ty>(<addr>), eg. LDle:I32(t1)
1885      */
1886      struct {
1887         IREndness end;    /* Endian-ness of the load */
1888         IRType    ty;     /* Type of the loaded value */
1889         IRExpr*   addr;   /* Address being loaded from */
1890      } Load;
1891
1892      /* A constant-valued expression.
1893         ppIRExpr output: <con>, eg. 0x4:I32
1894      */
1895      struct {
1896         IRConst* con;     /* The constant itself */
1897      } Const;
1898
1899      /* A call to a pure (no side-effects) helper C function.
1900
1901         With the 'cee' field, 'name' is the function's name.  It is
1902         only used for pretty-printing purposes.  The address to call
1903         (host address, of course) is stored in the 'addr' field
1904         inside 'cee'.
1905
1906         The 'args' field is a NULL-terminated array of arguments.
1907         The stated return IRType, and the implied argument types,
1908         must match that of the function being called well enough so
1909         that the back end can actually generate correct code for the
1910         call.
1911
1912         The called function **must** satisfy the following:
1913
1914         * no side effects -- must be a pure function, the result of
1915           which depends only on the passed parameters.
1916
1917         * it may not look at, nor modify, any of the guest state
1918           since that would hide guest state transitions from
1919           instrumenters
1920
1921         * it may not access guest memory, since that would hide
1922           guest memory transactions from the instrumenters
1923
1924         * it must not assume that arguments are being evaluated in a
1925           particular order. The oder of evaluation is unspecified.
1926
1927         This is restrictive, but makes the semantics clean, and does
1928         not interfere with IR optimisation.
1929
1930         If you want to call a helper which can mess with guest state
1931         and/or memory, instead use Ist_Dirty.  This is a lot more
1932         flexible, but you have to give a bunch of details about what
1933         the helper does (and you better be telling the truth,
1934         otherwise any derived instrumentation will be wrong).  Also
1935         Ist_Dirty inhibits various IR optimisations and so can cause
1936         quite poor code to be generated.  Try to avoid it.
1937
1938         In principle it would be allowable to have the arg vector
1939         contain an IRExpr_VECRET(), although not IRExpr_BBPTR(). However,
1940         at the moment there is no requirement for clean helper calls to
1941         be able to return V128 or V256 values.  Hence this is not allowed.
1942
1943         ppIRExpr output: <cee>(<args>):<retty>
1944                      eg. foo{0x80489304}(t1, t2):I32
1945      */
1946      struct {
1947         IRCallee* cee;    /* Function to call. */
1948         IRType    retty;  /* Type of return value. */
1949         IRExpr**  args;   /* Vector of argument expressions. */
1950      }  CCall;
1951
1952      /* A ternary if-then-else operator.  It returns iftrue if cond is
1953         nonzero, iffalse otherwise.  Note that it is STRICT, ie. both
1954         iftrue and iffalse are evaluated in all cases.
1955
1956         ppIRExpr output: ITE(<cond>,<iftrue>,<iffalse>),
1957                         eg. ITE(t6,t7,t8)
1958      */
1959      struct {
1960         IRExpr* cond;     /* Condition */
1961         IRExpr* iftrue;   /* True expression */
1962         IRExpr* iffalse;  /* False expression */
1963      } ITE;
1964   } Iex;
1965};
1966
1967/* Expression auxiliaries: a ternary expression. */
1968struct _IRTriop {
1969   IROp op;          /* op-code   */
1970   IRExpr* arg1;     /* operand 1 */
1971   IRExpr* arg2;     /* operand 2 */
1972   IRExpr* arg3;     /* operand 3 */
1973};
1974
1975/* Expression auxiliaries: a quarternary expression. */
1976struct _IRQop {
1977   IROp op;          /* op-code   */
1978   IRExpr* arg1;     /* operand 1 */
1979   IRExpr* arg2;     /* operand 2 */
1980   IRExpr* arg3;     /* operand 3 */
1981   IRExpr* arg4;     /* operand 4 */
1982};
1983
1984
1985/* Two special kinds of IRExpr, which can ONLY be used in
1986   argument lists for dirty helper calls (IRDirty.args) and in NO
1987   OTHER PLACES.  And then only in very limited ways.  */
1988
/* IRExpr_VECRET(): denotes an argument which (in the helper) takes a
   pointer to a (naturally aligned) V128 or V256, into which the helper
   is expected to write its result.  Use of IRExpr_VECRET() is strictly
   controlled.  If the helper returns a V128 or V256 value then
   IRExpr_VECRET() must appear exactly once in the arg list, although
   it can appear anywhere, and the helper must have a C 'void' return
   type.  If the helper returns any other type, IRExpr_VECRET() may
   not appear in the argument list. */
1997
/* IRExpr_BBPTR(): denotes a void* argument which is passed to the
   helper, which at run time will point to the thread's guest state
   area.  This can appear at most once in an argument list, and it may
   not appear at all in argument lists for clean helper calls. */
2002
2003static inline Bool is_IRExpr_VECRET_or_BBPTR ( IRExpr* e ) {
2004   return e->tag == Iex_VECRET || e->tag == Iex_BBPTR;
2005}
2006
2007
2008/* Expression constructors. */
2009extern IRExpr* IRExpr_Binder ( Int binder );
2010extern IRExpr* IRExpr_Get    ( Int off, IRType ty );
2011extern IRExpr* IRExpr_GetI   ( IRRegArray* descr, IRExpr* ix, Int bias );
2012extern IRExpr* IRExpr_RdTmp  ( IRTemp tmp );
2013extern IRExpr* IRExpr_Qop    ( IROp op, IRExpr* arg1, IRExpr* arg2,
2014                                        IRExpr* arg3, IRExpr* arg4 );
2015extern IRExpr* IRExpr_Triop  ( IROp op, IRExpr* arg1,
2016                                        IRExpr* arg2, IRExpr* arg3 );
2017extern IRExpr* IRExpr_Binop  ( IROp op, IRExpr* arg1, IRExpr* arg2 );
2018extern IRExpr* IRExpr_Unop   ( IROp op, IRExpr* arg );
2019extern IRExpr* IRExpr_Load   ( IREndness end, IRType ty, IRExpr* addr );
2020extern IRExpr* IRExpr_Const  ( IRConst* con );
2021extern IRExpr* IRExpr_CCall  ( IRCallee* cee, IRType retty, IRExpr** args );
2022extern IRExpr* IRExpr_ITE    ( IRExpr* cond, IRExpr* iftrue, IRExpr* iffalse );
2023extern IRExpr* IRExpr_VECRET ( void );
2024extern IRExpr* IRExpr_BBPTR  ( void );
2025
2026/* Deep-copy an IRExpr. */
2027extern IRExpr* deepCopyIRExpr ( IRExpr* );
2028
2029/* Pretty-print an IRExpr. */
2030extern void ppIRExpr ( IRExpr* );
2031
2032/* NULL-terminated IRExpr vector constructors, suitable for
2033   use as arg lists in clean/dirty helper calls. */
2034extern IRExpr** mkIRExprVec_0 ( void );
2035extern IRExpr** mkIRExprVec_1 ( IRExpr* );
2036extern IRExpr** mkIRExprVec_2 ( IRExpr*, IRExpr* );
2037extern IRExpr** mkIRExprVec_3 ( IRExpr*, IRExpr*, IRExpr* );
2038extern IRExpr** mkIRExprVec_4 ( IRExpr*, IRExpr*, IRExpr*, IRExpr* );
2039extern IRExpr** mkIRExprVec_5 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
2040                                IRExpr* );
2041extern IRExpr** mkIRExprVec_6 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
2042                                IRExpr*, IRExpr* );
2043extern IRExpr** mkIRExprVec_7 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
2044                                IRExpr*, IRExpr*, IRExpr* );
2045extern IRExpr** mkIRExprVec_8 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
2046                                IRExpr*, IRExpr*, IRExpr*, IRExpr*);
2047
2048/* IRExpr copiers:
2049   - shallowCopy: shallow-copy (ie. create a new vector that shares the
2050     elements with the original).
2051   - deepCopy: deep-copy (ie. create a completely new vector). */
2052extern IRExpr** shallowCopyIRExprVec ( IRExpr** );
2053extern IRExpr** deepCopyIRExprVec ( IRExpr** );
2054
2055/* Make a constant expression from the given host word taking into
2056   account (of course) the host word size. */
2057extern IRExpr* mkIRExpr_HWord ( HWord );
2058
2059/* Convenience function for constructing clean helper calls. */
2060extern
2061IRExpr* mkIRExprCCall ( IRType retty,
2062                        Int regparms, const HChar* name, void* addr,
2063                        IRExpr** args );
2064
2065
2066/* Convenience functions for atoms (IRExprs which are either Iex_Tmp or
2067 * Iex_Const). */
2068static inline Bool isIRAtom ( IRExpr* e ) {
2069   return toBool(e->tag == Iex_RdTmp || e->tag == Iex_Const);
2070}
2071
2072/* Are these two IR atoms identical?  Causes an assertion
2073   failure if they are passed non-atoms. */
2074extern Bool eqIRAtom ( IRExpr*, IRExpr* );
2075
2076
2077/* ------------------ Jump kinds ------------------ */
2078
2079/* This describes hints which can be passed to the dispatcher at guest
2080   control-flow transfer points.
2081
2082   Re Ijk_InvalICache and Ijk_FlushDCache: the guest state _must_ have
2083   two pseudo-registers, guest_CMSTART and guest_CMLEN, which specify
2084   the start and length of the region to be invalidated.  CM stands
2085   for "Cache Management".  These are both the size of a guest word.
2086   It is the responsibility of the relevant toIR.c to ensure that
2087   these are filled in with suitable values before issuing a jump of
2088   kind Ijk_InvalICache or Ijk_FlushDCache.
2089
2090   Ijk_InvalICache requests invalidation of translations taken from
2091   the requested range.  Ijk_FlushDCache requests flushing of the D
2092   cache for the specified range.
2093
2094   Re Ijk_EmWarn and Ijk_EmFail: the guest state must have a
2095   pseudo-register guest_EMNOTE, which is 32-bits regardless of the
2096   host or guest word size.  That register should be made to hold a
2097   VexEmNote value to indicate the reason for the exit.
2098
2099   In the case of Ijk_EmFail, the exit is fatal (Vex-generated code
2100   cannot continue) and so the jump destination can be anything.
2101
2102   Re Ijk_Sys_ (syscall jumps): the guest state must have a
2103   pseudo-register guest_IP_AT_SYSCALL, which is the size of a guest
2104   word.  Front ends should set this to be the IP at the most recently
2105   executed kernel-entering (system call) instruction.  This makes it
2106   very much easier (viz, actually possible at all) to back up the
2107   guest to restart a syscall that has been interrupted by a signal.
2108*/
typedef enum {
   Ijk_INVALID = 0x1A00,
   Ijk_Boring,          /* not interesting; just goto next */
   Ijk_Call,            /* guest is doing a call */
   Ijk_Ret,             /* guest is doing a return */
   Ijk_ClientReq,       /* do guest client req before continuing */
   Ijk_Yield,           /* client is yielding to thread scheduler */
   Ijk_EmWarn,          /* report emulation warning before continuing */
   Ijk_EmFail,          /* emulation critical (FATAL) error; give up */
   Ijk_NoDecode,        /* current instruction cannot be decoded */
   Ijk_MapFail,         /* Vex-provided address translation failed */
   Ijk_InvalICache,     /* Inval icache for range [CMSTART, +CMLEN) */
   Ijk_FlushDCache,     /* Flush dcache for range [CMSTART, +CMLEN) */
   Ijk_NoRedir,         /* Jump to un-redirected guest addr */

   /* The current instruction synthesises a signal. */
   Ijk_SigILL,          /* SIGILL */
   Ijk_SigTRAP,         /* SIGTRAP */
   Ijk_SigSEGV,         /* SIGSEGV */
   Ijk_SigBUS,          /* SIGBUS */
   Ijk_SigFPE_IntDiv,   /* SIGFPE - IntDiv */
   Ijk_SigFPE_IntOvf,   /* SIGFPE - IntOvf */

   /* Unfortunately, various guest-dependent syscall kinds.  They
      all mean: do a syscall before continuing. */
   Ijk_Sys_syscall,     /* amd64/x86 'syscall', ppc 'sc', arm 'svc #0' */
   Ijk_Sys_int32,       /* amd64/x86 'int $0x20' */
   Ijk_Sys_int128,      /* amd64/x86 'int $0x80' */
   Ijk_Sys_int129,      /* amd64/x86 'int $0x81' */
   Ijk_Sys_int130,      /* amd64/x86 'int $0x82' */
   Ijk_Sys_sysenter     /* x86 'sysenter'.  guest_EIP becomes
                           invalid at the point this happens. */
} IRJumpKind;
2141
2142extern void ppIRJumpKind ( IRJumpKind );
2143
2144
2145/* ------------------ Dirty helper calls ------------------ */
2146
2147/* A dirty call is a flexible mechanism for calling (possibly
2148   conditionally) a helper function or procedure.  The helper function
2149   may read, write or modify client memory, and may read, write or
2150   modify client state.  It can take arguments and optionally return a
2151   value.  It may return different results and/or do different things
2152   when called repeatedly with the same arguments, by means of storing
2153   private state.
2154
2155   If a value is returned, it is assigned to the nominated return
2156   temporary.
2157
2158   Dirty calls are statements rather than expressions for obvious
2159   reasons.  If a dirty call is marked as writing guest state, any
2160   pre-existing values derived from the written parts of the guest
2161   state are invalid.  Similarly, if the dirty call is stated as
2162   writing memory, any pre-existing loaded values are invalidated by
2163   it.
2164
2165   In order that instrumentation is possible, the call must state, and
2166   state correctly:
2167
2168   * Whether it reads, writes or modifies memory, and if so where.
2169
2170   * Whether it reads, writes or modifies guest state, and if so which
2171     pieces.  Several pieces may be stated, and their extents must be
2172     known at translation-time.  Each piece is allowed to repeat some
2173     number of times at a fixed interval, if required.
2174
2175   Normally, code is generated to pass just the args to the helper.
2176   However, if IRExpr_BBPTR() is present in the argument list (at most
2177   one instance is allowed), then the baseblock pointer is passed for
2178   that arg, so that the callee can access the guest state.  It is
2179   invalid for .nFxState to be zero but IRExpr_BBPTR() to be present,
2180   since .nFxState==0 is a claim that the call does not access guest
2181   state.
2182
2183   IMPORTANT NOTE re GUARDS: Dirty calls are strict, very strict.  The
2184   arguments and 'mFx' are evaluated REGARDLESS of the guard value.
2185   The order of argument evaluation is unspecified.  The guard
2186   expression is evaluated AFTER the arguments and 'mFx' have been
2187   evaluated.  'mFx' is expected (by Memcheck) to be a defined value
2188   even if the guard evaluates to false.
2189*/
2190
/* Number of entries in IRDirty.fxState[]; 7 is enough for
   FXSAVE/FXRSTOR on x86. */
#define VEX_N_FXSTATE  7
2192
/* Effects on resources (eg. registers, memory locations).

   Note: there is deliberately no comma after the last enumerator.  A
   trailing comma is only permitted from C99/C++11 onwards, and every
   other enum in this header omits it. */
typedef
   enum {
      Ifx_None=0x1B00,      /* no effect */
      Ifx_Read,             /* reads the resource */
      Ifx_Write,            /* writes the resource */
      Ifx_Modify            /* modifies the resource */
   }
   IREffect;
2202
2203/* Pretty-print an IREffect */
2204extern void ppIREffect ( IREffect );
2205
2206typedef
2207   struct _IRDirty {
2208      /* What to call, and details of args/results.  .guard must be
2209         non-NULL.  If .tmp is not IRTemp_INVALID, then the call
2210         returns a result which is placed in .tmp.  If at runtime the
2211         guard evaluates to false, .tmp has an 0x555..555 bit pattern
2212         written to it.  Hence conditional calls that assign .tmp are
2213         allowed. */
2214      IRCallee* cee;    /* where to call */
2215      IRExpr*   guard;  /* :: Ity_Bit.  Controls whether call happens */
2216      /* The args vector may contain IRExpr_BBPTR() and/or
2217         IRExpr_VECRET(), in both cases, at most once. */
2218      IRExpr**  args;   /* arg vector, ends in NULL. */
2219      IRTemp    tmp;    /* to assign result to, or IRTemp_INVALID if none */
2220
2221      /* Mem effects; we allow only one R/W/M region to be stated */
2222      IREffect  mFx;    /* indicates memory effects, if any */
2223      IRExpr*   mAddr;  /* of access, or NULL if mFx==Ifx_None */
2224      Int       mSize;  /* of access, or zero if mFx==Ifx_None */
2225
2226      /* Guest state effects; up to N allowed */
2227      Int  nFxState; /* must be 0 .. VEX_N_FXSTATE */
2228      struct {
2229         IREffect fx:16;   /* read, write or modify?  Ifx_None is invalid. */
2230         UShort   offset;
2231         UShort   size;
2232         UChar    nRepeats;
2233         UChar    repeatLen;
2234      } fxState[VEX_N_FXSTATE];
2235      /* The access can be repeated, as specified by nRepeats and
2236         repeatLen.  To describe only a single access, nRepeats and
2237         repeatLen should be zero.  Otherwise, repeatLen must be a
2238         multiple of size and greater than size. */
2239      /* Overall, the parts of the guest state denoted by (offset,
2240         size, nRepeats, repeatLen) is
2241               [offset, +size)
2242            and, if nRepeats > 0,
2243               for (i = 1; i <= nRepeats; i++)
2244                  [offset + i * repeatLen, +size)
2245         A convenient way to enumerate all segments is therefore
2246            for (i = 0; i < 1 + nRepeats; i++)
2247               [offset + i * repeatLen, +size)
2248      */
2249   }
2250   IRDirty;
2251
2252/* Pretty-print a dirty call */
2253extern void     ppIRDirty ( IRDirty* );
2254
2255/* Allocate an uninitialised dirty call */
2256extern IRDirty* emptyIRDirty ( void );
2257
2258/* Deep-copy a dirty call */
2259extern IRDirty* deepCopyIRDirty ( IRDirty* );
2260
2261/* A handy function which takes some of the tedium out of constructing
2262   dirty helper calls.  The called function impliedly does not return
2263   any value and has a constant-True guard.  The call is marked as
2264   accessing neither guest state nor memory (hence the "unsafe"
2265   designation) -- you can change this marking later if need be.  A
2266   suitable IRCallee is constructed from the supplied bits. */
2267extern
2268IRDirty* unsafeIRDirty_0_N ( Int regparms, const HChar* name, void* addr,
2269                             IRExpr** args );
2270
2271/* Similarly, make a zero-annotation dirty call which returns a value,
2272   and assign that to the given temp. */
2273extern
2274IRDirty* unsafeIRDirty_1_N ( IRTemp dst,
2275                             Int regparms, const HChar* name, void* addr,
2276                             IRExpr** args );
2277
2278
2279/* --------------- Memory Bus Events --------------- */
2280
2281typedef   /* Kinds of memory bus event; carried by Ist_MBE statements. */
2282   enum {
2283      Imbe_Fence=0x1C00,   /* A fence; no memory references may be moved across it. */
2284      /* Needed only on ARM.  It cancels a reservation made by a
2285         preceding Linked-Load, and needs to be handed through to the
2286         back end, just as LL and SC themselves are. */
2287      Imbe_CancelReservation
2288   }
2289   IRMBusEvent;
2290
2291extern void ppIRMBusEvent ( IRMBusEvent );
2292
2293
2294/* --------------- Compare and Swap --------------- */
2295
2296/* This denotes an atomic compare and swap operation, either
2297   a single-element one or a double-element one.
2298
2299   In the single-element case:
2300
2301     .addr is the memory address.
2302     .end  is the endianness with which memory is accessed
2303
2304     If .addr contains the same value as .expdLo, then .dataLo is
2305     written there, else there is no write.  In both cases, the
2306     original value at .addr is copied into .oldLo.
2307
2308     Types: .expdLo, .dataLo and .oldLo must all have the same type.
2309     It may be any integral type, viz: I8, I16, I32 or, for 64-bit
2310     guests, I64.
2311
2312     .oldHi must be IRTemp_INVALID, and .expdHi and .dataHi must
2313     be NULL.
2314
2315   In the double-element case:
2316
2317     .addr is the memory address.
2318     .end  is the endianness with which memory is accessed
2319
2320     The operation is the same:
2321
2322     If .addr contains the same value as .expdHi:.expdLo, then
2323     .dataHi:.dataLo is written there, else there is no write.  In
2324     both cases the original value at .addr is copied into
2325     .oldHi:.oldLo.
2326
2327     Types: .expdHi, .expdLo, .dataHi, .dataLo, .oldHi, .oldLo must
2328     all have the same type, which may be any integral type, viz: I8,
2329     I16, I32 or, for 64-bit guests, I64.
2330
2331     The double-element case is complicated by the issue of
2332     endianness.  In all cases, the two elements are understood to be
2333     located adjacently in memory, starting at the address .addr.
2334
2335       If .end is Iend_LE, then the .xxxLo component is at the lower
2336       address and the .xxxHi component is at the higher address, and
2337       each component is itself stored little-endianly.
2338
2339       If .end is Iend_BE, then the .xxxHi component is at the lower
2340       address and the .xxxLo component is at the higher address, and
2341       each component is itself stored big-endianly.
2342
2343   This allows representing more cases than most architectures can
2344   handle.  For example, x86 cannot do DCAS on 8- or 16-bit elements.
2345
2346   How to know if the CAS succeeded?
2347
2348   * if .oldLo == .expdLo (resp. .oldHi:.oldLo == .expdHi:.expdLo),
2349     then the CAS succeeded, .dataLo (resp. .dataHi:.dataLo) is now
2350     stored at .addr, and the original value there was .oldLo (resp.
2351     .oldHi:.oldLo).
2352
2353   * if .oldLo != .expdLo (resp. .oldHi:.oldLo != .expdHi:.expdLo),
2354     then the CAS failed, and the original value at .addr was .oldLo
2355     (resp. .oldHi:.oldLo).
2356
2357   Hence it is easy to know whether or not the CAS succeeded.
2358*/
2359typedef
2360   struct {
2361      IRTemp    oldHi;  /* old value of *addr is written here; IRTemp_INVALID if single-element */
2362      IRTemp    oldLo;  /* ditto, Lo component (the only one used if single-element) */
2363      IREndness end;    /* endianness of the data in memory */
2364      IRExpr*   addr;   /* address of the memory operated on */
2365      IRExpr*   expdHi; /* expected old value at *addr; NULL if single-element */
2366      IRExpr*   expdLo; /* ditto, Lo component */
2367      IRExpr*   dataHi; /* new value for *addr, written only on a match; NULL if single-element */
2368      IRExpr*   dataLo; /* ditto, Lo component */
2369   }
2370   IRCAS;
2371
2372extern void ppIRCAS ( IRCAS* cas );
2373
2374extern IRCAS* mkIRCAS ( IRTemp oldHi, IRTemp oldLo,
2375                        IREndness end, IRExpr* addr,
2376                        IRExpr* expdHi, IRExpr* expdLo,
2377                        IRExpr* dataHi, IRExpr* dataLo );
2378
2379extern IRCAS* deepCopyIRCAS ( IRCAS* );
2380
2381
2382/* ------------------ Circular Array Put ------------------ */
2383
2384typedef   /* Details of a PutI statement: a write to a guest register at a non-fixed offset. */
2385   struct {
2386      IRRegArray* descr; /* Part of guest state treated as circular */
2387      IRExpr*     ix;    /* Variable part of index into array */
2388      Int         bias;  /* Constant offset part of index into array */
2389      IRExpr*     data;  /* The value to write */
2390   } IRPutI;
2391
2392extern void ppIRPutI ( IRPutI* puti );
2393
2394extern IRPutI* mkIRPutI ( IRRegArray* descr, IRExpr* ix,
2395                          Int bias, IRExpr* data );
2396
2397extern IRPutI* deepCopyIRPutI ( IRPutI* );
2398
2399
2400/* --------------- Guarded loads and stores --------------- */
2401
2402/* Conditional stores are straightforward.  They are the same as
2403   normal stores, with an extra 'guard' field :: Ity_I1 that
2404   determines whether or not the store actually happens.  If not,
2405   memory is unmodified.
2406
2407   The semantics of this is that 'addr' and 'data' are fully evaluated
2408   even in the case where 'guard' evaluates to zero (false).
2409*/
2410typedef
2411   struct {
2412      IREndness end;    /* Endianness of the store */
2413      IRExpr*   addr;   /* store address; evaluated even if guard is false */
2414      IRExpr*   data;   /* value to write; evaluated even if guard is false */
2415      IRExpr*   guard;  /* Guarding value :: Ity_I1; if false, memory is unmodified */
2416   }
2417   IRStoreG;
2418
2419/* Conditional loads are a little more complex.  'addr' is the
2420   address, 'guard' is the guarding condition.  If the load takes
2421   place, the loaded value is placed in 'dst'.  If it does not take
2422   place, 'alt' is copied to 'dst'.  However, the loaded value is not
2423   placed directly in 'dst' -- it is first subjected to the conversion
2424   specified by 'cvt'.
2425
2426   For example, imagine doing a conditional 8-bit load, in which the
2427   loaded value is zero extended to 32 bits.  Hence:
2428   * 'dst' and 'alt' must have type I32
2429   * 'cvt' must be a unary op which converts I8 to I32.  In this
2430     example, it would be ILGop_8Uto32.
2431
2432   There is no explicit indication of the type at which the load is
2433   done, since that is inferrable from the arg type of 'cvt'.  Note
2434   that the types of 'alt' and 'dst' and the result type of 'cvt' must
2435   all be the same.
2436
2437   Semantically, 'addr' is evaluated even in the case where 'guard'
2438   evaluates to zero (false), and 'alt' is evaluated even when 'guard'
2439   evaluates to one (true).  That is, 'addr' and 'alt' are always
2440   evaluated.
2441*/
2442typedef   /* Conversion applied to the value loaded by a guarded load (IRLoadG.cvt). */
2443   enum {
2444      ILGop_INVALID=0x1D00,
2445      ILGop_Ident32,   /* 32 bit load, no conversion */
2446      ILGop_16Uto32,   /* 16 bit load, Z-widen to 32 */
2447      ILGop_16Sto32,   /* 16 bit load, S-widen to 32 */
2448      ILGop_8Uto32,    /* 8 bit load, Z-widen to 32 */
2449      ILGop_8Sto32     /* 8 bit load, S-widen to 32 */
2450   }
2451   IRLoadGOp;
2452
2453typedef
2454   struct {
2455      IREndness end;    /* Endianness of the load */
2456      IRLoadGOp cvt;    /* Conversion to apply to the loaded value; its arg type is the load type */
2457      IRTemp    dst;    /* Destination (LHS) of assignment; same type as 'alt' and cvt's result */
2458      IRExpr*   addr;   /* Address being loaded from; always evaluated */
2459      IRExpr*   alt;    /* Value copied to 'dst' if load is not done; always evaluated */
2460      IRExpr*   guard;  /* Guarding value; the load is done only if it is true */
2461   }
2462   IRLoadG;
2463
2464extern void ppIRStoreG ( IRStoreG* sg );
2465
2466extern void ppIRLoadGOp ( IRLoadGOp cvt );
2467
2468extern void ppIRLoadG ( IRLoadG* lg );
2469
2470extern IRStoreG* mkIRStoreG ( IREndness end,
2471                              IRExpr* addr, IRExpr* data,
2472                              IRExpr* guard );
2473
2474extern IRLoadG* mkIRLoadG ( IREndness end, IRLoadGOp cvt,
2475                            IRTemp dst, IRExpr* addr, IRExpr* alt,
2476                            IRExpr* guard );
2477
2478
2479/* ------------------ Statements ------------------ */
2480
2481/* The different kinds of statements.  Their meaning is explained
2482   below in the comments for IRStmt.
2483
2484   Those marked META do not represent code, but rather extra
2485   information about the code.  These statements can be removed
2486   without affecting the functional behaviour of the code, however
2487   they are required by some IR consumers such as tools that
2488   instrument the code.
2489*/
2490
2491typedef
2492   enum {
2493      Ist_NoOp=0x1E00, /* no-op */
2494      Ist_IMark,     /* META: instruction mark */
2495      Ist_AbiHint,   /* META: ABI hint */
2496      Ist_Put,       /* write guest register at a fixed offset */
2497      Ist_PutI,      /* write guest register at a non-fixed offset */
2498      Ist_WrTmp,     /* assign a value to a temporary */
2499      Ist_Store,     /* normal (unconditional) store to memory */
2500      Ist_LoadG,     /* guarded load */
2501      Ist_StoreG,    /* guarded store */
2502      Ist_CAS,       /* atomic compare-and-swap */
2503      Ist_LLSC,      /* Load-Linked or Store-Conditional */
2504      Ist_Dirty,     /* call to a C function that has side effects */
2505      Ist_MBE,       /* memory bus event */
2506      Ist_Exit       /* conditional exit from the middle of an IRSB */
2507   }
2508   IRStmtTag;
2509
2510/* A statement.  Stored as a tagged union.  'tag' indicates what kind
2511   of statement this is.  'Ist' is the union that holds the fields.
2512   If an IRStmt 'st' has st.tag equal to Ist_Store, then it's a store
2513   statement, and the fields can be accessed with
2514   'st.Ist.Store.<fieldname>'.
2515
2516   For each kind of statement, we show what it looks like when
2517   pretty-printed with ppIRStmt().
2518*/
2519typedef
2520   struct _IRStmt {
2521      IRStmtTag tag;   /* says which member of the union 'Ist' is in use */
2522      union {
2523         /* A no-op (usually resulting from IR optimisation).  Can be
2524            omitted without any effect.
2525
2526            ppIRStmt output: IR-NoOp
2527         */
2528         struct {
2529	 } NoOp;
2530
2531         /* META: instruction mark.  Marks the start of the statements
2532            that represent a single machine instruction (the end of
2533            those statements is marked by the next IMark or the end of
2534            the IRSB).  Contains the address and length of the
2535            instruction.
2536
2537            It also contains a delta value.  The delta must be
2538            subtracted from a guest program counter value before
2539            attempting to establish, by comparison with the address
2540            and length values, whether or not that program counter
2541            value refers to this instruction.  For x86, amd64, ppc32,
2542            ppc64 and arm, the delta value is zero.  For Thumb
2543            instructions, the delta value is one.  This is because, on
2544            Thumb, guest PC values (guest_R15T) are encoded using the
2545            top 31 bits of the instruction address and a 1 in the lsb;
2546            hence they appear to be (numerically) 1 past the start of
2547            the instruction they refer to.  IOW, guest_R15T on ARM
2548            holds a standard ARM interworking address.
2549
2550            ppIRStmt output: ------ IMark(<addr>, <len>, <delta>) ------,
2551                         eg. ------ IMark(0x4000792, 5, 0) ------,
2552         */
2553         struct {
2554            Addr64 addr;   /* instruction address */
2555            Int    len;    /* instruction length */
2556            UChar  delta;  /* addr = program counter as encoded in guest state
2557                                     - delta */
2558         } IMark;
2559
2560         /* META: An ABI hint, which says something about this
2561            platform's ABI.
2562
2563            At the moment, the only AbiHint is one which indicates
2564            that a given chunk of address space, [base .. base+len-1],
2565            has become undefined.  This is used on amd64-linux and
2566            some ppc variants to pass stack-redzoning hints to whoever
2567            wants to see them.  It also indicates the address of the
2568            next (dynamic) instruction that will be executed.  This is
2569            to help Memcheck do origin tracking.
2570
2571            ppIRStmt output: ====== AbiHint(<base>, <len>, <nia>) ======
2572                         eg. ====== AbiHint(t1, 16, t2) ======
2573         */
2574         struct {
2575            IRExpr* base;     /* Start  of undefined chunk */
2576            Int     len;      /* Length of undefined chunk */
2577            IRExpr* nia;      /* Address of next (guest) insn */
2578         } AbiHint;
2579
2580         /* Write a guest register, at a fixed offset in the guest state.
2581            ppIRStmt output: PUT(<offset>) = <data>, eg. PUT(60) = t1
2582         */
2583         struct {
2584            Int     offset;   /* Offset into the guest state */
2585            IRExpr* data;     /* The value to write */
2586         } Put;
2587
2588         /* Write a guest register, at a non-fixed offset in the guest
2589            state.  See the comment for GetI expressions for more
2590            information.
2591
2592            ppIRStmt output: PUTI<descr>[<ix>,<bias>] = <data>,
2593                         eg. PUTI(64:8xF64)[t5,0] = t1
2594         */
2595         struct {
2596            IRPutI* details;
2597         } PutI;
2598
2599         /* Assign a value to a temporary.  Note that SSA rules require
2600            each tmp is only assigned to once.  IR sanity checking will
2601            reject any block containing a temporary which is not assigned
2602            to exactly once.
2603
2604            ppIRStmt output: t<tmp> = <data>, eg. t1 = 3
2605         */
2606         struct {
2607            IRTemp  tmp;   /* Temporary  (LHS of assignment) */
2608            IRExpr* data;  /* Expression (RHS of assignment) */
2609         } WrTmp;
2610
2611         /* Write a value to memory.  This is a normal store, not a
2612            Store-Conditional.  To represent a Store-Conditional,
2613            instead use IRStmt.LLSC.
2614            ppIRStmt output: ST<end>(<addr>) = <data>, eg. STle(t1) = t2
2615         */
2616         struct {
2617            IREndness end;    /* Endianness of the store */
2618            IRExpr*   addr;   /* store address */
2619            IRExpr*   data;   /* value to write */
2620         } Store;
2621
2622         /* Guarded store.  Note that this is defined to evaluate all
2623            expression fields (addr, data) even if the guard evaluates
2624            to false.
2625            ppIRStmt output:
2626              if (<guard>) ST<end>(<addr>) = <data> */
2627         struct {
2628            IRStoreG* details;
2629         } StoreG;
2630
2631         /* Guarded load.  Note that this is defined to evaluate all
2632            expression fields (addr, alt) even if the guard evaluates
2633            to false.
2634            ppIRStmt output:
2635              t<tmp> = if (<guard>) <cvt>(LD<end>(<addr>)) else <alt> */
2636         struct {
2637            IRLoadG* details;
2638         } LoadG;
2639
2640         /* Do an atomic compare-and-swap operation.  Semantics are
2641            described above on a comment at the definition of IRCAS.
2642
2643            ppIRStmt output:
2644               t<tmp> = CAS<end>(<addr> :: <expected> -> <new>)
2645            eg
2646               t1 = CASle(t2 :: t3->Add32(t3,1))
2647               which denotes a 32-bit atomic increment
2648               of a value at address t2
2649
2650            A double-element CAS may also be denoted, in which case <tmp>,
2651            <expected> and <new> are all pairs of items, separated by
2652            commas.
2653         */
2654         struct {
2655            IRCAS* details;
2656         } CAS;
2657
2658         /* Either Load-Linked or Store-Conditional, depending on
2659            STOREDATA.
2660
2661            If STOREDATA is NULL then this is a Load-Linked, meaning
2662            that data is loaded from memory as normal, but a
2663            'reservation' for the address is also lodged in the
2664            hardware.
2665
2666               result = Load-Linked(addr, end)
2667
2668            The data transfer type is the type of RESULT (I32, I64,
2669            etc).  ppIRStmt output:
2670
2671               result = LD<end>-Linked(<addr>), eg. LDbe-Linked(t1)
2672
2673            If STOREDATA is not NULL then this is a Store-Conditional,
2674            hence:
2675
2676               result = Store-Conditional(addr, storedata, end)
2677
2678            The data transfer type is the type of STOREDATA and RESULT
2679            has type Ity_I1. The store may fail or succeed depending
2680            on the state of a previously lodged reservation on this
2681            address.  RESULT is written 1 if the store succeeds and 0
2682            if it fails.  eg ppIRStmt output:
2683
2684               result = ( ST<end>-Cond(<addr>) = <storedata> )
2685               eg t3 = ( STbe-Cond(t1) = t2 )
2686
2687            In all cases, the address must be naturally aligned for
2688            the transfer type -- any misaligned addresses should be
2689            caught by a dominating IR check and side exit.  This
2690            alignment restriction exists because on at least some
2691            LL/SC platforms (ppc), stwcx. etc will trap w/ SIGBUS on
2692            misaligned addresses, and we have to actually generate
2693            stwcx. on the host, and we don't want it trapping on the
2694            host.
2695
2696            Summary of rules for transfer type:
2697              STOREDATA == NULL (LL):
2698                transfer type = type of RESULT
2699              STOREDATA != NULL (SC):
2700                transfer type = type of STOREDATA, and RESULT :: Ity_I1
2701         */
2702         struct {
2703            IREndness end;       /* Endianness of the transfer */
2704            IRTemp    result;    /* LL: the loaded value; SC: 1 if stored, else 0 */
2705            IRExpr*   addr;      /* Address; naturally aligned for the transfer type */
2706            IRExpr*   storedata; /* NULL => LL, non-NULL => SC */
2707         } LLSC;
2708
2709         /* Call (possibly conditionally) a C function that has side
2710            effects (ie. is "dirty").  See the comments above the
2711            IRDirty type declaration for more information.
2712
2713            ppIRStmt output:
2714               t<tmp> = DIRTY <guard> <effects>
2715                  ::: <callee>(<args>)
2716            eg.
2717               t1 = DIRTY t27 RdFX-gst(16,4) RdFX-gst(60,4)
2718                     ::: foo{0x380035f4}(t2)
2719         */
2720         struct {
2721            IRDirty* details;
2722         } Dirty;
2723
2724         /* A memory bus event - a fence or, on ARM, cancellation of a
2725            Linked-Load reservation (see IRMBusEvent).  IR optimisation
2726            treats all these as fences across which no memory references
2727            may be moved.  ppIRStmt output: MBusEvent-Fence.
2728            NOTE(review): BusLock/BusUnlock, formerly listed here, are not
2729            in IRMBusEvent; CancelReservation output -- TODO confirm.  */
2730         struct {
2731            IRMBusEvent event;
2732         } MBE;
2733
2734         /* Conditional exit from the middle of an IRSB.
2735            ppIRStmt output: if (<guard>) goto {<jk>} <dst>
2736                         eg. if (t69) goto {Boring} 0x4000AAA:I32
2737            If <guard> is true, the guest state is also updated by
2738            PUT-ing <dst> at <offsIP>.  This is done because a
2739            taken exit must update the guest program counter.
2740         */
2741         struct {
2742            IRExpr*    guard;    /* Conditional expression */
2743            IRConst*   dst;      /* Jump target (constant only) */
2744            IRJumpKind jk;       /* Jump kind */
2745            Int        offsIP;   /* Guest state offset for IP */
2746         } Exit;
2747      } Ist;
2748   }
2749   IRStmt;
2750
2751/* Statement constructors. */
2752extern IRStmt* IRStmt_NoOp    ( void );
2753extern IRStmt* IRStmt_IMark   ( Addr64 addr, Int len, UChar delta );
2754extern IRStmt* IRStmt_AbiHint ( IRExpr* base, Int len, IRExpr* nia );
2755extern IRStmt* IRStmt_Put     ( Int off, IRExpr* data );
2756extern IRStmt* IRStmt_PutI    ( IRPutI* details );
2757extern IRStmt* IRStmt_WrTmp   ( IRTemp tmp, IRExpr* data );
2758extern IRStmt* IRStmt_Store   ( IREndness end, IRExpr* addr, IRExpr* data );
2759extern IRStmt* IRStmt_StoreG  ( IREndness end, IRExpr* addr, IRExpr* data,
2760                                IRExpr* guard );
2761extern IRStmt* IRStmt_LoadG   ( IREndness end, IRLoadGOp cvt, IRTemp dst,
2762                                IRExpr* addr, IRExpr* alt, IRExpr* guard );
2763extern IRStmt* IRStmt_CAS     ( IRCAS* details );
2764extern IRStmt* IRStmt_LLSC    ( IREndness end, IRTemp result,
2765                                IRExpr* addr, IRExpr* storedata );
2766extern IRStmt* IRStmt_Dirty   ( IRDirty* details );
2767extern IRStmt* IRStmt_MBE     ( IRMBusEvent event );
2768extern IRStmt* IRStmt_Exit    ( IRExpr* guard, IRJumpKind jk, IRConst* dst,
2769                                Int offsIP );
2770
2771/* Deep-copy an IRStmt. */
2772extern IRStmt* deepCopyIRStmt ( IRStmt* );
2773
2774/* Pretty-print an IRStmt. */
2775extern void ppIRStmt ( IRStmt* );
2776
2777
2778/* ------------------ Basic Blocks ------------------ */
2779
2780/* Type environments: a bunch of statements, expressions, etc, are
2781   incomplete without an environment indicating the type of each
2782   IRTemp.  So this provides one.  IR temporaries are really just
2783   unsigned ints and so this provides an array, 0 .. types_used-1 of
2784   them.
2785*/
2786typedef
2787   struct {
2788      IRType* types;       /* array giving the type of each IRTemp, indexed by the temp */
2789      Int     types_size;  /* presumably the allocated length of 'types' -- TODO confirm */
2790      Int     types_used;  /* presumably the number of 'types' entries in use -- TODO confirm */
2791   }
2792   IRTypeEnv;
2793
2794/* Obtain a new IRTemp */
2795extern IRTemp newIRTemp ( IRTypeEnv*, IRType );
2796
2797/* Deep-copy a type environment */
2798extern IRTypeEnv* deepCopyIRTypeEnv ( IRTypeEnv* );
2799
2800/* Pretty-print a type environment */
2801extern void ppIRTypeEnv ( IRTypeEnv* );
2802
2803
2804/* Code blocks, which in proper compiler terminology are superblocks
2805   (single entry, multiple exit code sequences) contain:
2806
2807   - A table giving a type for each temp (the "type environment")
2808   - An expandable array of statements
2809   - An expression of type 32 or 64 bits, depending on the
2810     guest's word size, indicating the next destination if the block
2811     executes all the way to the end, without a side exit
2812   - An indication of any special actions (JumpKind) needed
2813     for this final jump.
2814   - Offset of the IP field in the guest state.  This will be
2815     updated before the final jump is done.
2816
2817   "IRSB" stands for "IR Super Block".
2818*/
2819typedef
2820   struct {
2821      IRTypeEnv* tyenv;       /* type of each temp (the "type environment") */
2822      IRStmt**   stmts;       /* expandable array of statements */
2823      Int        stmts_size;  /* presumably the allocated length of 'stmts' -- TODO confirm */
2824      Int        stmts_used;  /* presumably the number of statements present -- TODO confirm */
2825      IRExpr*    next;        /* next destination if the block runs to its end, no side exit */
2826      IRJumpKind jumpkind;    /* any special actions needed for this final jump */
2827      Int        offsIP;      /* guest state offset of the IP field, updated before final jump */
2828   }
2829   IRSB;
2830
2831/* Allocate a new, uninitialised IRSB */
2832extern IRSB* emptyIRSB ( void );
2833
2834/* Deep-copy an IRSB */
2835extern IRSB* deepCopyIRSB ( IRSB* );
2836
2837/* Deep-copy an IRSB, except for the statements list, which is set to be
2838   a new, empty, list of statements. */
2839extern IRSB* deepCopyIRSBExceptStmts ( IRSB* );
2840
2841/* Pretty-print an IRSB */
2842extern void ppIRSB ( IRSB* );
2843
2844/* Append an IRStmt to an IRSB */
2845extern void addStmtToIRSB ( IRSB*, IRStmt* );
2846
2847
2848/*---------------------------------------------------------------*/
2849/*--- Helper functions for the IR                             ---*/
2850/*---------------------------------------------------------------*/
2851
2852/* For messing with IR type environments */
2853extern IRTypeEnv* emptyIRTypeEnv  ( void );
2854
2855/* What is the type of this expression? */
2856extern IRType typeOfIRConst ( IRConst* );
2857extern IRType typeOfIRTemp  ( IRTypeEnv*, IRTemp );
2858extern IRType typeOfIRExpr  ( IRTypeEnv*, IRExpr* );
2859
2860/* What are the arg and result type for this IRLoadGOp? */
2861extern void typeOfIRLoadGOp ( IRLoadGOp cvt,
2862                              /*OUT*/IRType* t_res,
2863                              /*OUT*/IRType* t_arg );
2864
2865/* Sanity check a BB of IR */
2866extern void sanityCheckIRSB ( IRSB*  bb,
2867                              const  HChar* caller,
2868                              Bool   require_flatness,
2869                              IRType guest_word_size );
2870extern Bool isFlatIRStmt ( IRStmt* );
2871
2872/* Is this any value actually in the enumeration 'IRType' ? */
2873extern Bool isPlausibleIRType ( IRType ty );
2874
2875
2876/*---------------------------------------------------------------*/
2877/*--- IR injection                                            ---*/
2878/*---------------------------------------------------------------*/
2879
2880void vex_inject_ir(IRSB *, IREndness);
2881
2882
2883#endif /* ndef __LIBVEX_IR_H */
2884
2885/*---------------------------------------------------------------*/
2886/*---                                             libvex_ir.h ---*/
2887/*---------------------------------------------------------------*/
2888