libvex_ir.h revision 663860b1408516d02ebfcb3a9999a134e6cfb223
1
2/*---------------------------------------------------------------*/
3/*--- begin                                       libvex_ir.h ---*/
4/*---------------------------------------------------------------*/
5
6/*
7   This file is part of Valgrind, a dynamic binary instrumentation
8   framework.
9
10   Copyright (C) 2004-2012 OpenWorks LLP
11      info@open-works.net
12
13   This program is free software; you can redistribute it and/or
14   modify it under the terms of the GNU General Public License as
15   published by the Free Software Foundation; either version 2 of the
16   License, or (at your option) any later version.
17
18   This program is distributed in the hope that it will be useful, but
19   WITHOUT ANY WARRANTY; without even the implied warranty of
20   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21   General Public License for more details.
22
23   You should have received a copy of the GNU General Public License
24   along with this program; if not, write to the Free Software
25   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26   02110-1301, USA.
27
28   The GNU General Public License is contained in the file COPYING.
29
30   Neither the names of the U.S. Department of Energy nor the
31   University of California nor the names of its contributors may be
32   used to endorse or promote products derived from this software
33   without prior written permission.
34*/
35
36#ifndef __LIBVEX_IR_H
37#define __LIBVEX_IR_H
38
39#include "libvex_basictypes.h"
40
41
42/*---------------------------------------------------------------*/
43/*--- High-level IR description                               ---*/
44/*---------------------------------------------------------------*/
45
46/* Vex IR is an architecture-neutral intermediate representation.
47   Unlike some IRs in systems similar to Vex, it is not like assembly
48   language (ie. a list of instructions).  Rather, it is more like the
49   IR that might be used in a compiler.
50
51   Code blocks
52   ~~~~~~~~~~~
53   The code is broken into small code blocks ("superblocks", type:
54   'IRSB').  Each code block typically represents from 1 to perhaps 50
55   instructions.  IRSBs are single-entry, multiple-exit code blocks.
56   Each IRSB contains three things:
57   - a type environment, which indicates the type of each temporary
58     value present in the IRSB
59   - a list of statements, which represent code
60   - a jump that exits from the end the IRSB
61   Because the blocks are multiple-exit, there can be additional
62   conditional exit statements that cause control to leave the IRSB
63   before the final exit.  Also because of this, IRSBs can cover
64   multiple non-consecutive sequences of code (up to 3).  These are
65   recorded in the type VexGuestExtents (see libvex.h).
66
67   Statements and expressions
68   ~~~~~~~~~~~~~~~~~~~~~~~~~~
69   Statements (type 'IRStmt') represent operations with side-effects,
70   eg.  guest register writes, stores, and assignments to temporaries.
71   Expressions (type 'IRExpr') represent operations without
72   side-effects, eg. arithmetic operations, loads, constants.
73   Expressions can contain sub-expressions, forming expression trees,
74   eg. (3 + (4 * load(addr1))).
75
76   Storage of guest state
77   ~~~~~~~~~~~~~~~~~~~~~~
78   The "guest state" contains the guest registers of the guest machine
79   (ie.  the machine that we are simulating).  It is stored by default
80   in a block of memory supplied by the user of the VEX library,
81   generally referred to as the guest state (area).  To operate on
82   these registers, one must first read ("Get") them from the guest
83   state into a temporary value.  Afterwards, one can write ("Put")
84   them back into the guest state.
85
86   Get and Put are characterised by a byte offset into the guest
87   state, a small integer which effectively gives the identity of the
88   referenced guest register, and a type, which indicates the size of
89   the value to be transferred.
90
91   The basic "Get" and "Put" operations are sufficient to model normal
92   fixed registers on the guest.  Selected areas of the guest state
93   can be treated as a circular array of registers (type:
94   'IRRegArray'), which can be indexed at run-time.  This is done with
95   the "GetI" and "PutI" primitives.  This is necessary to describe
96   rotating register files, for example the x87 FPU stack, SPARC
97   register windows, and the Itanium register files.
98
99   Examples, and flattened vs. unflattened code
100   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
101   For example, consider this x86 instruction:
102
103     addl %eax, %ebx
104
105   One Vex IR translation for this code would be this:
106
107     ------ IMark(0x24F275, 7, 0) ------
108     t3 = GET:I32(0)             # get %eax, a 32-bit integer
109     t2 = GET:I32(12)            # get %ebx, a 32-bit integer
110     t1 = Add32(t3,t2)           # addl
111     PUT(0) = t1                 # put %eax
112
113   (For simplicity, this ignores the effects on the condition codes, and
114   the update of the instruction pointer.)
115
116   The "IMark" is an IR statement that doesn't represent actual code.
117   Instead it indicates the address and length of the original
118   instruction.  The numbers 0 and 12 are offsets into the guest state
119   for %eax and %ebx.  The full list of offsets for an architecture
120   <ARCH> can be found in the type VexGuest<ARCH>State in the file
121   VEX/pub/libvex_guest_<ARCH>.h.
122
123   The five statements in this example are:
124   - the IMark
125   - three assignments to temporaries
126   - one register write (put)
127
128   The six expressions in this example are:
129   - two register reads (gets)
130   - one arithmetic (add) operation
131   - three temporaries (two nested within the Add32, one in the PUT)
132
133   The above IR is "flattened", ie. all sub-expressions are "atoms",
134   either constants or temporaries.  An equivalent, unflattened version
135   would be:
136
137     PUT(0) = Add32(GET:I32(0), GET:I32(12))
138
139   IR is guaranteed to be flattened at instrumentation-time.  This makes
140   instrumentation easier.  Equivalent flattened and unflattened IR
141   typically results in the same generated code.
142
143   Another example, this one showing loads and stores:
144
145     addl %edx,4(%eax)
146
147   This becomes (again ignoring condition code and instruction pointer
148   updates):
149
150     ------ IMark(0x4000ABA, 3, 0) ------
151     t3 = Add32(GET:I32(0),0x4:I32)
152     t2 = LDle:I32(t3)
153     t1 = GET:I32(8)
154     t0 = Add32(t2,t1)
155     STle(t3) = t0
156
157   The "le" in "LDle" and "STle" is short for "little-endian".
158
159   No need for deallocations
160   ~~~~~~~~~~~~~~~~~~~~~~~~~
161   Although there are allocation functions for various data structures
162   in this file, there are no deallocation functions.  This is because
163   Vex uses a memory allocation scheme that automatically reclaims the
164   memory used by allocated structures once translation is completed.
165   This makes things easier for tools that instrument/transform code
166   blocks.
167
168   SSAness and typing
169   ~~~~~~~~~~~~~~~~~~
170   The IR is fully typed.  For every IRSB (IR block) it is possible to
171   say unambiguously whether or not it is correctly typed.
172   Incorrectly typed IR has no meaning and VEX will refuse to
173   process it.  At various points during processing VEX typechecks the
174   IR and aborts if any violations are found.  This seems overkill but
175   makes it a great deal easier to build a reliable JIT.
176
177   IR also has the SSA property.  SSA stands for Static Single
178   Assignment, and what it means is that each IR temporary may be
179   assigned to only once.  This idea became widely used in compiler
180   construction in the mid to late 90s.  It makes many IR-level
181   transformations/code improvements easier, simpler and faster.
182   Whenever it typechecks an IR block, VEX also checks the SSA
183   property holds, and will abort if not so.  So SSAness is
184   mechanically and rigidly enforced.
185*/
186
187/*---------------------------------------------------------------*/
188/*--- Type definitions for the IR                             ---*/
189/*---------------------------------------------------------------*/
190
191/* General comments about naming schemes:
192
193   All publicly visible functions contain the name of the primary
194   type on which they operate (IRFoo, IRBar, etc).  Hence you should
195   be able to identify these functions by grepping for "IR[A-Z]".
196
197   For some type 'IRFoo':
198
199   - ppIRFoo is the printing method for IRFoo, printing it to the
200     output channel specified in the LibVEX_Initialise call.
201
202   - eqIRFoo is a structural equality predicate for IRFoos.
203
204   - deepCopyIRFoo is a deep copy constructor for IRFoos.
205     It recursively traverses the entire argument tree and
206     produces a complete new tree.  All types have a deep copy
207     constructor.
208
209   - shallowCopyIRFoo is the shallow copy constructor for IRFoos.
210     It creates a new top-level copy of the supplied object,
211     but does not copy any sub-objects.  Only some types have a
212     shallow copy constructor.
213*/
214
215/* ------------------ Types ------------------ */
216
217/* A type indicates the size of a value, and whether it's an integer, a
218   float, or a vector (SIMD) value. */
219typedef
220   enum {
221      Ity_INVALID=0x11000,
222      Ity_I1,
223      Ity_I8,
224      Ity_I16,
225      Ity_I32,
226      Ity_I64,
227      Ity_I128,  /* 128-bit scalar */
228      Ity_F32,   /* IEEE 754 float */
229      Ity_F64,   /* IEEE 754 double */
230      Ity_D32,   /* 32-bit Decimal floating point */
231      Ity_D64,   /* 64-bit Decimal floating point */
232      Ity_D128,  /* 128-bit Decimal floating point */
233      Ity_F128,  /* 128-bit floating point; implementation defined */
234      Ity_V128,  /* 128-bit SIMD */
235      Ity_V256   /* 256-bit SIMD */
236   }
237   IRType;
238
239/* Pretty-print an IRType */
240extern void ppIRType ( IRType );
241
242/* Get the size (in bytes) of an IRType */
243extern Int sizeofIRType ( IRType );
244
245
246/* ------------------ Endianness ------------------ */
247
/* IREndness is used in load IRExprs and store IRStmts. */
typedef
   enum {
      Iend_LE=0x12000, /* little endian */
      Iend_BE          /* big endian */
   }
   IREndness;
255
256
257/* ------------------ Constants ------------------ */
258
/* IRConsts are used within 'Const' and 'Exit' IRExprs. */

/* The various kinds of constant. */
typedef
   enum {
      Ico_U1=0x13000,
      Ico_U8,
      Ico_U16,
      Ico_U32,
      Ico_U64,
      Ico_F32,   /* 32-bit IEEE754 floating */
      Ico_F32i,  /* 32-bit unsigned int to be interpreted literally
                    as a IEEE754 single value. */
      Ico_F64,   /* 64-bit IEEE754 floating */
      Ico_F64i,  /* 64-bit unsigned int to be interpreted literally
                    as a IEEE754 double value. */
      Ico_V128,  /* 128-bit restricted vector constant, with 1 bit
                    (repeated 8 times) for each of the 16 x 1-byte lanes */
      Ico_V256   /* 256-bit restricted vector constant, with 1 bit
                    (repeated 8 times) for each of the 32 x 1-byte lanes */
   }
   IRConstTag;
281
282/* A constant.  Stored as a tagged union.  'tag' indicates what kind of
283   constant this is.  'Ico' is the union that holds the fields.  If an
284   IRConst 'c' has c.tag equal to Ico_U32, then it's a 32-bit constant,
285   and its value can be accessed with 'c.Ico.U32'. */
286typedef
287   struct _IRConst {
288      IRConstTag tag;
289      union {
290         Bool   U1;
291         UChar  U8;
292         UShort U16;
293         UInt   U32;
294         ULong  U64;
295         Float  F32;
296         UInt   F32i;
297         Double F64;
298         ULong  F64i;
299         UShort V128;   /* 16-bit value; see Ico_V128 comment above */
300         UInt   V256;   /* 32-bit value; see Ico_V256 comment above */
301      } Ico;
302   }
303   IRConst;
304
305/* IRConst constructors */
306extern IRConst* IRConst_U1   ( Bool );
307extern IRConst* IRConst_U8   ( UChar );
308extern IRConst* IRConst_U16  ( UShort );
309extern IRConst* IRConst_U32  ( UInt );
310extern IRConst* IRConst_U64  ( ULong );
311extern IRConst* IRConst_F32  ( Float );
312extern IRConst* IRConst_F32i ( UInt );
313extern IRConst* IRConst_F64  ( Double );
314extern IRConst* IRConst_F64i ( ULong );
315extern IRConst* IRConst_V128 ( UShort );
316extern IRConst* IRConst_V256 ( UInt );
317
318/* Deep-copy an IRConst */
319extern IRConst* deepCopyIRConst ( IRConst* );
320
321/* Pretty-print an IRConst */
322extern void ppIRConst ( IRConst* );
323
324/* Compare two IRConsts for equality */
325extern Bool eqIRConst ( IRConst*, IRConst* );
326
327
328/* ------------------ Call targets ------------------ */
329
330/* Describes a helper function to call.  The name part is purely for
331   pretty printing and not actually used.  regparms=n tells the back
332   end that the callee has been declared
333   "__attribute__((regparm(n)))", although indirectly using the
334   VEX_REGPARM(n) macro.  On some targets (x86) the back end will need
335   to construct a non-standard sequence to call a function declared
336   like this.
337
338   mcx_mask is a sop to Memcheck.  It indicates which args should be
339   considered 'always defined' when lazily computing definedness of
340   the result.  Bit 0 of mcx_mask corresponds to args[0], bit 1 to
341   args[1], etc.  If a bit is set, the corresponding arg is excluded
342   (hence "x" in "mcx") from definedness checking.
343*/
344
345typedef
346   struct {
347      Int    regparms;
348      HChar* name;
349      void*  addr;
350      UInt   mcx_mask;
351   }
352   IRCallee;
353
354/* Create an IRCallee. */
355extern IRCallee* mkIRCallee ( Int regparms, HChar* name, void* addr );
356
357/* Deep-copy an IRCallee. */
358extern IRCallee* deepCopyIRCallee ( IRCallee* );
359
360/* Pretty-print an IRCallee. */
361extern void ppIRCallee ( IRCallee* );
362
363
364/* ------------------ Guest state arrays ------------------ */
365
366/* This describes a section of the guest state that we want to
367   be able to index at run time, so as to be able to describe
368   indexed or rotating register files on the guest. */
369typedef
370   struct {
371      Int    base;   /* guest state offset of start of indexed area */
372      IRType elemTy; /* type of each element in the indexed area */
373      Int    nElems; /* number of elements in the indexed area */
374   }
375   IRRegArray;
376
377extern IRRegArray* mkIRRegArray ( Int, IRType, Int );
378
379extern IRRegArray* deepCopyIRRegArray ( IRRegArray* );
380
381extern void ppIRRegArray ( IRRegArray* );
382extern Bool eqIRRegArray ( IRRegArray*, IRRegArray* );
383
384
385/* ------------------ Temporaries ------------------ */
386
387/* This represents a temporary, eg. t1.  The IR optimiser relies on the
388   fact that IRTemps are 32-bit ints.  Do not change them to be ints of
389   any other size. */
390typedef UInt IRTemp;
391
392/* Pretty-print an IRTemp. */
393extern void ppIRTemp ( IRTemp );
394
395#define IRTemp_INVALID ((IRTemp)0xFFFFFFFF)
396
397
398/* --------------- Primops (arity 1,2,3 and 4) --------------- */
399
400/* Primitive operations that are used in Unop, Binop, Triop and Qop
401   IRExprs.  Once we take into account integer, floating point and SIMD
402   operations of all the different sizes, there are quite a lot of them.
403   Most instructions supported by the architectures that Vex supports
404   (x86, PPC, etc) are represented.  Some more obscure ones (eg. cpuid)
405   are not;  they are instead handled with dirty helpers that emulate
406   their functionality.  Such obscure ones are thus not directly visible
407   in the IR, but their effects on guest state (memory and registers)
408   are made visible via the annotations in IRDirty structures.
409*/
410typedef
411   enum {
412      /* -- Do not change this ordering.  The IR generators rely on
413            (eg) Iop_Add64 == Iop_Add8 + 3. -- */
414
415      Iop_INVALID=0x14000,
416      Iop_Add8,  Iop_Add16,  Iop_Add32,  Iop_Add64,
417      Iop_Sub8,  Iop_Sub16,  Iop_Sub32,  Iop_Sub64,
418      /* Signless mul.  MullS/MullU is elsewhere. */
419      Iop_Mul8,  Iop_Mul16,  Iop_Mul32,  Iop_Mul64,
420      Iop_Or8,   Iop_Or16,   Iop_Or32,   Iop_Or64,
421      Iop_And8,  Iop_And16,  Iop_And32,  Iop_And64,
422      Iop_Xor8,  Iop_Xor16,  Iop_Xor32,  Iop_Xor64,
423      Iop_Shl8,  Iop_Shl16,  Iop_Shl32,  Iop_Shl64,
424      Iop_Shr8,  Iop_Shr16,  Iop_Shr32,  Iop_Shr64,
425      Iop_Sar8,  Iop_Sar16,  Iop_Sar32,  Iop_Sar64,
426      /* Integer comparisons. */
427      Iop_CmpEQ8,  Iop_CmpEQ16,  Iop_CmpEQ32,  Iop_CmpEQ64,
428      Iop_CmpNE8,  Iop_CmpNE16,  Iop_CmpNE32,  Iop_CmpNE64,
429      /* Tags for unary ops */
430      Iop_Not8,  Iop_Not16,  Iop_Not32,  Iop_Not64,
431
432      /* Exactly like CmpEQ8/16/32/64, but carrying the additional
433         hint that these compute the success/failure of a CAS
434         operation, and hence are almost certainly applied to two
435         copies of the same value, which in turn has implications for
436         Memcheck's instrumentation. */
437      Iop_CasCmpEQ8, Iop_CasCmpEQ16, Iop_CasCmpEQ32, Iop_CasCmpEQ64,
438      Iop_CasCmpNE8, Iop_CasCmpNE16, Iop_CasCmpNE32, Iop_CasCmpNE64,
439
440      /* -- Ordering not important after here. -- */
441
442      /* Widening multiplies */
443      Iop_MullS8, Iop_MullS16, Iop_MullS32, Iop_MullS64,
444      Iop_MullU8, Iop_MullU16, Iop_MullU32, Iop_MullU64,
445
446      /* Weirdo integer stuff */
447      Iop_Clz64, Iop_Clz32,   /* count leading zeroes */
448      Iop_Ctz64, Iop_Ctz32,   /* count trailing zeros */
449      /* Ctz64/Ctz32/Clz64/Clz32 are UNDEFINED when given arguments of
450         zero.  You must ensure they are never given a zero argument.
451      */
452
453      /* Standard integer comparisons */
454      Iop_CmpLT32S, Iop_CmpLT64S,
455      Iop_CmpLE32S, Iop_CmpLE64S,
456      Iop_CmpLT32U, Iop_CmpLT64U,
457      Iop_CmpLE32U, Iop_CmpLE64U,
458
459      /* As a sop to Valgrind-Memcheck, the following are useful. */
460      Iop_CmpNEZ8, Iop_CmpNEZ16,  Iop_CmpNEZ32,  Iop_CmpNEZ64,
461      Iop_CmpwNEZ32, Iop_CmpwNEZ64, /* all-0s -> all-0s; other -> all-1s */
462      Iop_Left8, Iop_Left16, Iop_Left32, Iop_Left64, /*  \x -> x | -x */
463      Iop_Max32U, /* unsigned max */
464
465      /* PowerPC-style 3-way integer comparisons.  Without them it is
466         difficult to simulate PPC efficiently.
467         op(x,y) | x < y  = 0x8 else
468                 | x > y  = 0x4 else
469                 | x == y = 0x2
470      */
471      Iop_CmpORD32U, Iop_CmpORD64U,
472      Iop_CmpORD32S, Iop_CmpORD64S,
473
474      /* Division */
475      /* TODO: clarify semantics wrt rounding, negative values, whatever */
476      Iop_DivU32,   // :: I32,I32 -> I32 (simple div, no mod)
477      Iop_DivS32,   // ditto, signed
478      Iop_DivU64,   // :: I64,I64 -> I64 (simple div, no mod)
479      Iop_DivS64,   // ditto, signed
480      Iop_DivU64E,  // :: I64,I64 -> I64 (dividend is 64-bit arg (hi) concat with 64 0's (low))
481      Iop_DivS64E,  // ditto, signed
482      Iop_DivU32E,  // :: I32,I32 -> I32 (dividend is 32-bit arg (hi) concat with 32 0's (low))
483      Iop_DivS32E,  // ditto, signed
484
485      Iop_DivModU64to32, // :: I64,I32 -> I64
486                         // of which lo half is div and hi half is mod
487      Iop_DivModS64to32, // ditto, signed
488
489      Iop_DivModU128to64, // :: V128,I64 -> V128
490                          // of which lo half is div and hi half is mod
491      Iop_DivModS128to64, // ditto, signed
492
493      Iop_DivModS64to64, // :: I64,I64 -> I128
494                         // of which lo half is div and hi half is mod
495
496      /* Integer conversions.  Some of these are redundant (eg
497         Iop_64to8 is the same as Iop_64to32 and then Iop_32to8), but
498         having a complete set reduces the typical dynamic size of IR
499         and makes the instruction selectors easier to write. */
500
501      /* Widening conversions */
502      Iop_8Uto16, Iop_8Uto32,  Iop_8Uto64,
503                  Iop_16Uto32, Iop_16Uto64,
504                               Iop_32Uto64,
505      Iop_8Sto16, Iop_8Sto32,  Iop_8Sto64,
506                  Iop_16Sto32, Iop_16Sto64,
507                               Iop_32Sto64,
508
509      /* Narrowing conversions */
510      Iop_64to8, Iop_32to8, Iop_64to16,
511      /* 8 <-> 16 bit conversions */
512      Iop_16to8,      // :: I16 -> I8, low half
513      Iop_16HIto8,    // :: I16 -> I8, high half
514      Iop_8HLto16,    // :: (I8,I8) -> I16
515      /* 16 <-> 32 bit conversions */
516      Iop_32to16,     // :: I32 -> I16, low half
517      Iop_32HIto16,   // :: I32 -> I16, high half
518      Iop_16HLto32,   // :: (I16,I16) -> I32
519      /* 32 <-> 64 bit conversions */
520      Iop_64to32,     // :: I64 -> I32, low half
521      Iop_64HIto32,   // :: I64 -> I32, high half
522      Iop_32HLto64,   // :: (I32,I32) -> I64
523      /* 64 <-> 128 bit conversions */
524      Iop_128to64,    // :: I128 -> I64, low half
525      Iop_128HIto64,  // :: I128 -> I64, high half
526      Iop_64HLto128,  // :: (I64,I64) -> I128
527      /* 1-bit stuff */
528      Iop_Not1,   /* :: Ity_Bit -> Ity_Bit */
529      Iop_32to1,  /* :: Ity_I32 -> Ity_Bit, just select bit[0] */
530      Iop_64to1,  /* :: Ity_I64 -> Ity_Bit, just select bit[0] */
531      Iop_1Uto8,  /* :: Ity_Bit -> Ity_I8,  unsigned widen */
532      Iop_1Uto32, /* :: Ity_Bit -> Ity_I32, unsigned widen */
533      Iop_1Uto64, /* :: Ity_Bit -> Ity_I64, unsigned widen */
534      Iop_1Sto8,  /* :: Ity_Bit -> Ity_I8,  signed widen */
535      Iop_1Sto16, /* :: Ity_Bit -> Ity_I16, signed widen */
536      Iop_1Sto32, /* :: Ity_Bit -> Ity_I32, signed widen */
537      Iop_1Sto64, /* :: Ity_Bit -> Ity_I64, signed widen */
538
539      /* ------ Floating point.  We try to be IEEE754 compliant. ------ */
540
541      /* --- Simple stuff as mandated by 754. --- */
542
543      /* Binary operations, with rounding. */
544      /* :: IRRoundingMode(I32) x F64 x F64 -> F64 */
545      Iop_AddF64, Iop_SubF64, Iop_MulF64, Iop_DivF64,
546
547      /* :: IRRoundingMode(I32) x F32 x F32 -> F32 */
548      Iop_AddF32, Iop_SubF32, Iop_MulF32, Iop_DivF32,
549
550      /* Variants of the above which produce a 64-bit result but which
551         round their result to a IEEE float range first. */
552      /* :: IRRoundingMode(I32) x F64 x F64 -> F64 */
553      Iop_AddF64r32, Iop_SubF64r32, Iop_MulF64r32, Iop_DivF64r32,
554
555      /* Unary operations, without rounding. */
556      /* :: F64 -> F64 */
557      Iop_NegF64, Iop_AbsF64,
558
559      /* :: F32 -> F32 */
560      Iop_NegF32, Iop_AbsF32,
561
562      /* Unary operations, with rounding. */
563      /* :: IRRoundingMode(I32) x F64 -> F64 */
564      Iop_SqrtF64, Iop_SqrtF64r32,
565
566      /* :: IRRoundingMode(I32) x F32 -> F32 */
567      Iop_SqrtF32,
568
569      /* Comparison, yielding GT/LT/EQ/UN(ordered), as per the following:
570            0x45 Unordered
571            0x01 LT
572            0x00 GT
573            0x40 EQ
574         This just happens to be the Intel encoding.  The values
575         are recorded in the type IRCmpF64Result.
576      */
577      /* :: F64 x F64 -> IRCmpF64Result(I32) */
578      Iop_CmpF64,
579      Iop_CmpF32,
580      Iop_CmpF128,
581
582      /* --- Int to/from FP conversions. --- */
583
584      /* For the most part, these take a first argument :: Ity_I32 (as
585         IRRoundingMode) which is an indication of the rounding mode
586         to use, as per the following encoding ("the standard
587         encoding"):
588            00b  to nearest (the default)
589            01b  to -infinity
590            10b  to +infinity
591            11b  to zero
592         This just happens to be the Intel encoding.  For reference only,
593         the PPC encoding is:
594            00b  to nearest (the default)
595            01b  to zero
596            10b  to +infinity
597            11b  to -infinity
598         Any PPC -> IR front end will have to translate these PPC
599         encodings, as encoded in the guest state, to the standard
600         encodings, to pass to the primops.
601         For reference only, the ARM VFP encoding is:
602            00b  to nearest
603            01b  to +infinity
604            10b  to -infinity
605            11b  to zero
606         Again, this will have to be converted to the standard encoding
607         to pass to primops.
608
609         If one of these conversions gets an out-of-range condition,
610         or a NaN, as an argument, the result is host-defined.  On x86
611         the "integer indefinite" value 0x80..00 is produced.  On PPC
612         it is either 0x80..00 or 0x7F..FF depending on the sign of
613         the argument.
614
615         On ARMvfp, when converting to a signed integer result, the
616         overflow result is 0x80..00 for negative args and 0x7F..FF
617         for positive args.  For unsigned integer results it is
618         0x00..00 and 0xFF..FF respectively.
619
620         Rounding is required whenever the destination type cannot
621         represent exactly all values of the source type.
622      */
623      Iop_F64toI16S, /* IRRoundingMode(I32) x F64 -> signed I16 */
624      Iop_F64toI32S, /* IRRoundingMode(I32) x F64 -> signed I32 */
625      Iop_F64toI64S, /* IRRoundingMode(I32) x F64 -> signed I64 */
626      Iop_F64toI64U, /* IRRoundingMode(I32) x F64 -> unsigned I64 */
627
628      Iop_F64toI32U, /* IRRoundingMode(I32) x F64 -> unsigned I32 */
629
630      Iop_I16StoF64, /*                       signed I16 -> F64 */
631      Iop_I32StoF64, /*                       signed I32 -> F64 */
632      Iop_I64StoF64, /* IRRoundingMode(I32) x signed I64 -> F64 */
633      Iop_I64UtoF64, /* IRRoundingMode(I32) x unsigned I64 -> F64 */
634      Iop_I64UtoF32, /* IRRoundingMode(I32) x unsigned I64 -> F32 */
635
636      Iop_I32UtoF64, /*                       unsigned I32 -> F64 */
637
638      Iop_F32toI16S, /* IRRoundingMode(I32) x F32 -> signed I16 */
639      Iop_F32toI32S, /* IRRoundingMode(I32) x F32 -> signed I32 */
640      Iop_F32toI64S, /* IRRoundingMode(I32) x F32 -> signed I64 */
641
642      Iop_I16StoF32, /*                       signed I16 -> F32 */
643      Iop_I32StoF32, /* IRRoundingMode(I32) x signed I32 -> F32 */
644      Iop_I64StoF32, /* IRRoundingMode(I32) x signed I64 -> F32 */
645
646      /* Conversion between floating point formats */
647      Iop_F32toF64,  /*                       F32 -> F64 */
648      Iop_F64toF32,  /* IRRoundingMode(I32) x F64 -> F32 */
649
650      /* Reinterpretation.  Take an F64 and produce an I64 with
651         the same bit pattern, or vice versa. */
652      Iop_ReinterpF64asI64, Iop_ReinterpI64asF64,
653      Iop_ReinterpF32asI32, Iop_ReinterpI32asF32,
654
655      /* Support for 128-bit floating point */
656      Iop_F64HLtoF128,/* (high half of F128,low half of F128) -> F128 */
657      Iop_F128HItoF64,/* F128 -> high half of F128 into a F64 register */
658      Iop_F128LOtoF64,/* F128 -> low  half of F128 into a F64 register */
659
660      /* :: IRRoundingMode(I32) x F128 x F128 -> F128 */
661      Iop_AddF128, Iop_SubF128, Iop_MulF128, Iop_DivF128,
662
663      /* :: F128 -> F128 */
664      Iop_NegF128, Iop_AbsF128,
665
666      /* :: IRRoundingMode(I32) x F128 -> F128 */
667      Iop_SqrtF128,
668
669      Iop_I32StoF128, /*                signed I32  -> F128 */
670      Iop_I64StoF128, /*                signed I64  -> F128 */
671      Iop_F32toF128,  /*                       F32  -> F128 */
672      Iop_F64toF128,  /*                       F64  -> F128 */
673
674      Iop_F128toI32S, /* IRRoundingMode(I32) x F128 -> signed I32  */
675      Iop_F128toI64S, /* IRRoundingMode(I32) x F128 -> signed I64  */
676      Iop_F128toF64,  /* IRRoundingMode(I32) x F128 -> F64         */
677      Iop_F128toF32,  /* IRRoundingMode(I32) x F128 -> F32         */
678
679      /* --- guest x86/amd64 specifics, not mandated by 754. --- */
680
681      /* Binary ops, with rounding. */
682      /* :: IRRoundingMode(I32) x F64 x F64 -> F64 */
683      Iop_AtanF64,       /* FPATAN,  arctan(arg1/arg2)       */
684      Iop_Yl2xF64,       /* FYL2X,   arg1 * log2(arg2)       */
685      Iop_Yl2xp1F64,     /* FYL2XP1, arg1 * log2(arg2+1.0)   */
686      Iop_PRemF64,       /* FPREM,   non-IEEE remainder(arg1/arg2)    */
687      Iop_PRemC3210F64,  /* C3210 flags resulting from FPREM, :: I32 */
688      Iop_PRem1F64,      /* FPREM1,  IEEE remainder(arg1/arg2)    */
689      Iop_PRem1C3210F64, /* C3210 flags resulting from FPREM1, :: I32 */
690      Iop_ScaleF64,      /* FSCALE,  arg1 * (2^RoundTowardsZero(arg2)) */
691      /* Note that on x86 guest, PRem1{C3210} has the same behaviour
692         as the IEEE mandated RemF64, except it is limited in the
693         range of its operand.  Hence the partialness. */
694
695      /* Unary ops, with rounding. */
696      /* :: IRRoundingMode(I32) x F64 -> F64 */
697      Iop_SinF64,    /* FSIN */
698      Iop_CosF64,    /* FCOS */
699      Iop_TanF64,    /* FTAN */
700      Iop_2xm1F64,   /* (2^arg - 1.0) */
701      Iop_RoundF64toInt, /* F64 value to nearest integral value (still
702                            as F64) */
703      Iop_RoundF32toInt, /* F32 value to nearest integral value (still
704                            as F32) */
705
706      /* --- guest s390 specifics, not mandated by 754. --- */
707
708      /* Fused multiply-add/sub */
709      /* :: IRRoundingMode(I32) x F32 x F32 x F32 -> F32
710            (computes op3 * op2 +/- op1 */
711      Iop_MAddF32, Iop_MSubF32,
712
713      /* --- guest ppc32/64 specifics, not mandated by 754. --- */
714
715      /* Ternary operations, with rounding. */
716      /* Fused multiply-add/sub, with 112-bit intermediate
717         precision for ppc.
718         Also used to implement fused multiply-add/sub for s390. */
719      /* :: IRRoundingMode(I32) x F64 x F64 x F64 -> F64
720            (computes arg2 * arg3 +/- arg4) */
721      Iop_MAddF64, Iop_MSubF64,
722
723      /* Variants of the above which produce a 64-bit result but which
724         round their result to a IEEE float range first. */
725      /* :: IRRoundingMode(I32) x F64 x F64 x F64 -> F64 */
726      Iop_MAddF64r32, Iop_MSubF64r32,
727
728      /* :: F64 -> F64 */
729      Iop_Est5FRSqrt,    /* reciprocal square root estimate, 5 good bits */
730      Iop_RoundF64toF64_NEAREST, /* frin */
731      Iop_RoundF64toF64_NegINF,  /* frim */
732      Iop_RoundF64toF64_PosINF,  /* frip */
733      Iop_RoundF64toF64_ZERO,    /* friz */
734
735      /* :: F64 -> F32 */
736      Iop_TruncF64asF32, /* do F64->F32 truncation as per 'fsts' */
737
738      /* :: IRRoundingMode(I32) x F64 -> F64 */
739      Iop_RoundF64toF32, /* round F64 to nearest F32 value (still as F64) */
740      /* NB: pretty much the same as Iop_F64toF32, except no change
741         of type. */
742
743      /* :: F64 -> I32 */
744      Iop_CalcFPRF, /* Calc 5 fpscr[FPRF] bits (Class, <, =, >, Unord)
745                       from FP result */
746
747      /* ------------------ 32-bit SIMD Integer ------------------ */
748
749      /* 32x1 saturating add/sub (ok, well, not really SIMD :) */
750      Iop_QAdd32S,
751      Iop_QSub32S,
752
753      /* 16x2 add/sub, also signed/unsigned saturating variants */
754      Iop_Add16x2, Iop_Sub16x2,
755      Iop_QAdd16Sx2, Iop_QAdd16Ux2,
756      Iop_QSub16Sx2, Iop_QSub16Ux2,
757
758      /* 16x2 signed/unsigned halving add/sub.  For each lane, these
759         compute bits 16:1 of (eg) sx(argL) + sx(argR),
760         or zx(argL) - zx(argR) etc. */
761      Iop_HAdd16Ux2, Iop_HAdd16Sx2,
762      Iop_HSub16Ux2, Iop_HSub16Sx2,
763
764      /* 8x4 add/sub, also signed/unsigned saturating variants */
765      Iop_Add8x4, Iop_Sub8x4,
766      Iop_QAdd8Sx4, Iop_QAdd8Ux4,
767      Iop_QSub8Sx4, Iop_QSub8Ux4,
768
769      /* 8x4 signed/unsigned halving add/sub.  For each lane, these
770         compute bits 8:1 of (eg) sx(argL) + sx(argR),
771         or zx(argL) - zx(argR) etc. */
772      Iop_HAdd8Ux4, Iop_HAdd8Sx4,
773      Iop_HSub8Ux4, Iop_HSub8Sx4,
774
775      /* 8x4 sum of absolute unsigned differences. */
776      Iop_Sad8Ux4,
777
778      /* MISC (vector integer cmp != 0) */
779      Iop_CmpNEZ16x2, Iop_CmpNEZ8x4,
780
781      /* ------------------ 64-bit SIMD FP ------------------------ */
782
783      /* Conversion to/from int */
784      Iop_I32UtoFx2,  Iop_I32StoFx2,    /* I32x4 -> F32x4 */
785      Iop_FtoI32Ux2_RZ,  Iop_FtoI32Sx2_RZ,    /* F32x4 -> I32x4 */
786      /* Fixed32 format is floating-point number with fixed number of fraction
787         bits. The number of fraction bits is passed as a second argument of
788         type I8. */
789      Iop_F32ToFixed32Ux2_RZ, Iop_F32ToFixed32Sx2_RZ, /* fp -> fixed-point */
790      Iop_Fixed32UToF32x2_RN, Iop_Fixed32SToF32x2_RN, /* fixed-point -> fp */
791
792      /* Binary operations */
793      Iop_Max32Fx2,      Iop_Min32Fx2,
794      /* Pairwise Min and Max. See integer pairwise operations for more
795         details. */
796      Iop_PwMax32Fx2,    Iop_PwMin32Fx2,
797      /* Note: For the following compares, the arm front-end assumes a
798         nan in a lane of either argument returns zero for that lane. */
799      Iop_CmpEQ32Fx2, Iop_CmpGT32Fx2, Iop_CmpGE32Fx2,
800
801      /* Vector Reciprocal Estimate finds an approximate reciprocal of each
802      element in the operand vector, and places the results in the destination
803      vector.  */
804      Iop_Recip32Fx2,
805
806      /* Vector Reciprocal Step computes (2.0 - arg1 * arg2).
807         Note, that if one of the arguments is zero and another one is infinity
808         of arbitrary sign the result of the operation is 2.0. */
809      Iop_Recps32Fx2,
810
811      /* Vector Reciprocal Square Root Estimate finds an approximate reciprocal
812         square root of each element in the operand vector. */
813      Iop_Rsqrte32Fx2,
814
815      /* Vector Reciprocal Square Root Step computes (3.0 - arg1 * arg2) / 2.0.
816         Note that if one of the arguments is zero and the other one is infinity
817         of arbitrary sign the result of the operation is 1.5. */
818      Iop_Rsqrts32Fx2,
819
820      /* Unary */
821      Iop_Neg32Fx2, Iop_Abs32Fx2,
822
823      /* ------------------ 64-bit SIMD Integer. ------------------ */
824
825      /* MISC (vector integer cmp != 0) */
826      Iop_CmpNEZ8x8, Iop_CmpNEZ16x4, Iop_CmpNEZ32x2,
827
828      /* ADDITION (normal / unsigned sat / signed sat) */
829      Iop_Add8x8,   Iop_Add16x4,   Iop_Add32x2,
830      Iop_QAdd8Ux8, Iop_QAdd16Ux4, Iop_QAdd32Ux2, Iop_QAdd64Ux1,
831      Iop_QAdd8Sx8, Iop_QAdd16Sx4, Iop_QAdd32Sx2, Iop_QAdd64Sx1,
832
833      /* PAIRWISE operations */
834      /* Iop_PwFoo16x4( [a,b,c,d], [e,f,g,h] ) =
835            [Foo16(a,b), Foo16(c,d), Foo16(e,f), Foo16(g,h)] */
836      Iop_PwAdd8x8,  Iop_PwAdd16x4,  Iop_PwAdd32x2,
837      Iop_PwMax8Sx8, Iop_PwMax16Sx4, Iop_PwMax32Sx2,
838      Iop_PwMax8Ux8, Iop_PwMax16Ux4, Iop_PwMax32Ux2,
839      Iop_PwMin8Sx8, Iop_PwMin16Sx4, Iop_PwMin32Sx2,
840      Iop_PwMin8Ux8, Iop_PwMin16Ux4, Iop_PwMin32Ux2,
841      /* Lengthening variant is unary. The resulting vector contains half as
842         many elements as the operand, but they are twice as wide.
843         Example:
844            Iop_PwAddL16Ux4( [a,b,c,d] ) = [a+b,c+d]
845               where a+b and c+d are unsigned 32-bit values. */
846      Iop_PwAddL8Ux8, Iop_PwAddL16Ux4, Iop_PwAddL32Ux2,
847      Iop_PwAddL8Sx8, Iop_PwAddL16Sx4, Iop_PwAddL32Sx2,
848
849      /* SUBTRACTION (normal / unsigned sat / signed sat) */
850      Iop_Sub8x8,   Iop_Sub16x4,   Iop_Sub32x2,
851      Iop_QSub8Ux8, Iop_QSub16Ux4, Iop_QSub32Ux2, Iop_QSub64Ux1,
852      Iop_QSub8Sx8, Iop_QSub16Sx4, Iop_QSub32Sx2, Iop_QSub64Sx1,
853
854      /* ABSOLUTE VALUE */
855      Iop_Abs8x8, Iop_Abs16x4, Iop_Abs32x2,
856
857      /* MULTIPLICATION (normal / high half of signed/unsigned / polynomial ) */
858      Iop_Mul8x8, Iop_Mul16x4, Iop_Mul32x2,
859      Iop_Mul32Fx2,
860      Iop_MulHi16Ux4,
861      Iop_MulHi16Sx4,
862      /* Polynomial multiplication treats its arguments as coefficients of
863         polynomials over {0, 1}. */
864      Iop_PolynomialMul8x8,
865
866      /* Vector Saturating Doubling Multiply Returning High Half and
867         Vector Saturating Rounding Doubling Multiply Returning High Half */
868      /* These IROp's multiply corresponding elements in two vectors, double
869         the results, and place the most significant half of the final results
870         in the destination vector. The results are truncated or rounded. If
871         any of the results overflow, they are saturated. */
872      Iop_QDMulHi16Sx4, Iop_QDMulHi32Sx2,
873      Iop_QRDMulHi16Sx4, Iop_QRDMulHi32Sx2,
874
875      /* AVERAGING: note: (arg1 + arg2 + 1) >>u 1 */
876      Iop_Avg8Ux8,
877      Iop_Avg16Ux4,
878
879      /* MIN/MAX */
880      Iop_Max8Sx8, Iop_Max16Sx4, Iop_Max32Sx2,
881      Iop_Max8Ux8, Iop_Max16Ux4, Iop_Max32Ux2,
882      Iop_Min8Sx8, Iop_Min16Sx4, Iop_Min32Sx2,
883      Iop_Min8Ux8, Iop_Min16Ux4, Iop_Min32Ux2,
884
885      /* COMPARISON */
886      Iop_CmpEQ8x8,  Iop_CmpEQ16x4,  Iop_CmpEQ32x2,
887      Iop_CmpGT8Ux8, Iop_CmpGT16Ux4, Iop_CmpGT32Ux2,
888      Iop_CmpGT8Sx8, Iop_CmpGT16Sx4, Iop_CmpGT32Sx2,
889
890      /* COUNT ones / leading zeroes / leading sign bits (not including topmost
891         bit) */
892      Iop_Cnt8x8,
893      Iop_Clz8Sx8, Iop_Clz16Sx4, Iop_Clz32Sx2,
894      Iop_Cls8Sx8, Iop_Cls16Sx4, Iop_Cls32Sx2,
895
896      /* VECTOR x VECTOR SHIFT / ROTATE */
897      Iop_Shl8x8, Iop_Shl16x4, Iop_Shl32x2,
898      Iop_Shr8x8, Iop_Shr16x4, Iop_Shr32x2,
899      Iop_Sar8x8, Iop_Sar16x4, Iop_Sar32x2,
900      Iop_Sal8x8, Iop_Sal16x4, Iop_Sal32x2, Iop_Sal64x1,
901
902      /* VECTOR x SCALAR SHIFT (shift amt :: Ity_I8) */
903      Iop_ShlN8x8, Iop_ShlN16x4, Iop_ShlN32x2,
904      Iop_ShrN8x8, Iop_ShrN16x4, Iop_ShrN32x2,
905      Iop_SarN8x8, Iop_SarN16x4, Iop_SarN32x2,
906
907      /* VECTOR x VECTOR SATURATING SHIFT */
908      Iop_QShl8x8, Iop_QShl16x4, Iop_QShl32x2, Iop_QShl64x1,
909      Iop_QSal8x8, Iop_QSal16x4, Iop_QSal32x2, Iop_QSal64x1,
910      /* VECTOR x INTEGER SATURATING SHIFT */
911      Iop_QShlN8Sx8, Iop_QShlN16Sx4, Iop_QShlN32Sx2, Iop_QShlN64Sx1,
912      Iop_QShlN8x8, Iop_QShlN16x4, Iop_QShlN32x2, Iop_QShlN64x1,
913      Iop_QSalN8x8, Iop_QSalN16x4, Iop_QSalN32x2, Iop_QSalN64x1,
914
915      /* NARROWING (binary)
916         -- narrow 2xI64 into 1xI64, hi half from left arg */
917      /* For saturated narrowing, I believe there are 4 variants of
918         the basic arithmetic operation, depending on the signedness
919         of argument and result.  Here are examples that exemplify
920         what I mean:
921
922         QNarrow16Uto8U ( UShort x )  if (x >u 255) x = 255;
923                                      return x[7:0];
924
925         QNarrow16Sto8S ( Short x )   if (x <s -128) x = -128;
926                                      if (x >s  127) x = 127;
927                                      return x[7:0];
928
929         QNarrow16Uto8S ( UShort x )  if (x >u 127) x = 127;
930                                      return x[7:0];
931
932         QNarrow16Sto8U ( Short x )   if (x <s 0)   x = 0;
933                                      if (x >s 255) x = 255;
934                                      return x[7:0];
935      */
936      Iop_QNarrowBin16Sto8Ux8,
937      Iop_QNarrowBin16Sto8Sx8, Iop_QNarrowBin32Sto16Sx4,
938      Iop_NarrowBin16to8x8,    Iop_NarrowBin32to16x4,
939
940      /* INTERLEAVING */
941      /* Interleave lanes from low or high halves of
942         operands.  Most-significant result lane is from the left
943         arg. */
944      Iop_InterleaveHI8x8, Iop_InterleaveHI16x4, Iop_InterleaveHI32x2,
945      Iop_InterleaveLO8x8, Iop_InterleaveLO16x4, Iop_InterleaveLO32x2,
946      /* Interleave odd/even lanes of operands.  Most-significant result lane
947         is from the left arg.  Note that Interleave{Odd,Even}Lanes32x2 are
948         identical to Interleave{HI,LO}32x2 and so are omitted.*/
949      Iop_InterleaveOddLanes8x8, Iop_InterleaveEvenLanes8x8,
950      Iop_InterleaveOddLanes16x4, Iop_InterleaveEvenLanes16x4,
951
952
953      /* CONCATENATION -- build a new value by concatenating either
954         the even or odd lanes of both operands.  Note that
955         Cat{Odd,Even}Lanes32x2 are identical to Interleave{HI,LO}32x2
956         and so are omitted. */
957      Iop_CatOddLanes8x8, Iop_CatOddLanes16x4,
958      Iop_CatEvenLanes8x8, Iop_CatEvenLanes16x4,
959
960      /* GET / SET elements of VECTOR
961         GET is binop (I64, I8) -> I<elem_size>
962         SET is triop (I64, I8, I<elem_size>) -> I64 */
963      /* Note: the arm back-end handles only constant second argument */
964      Iop_GetElem8x8, Iop_GetElem16x4, Iop_GetElem32x2,
965      Iop_SetElem8x8, Iop_SetElem16x4, Iop_SetElem32x2,
966
967      /* DUPLICATING -- copy value to all lanes */
968      Iop_Dup8x8,   Iop_Dup16x4,   Iop_Dup32x2,
969
970      /* EXTRACT -- copy 8-arg3 highest bytes from arg1 to 8-arg3 lowest bytes
971         of result and arg3 lowest bytes of arg2 to arg3 highest bytes of
972         result.
973         It is a triop: (I64, I64, I8) -> I64 */
974      /* Note: the arm back-end handles only constant third argument. */
975      Iop_Extract64,
976
977      /* REVERSE the order of elements in each Half-words, Words,
978         Double-words */
979      /* Examples:
980            Reverse16_8x8([a,b,c,d,e,f,g,h]) = [b,a,d,c,f,e,h,g]
981            Reverse32_8x8([a,b,c,d,e,f,g,h]) = [d,c,b,a,h,g,f,e]
982            Reverse64_8x8([a,b,c,d,e,f,g,h]) = [h,g,f,e,d,c,b,a] */
983      Iop_Reverse16_8x8,
984      Iop_Reverse32_8x8, Iop_Reverse32_16x4,
985      Iop_Reverse64_8x8, Iop_Reverse64_16x4, Iop_Reverse64_32x2,
986
987      /* PERMUTING -- copy src bytes to dst,
988         as indexed by control vector bytes:
989            for i in 0 .. 7 . result[i] = argL[ argR[i] ]
990         argR[i] values may only be in the range 0 .. 7, else behaviour
991         is undefined. */
992      Iop_Perm8x8,
993
994      /* Vector Reciprocal Estimate and Vector Reciprocal Square Root Estimate
995         See floating-point equivalents for details. */
996      Iop_Recip32x2, Iop_Rsqrte32x2,
997
998      /* ------------------ Decimal Floating Point ------------------ */
999
1000      /* ARITHMETIC INSTRUCTIONS   64-bit
1001	 ----------------------------------
1002	 IRRoundingModeDFP(I32) X D64 X D64 -> D64
1003      */
1004      Iop_AddD64, Iop_SubD64, Iop_MulD64, Iop_DivD64,
1005
1006      /* ARITHMETIC INSTRUCTIONS  128-bit
1007	 ----------------------------------
1008	 IRRoundingModeDFP(I32) X D128 X D128 -> D128
1009      */
1010      Iop_AddD128, Iop_SubD128, Iop_MulD128, Iop_DivD128,
1011
1012      /* SHIFT SIGNIFICAND INSTRUCTIONS
1013       *    The DFP significand is shifted by the number of digits specified
1014       *    by the U8 operand.  Digits shifted out of the leftmost digit are
1015       *    lost. Zeros are supplied to the vacated positions on the right.
1016       *    The sign of the result is the same as the sign of the original
1017       *    operand.
1018       *
1019       * D64 x U8  -> D64    left shift and right shift respectively */
1020      Iop_ShlD64, Iop_ShrD64,
1021
1022      /* D128 x U8  -> D128  left shift and right shift respectively */
1023      Iop_ShlD128, Iop_ShrD128,
1024
1025
1026      /* FORMAT CONVERSION INSTRUCTIONS
1027       *   D32 -> D64
1028       */
1029      Iop_D32toD64,
1030
1031      /*   D64 -> D128 */
1032      Iop_D64toD128,
1033
1034      /*   I64S -> D128 */
1035      Iop_I64StoD128,
1036
1037      /*   IRRoundingModeDFP(I32) x D64 -> D32 */
1038      Iop_D64toD32,
1039
1040      /*   IRRoundingModeDFP(I32) x D128 -> D64 */
1041      Iop_D128toD64,
1042
1043      /*   IRRoundingModeDFP(I32) x I64 -> D64 */
1044      Iop_I64StoD64,
1045
1046      /*   IRRoundingModeDFP(I32) x D64 -> I64 */
1047      Iop_D64toI64S,
1048
1049      /*   IRRoundingModeDFP(I32) x D128 -> I64 */
1050      Iop_D128toI64S,
1051
1052      /* ROUNDING INSTRUCTIONS
1053       * IRRoundingMode(I32) x D64 -> D64
1054       * The D64 operand, if a finite number, is rounded to an integer value.
1055       */
1056      Iop_RoundD64toInt,
1057
1058      /* IRRoundingMode(I32) x D128 -> D128 */
1059      Iop_RoundD128toInt,
1060
1061      /* COMPARE INSTRUCTIONS
1062       * D64 x D64 -> IRCmpD64Result(I32) */
1063      Iop_CmpD64,
1064
1065      /* D128 x D128 -> IRCmpD64Result(I32) */
1066      Iop_CmpD128,
1067
1068      /* QUANTIZE AND ROUND INSTRUCTIONS
1069       * The source operand is converted and rounded to the form with the
1070       * immediate exponent specified by the rounding and exponent parameter.
1071       *
1072       * The second operand is converted and rounded to the form
1073       * of the first operand's exponent and the rounded based on the specified
1074       * rounding mode parameter.
1075       *
1076       * IRRoundingModeDFP(I32) x D64 x D64-> D64 */
1077      Iop_QuantizeD64,
1078
1079      /* IRRoundingModeDFP(I32) x D128 x D128 -> D128 */
1080      Iop_QuantizeD128,
1081
1082      /* IRRoundingModeDFP(I32) x I8 x D64 -> D64
1083       *    The Decimal Floating point operand is rounded to the requested
1084       *    significance given by the I8 operand as specified by the rounding
1085       *    mode.
1086       */
1087      Iop_SignificanceRoundD64,
1088
1089      /* IRRoundingModeDFP(I32) x I8 x D128 -> D128 */
1090      Iop_SignificanceRoundD128,
1091
1092      /* EXTRACT AND INSERT INSTRUCTIONS
1093       * D64 -> I64
1094       *    The exponent of the D32 or D64 operand is extracted.  The
1095       *    extracted exponent is converted to a 64-bit signed binary integer.
1096       */
1097      Iop_ExtractExpD64,
1098
1099      /* D128 -> I64 */
1100      Iop_ExtractExpD128,
1101
1102      /* I64 x I64  -> D64
1103       *    The exponent is specified by the first I64 operand the signed
1104       *    significand is given by the second I64 value.  The result is a D64
1105       *    value consisting of the specified significand and exponent whose
1106       *    sign is that of the specified significand.
1107       */
1108      Iop_InsertExpD64,
1109
1110      /* I64 x I128 -> D128 */
1111      Iop_InsertExpD128,
1112
1113      /* Support for 128-bit DFP type */
1114      Iop_D64HLtoD128, Iop_D128HItoD64, Iop_D128LOtoD64,
1115
1116      /*  I64 -> I64
1117       *     Convert 50-bit densely packed BCD string to 60 bit BCD string
1118       */
1119      Iop_DPBtoBCD,
1120
1121      /* I64 -> I64
1122       *     Convert 60 bit BCD string to 50-bit densely packed BCD string
1123       */
1124      Iop_BCDtoDPB,
1125
1126      /* Conversion I64 -> D64 */
1127      Iop_ReinterpI64asD64,
1128
1129      /* Conversion D64 -> I64 */
1130      Iop_ReinterpD64asI64,
1131
1132      /* ------------------ 128-bit SIMD FP. ------------------ */
1133
1134      /* --- 32x4 vector FP --- */
1135
1136      /* binary */
1137      Iop_Add32Fx4, Iop_Sub32Fx4, Iop_Mul32Fx4, Iop_Div32Fx4,
1138      Iop_Max32Fx4, Iop_Min32Fx4,
1139      Iop_Add32Fx2, Iop_Sub32Fx2,
1140      /* Note: For the following compares, the ppc and arm front-ends assume a
1141         nan in a lane of either argument returns zero for that lane. */
1142      Iop_CmpEQ32Fx4, Iop_CmpLT32Fx4, Iop_CmpLE32Fx4, Iop_CmpUN32Fx4,
1143      Iop_CmpGT32Fx4, Iop_CmpGE32Fx4,
1144
1145      /* Vector Absolute */
1146      Iop_Abs32Fx4,
1147
1148      /* Pairwise Max and Min. See integer pairwise operations for details. */
1149      Iop_PwMax32Fx4, Iop_PwMin32Fx4,
1150
1151      /* unary */
1152      Iop_Sqrt32Fx4, Iop_RSqrt32Fx4,
1153      Iop_Neg32Fx4,
1154
1155      /* Vector Reciprocal Estimate finds an approximate reciprocal of each
1156      element in the operand vector, and places the results in the destination
1157      vector.  */
1158      Iop_Recip32Fx4,
1159
1160      /* Vector Reciprocal Step computes (2.0 - arg1 * arg2).
1161         Note, that if one of the arguments is zero and another one is infinity
1162         of arbitrary sign the result of the operation is 2.0. */
1163      Iop_Recps32Fx4,
1164
1165      /* Vector Reciprocal Square Root Estimate finds an approximate reciprocal
1166         square root of each element in the operand vector. */
1167      Iop_Rsqrte32Fx4,
1168
1169      /* Vector Reciprocal Square Root Step computes (3.0 - arg1 * arg2) / 2.0.
1170         Note that if one of the arguments is zero and the other one is infinity
1171         of arbitrary sign the result of the operation is 1.5. */
1172      Iop_Rsqrts32Fx4,
1173
1174
1175      /* --- Int to/from FP conversion --- */
1176      /* Unlike the standard fp conversions, these irops take no
1177         rounding mode argument. Instead the irop trailers _R{M,P,N,Z}
1178         indicate the mode: {-inf, +inf, nearest, zero} respectively. */
1179      Iop_I32UtoFx4,  Iop_I32StoFx4,       /* I32x4 -> F32x4       */
1180      Iop_FtoI32Ux4_RZ,  Iop_FtoI32Sx4_RZ,    /* F32x4 -> I32x4       */
1181      Iop_QFtoI32Ux4_RZ, Iop_QFtoI32Sx4_RZ,   /* F32x4 -> I32x4 (with saturation) */
1182      Iop_RoundF32x4_RM, Iop_RoundF32x4_RP,   /* round to fp integer  */
1183      Iop_RoundF32x4_RN, Iop_RoundF32x4_RZ,   /* round to fp integer  */
1184      /* Fixed32 format is floating-point number with fixed number of fraction
1185         bits. The number of fraction bits is passed as a second argument of
1186         type I8. */
1187      Iop_F32ToFixed32Ux4_RZ, Iop_F32ToFixed32Sx4_RZ, /* fp -> fixed-point */
1188      Iop_Fixed32UToF32x4_RN, Iop_Fixed32SToF32x4_RN, /* fixed-point -> fp */
1189
1190      /* --- Single to/from half conversion --- */
1191      /* FIXME: what kind of rounding in F32x4 -> F16x4 case? */
1192      Iop_F32toF16x4, Iop_F16toF32x4,         /* F32x4 <-> F16x4      */
1193
1194      /* --- 32x4 lowest-lane-only scalar FP --- */
1195
1196      /* In binary cases, upper 3/4 is copied from first operand.  In
1197         unary cases, upper 3/4 is copied from the operand. */
1198
1199      /* binary */
1200      Iop_Add32F0x4, Iop_Sub32F0x4, Iop_Mul32F0x4, Iop_Div32F0x4,
1201      Iop_Max32F0x4, Iop_Min32F0x4,
1202      Iop_CmpEQ32F0x4, Iop_CmpLT32F0x4, Iop_CmpLE32F0x4, Iop_CmpUN32F0x4,
1203
1204      /* unary */
1205      Iop_Recip32F0x4, Iop_Sqrt32F0x4, Iop_RSqrt32F0x4,
1206
1207      /* --- 64x2 vector FP --- */
1208
1209      /* binary */
1210      Iop_Add64Fx2, Iop_Sub64Fx2, Iop_Mul64Fx2, Iop_Div64Fx2,
1211      Iop_Max64Fx2, Iop_Min64Fx2,
1212      Iop_CmpEQ64Fx2, Iop_CmpLT64Fx2, Iop_CmpLE64Fx2, Iop_CmpUN64Fx2,
1213
1214      /* unary */
1215      Iop_Recip64Fx2, Iop_Sqrt64Fx2, Iop_RSqrt64Fx2,
1216
1217      /* --- 64x2 lowest-lane-only scalar FP --- */
1218
1219      /* In binary cases, upper half is copied from first operand.  In
1220         unary cases, upper half is copied from the operand. */
1221
1222      /* binary */
1223      Iop_Add64F0x2, Iop_Sub64F0x2, Iop_Mul64F0x2, Iop_Div64F0x2,
1224      Iop_Max64F0x2, Iop_Min64F0x2,
1225      Iop_CmpEQ64F0x2, Iop_CmpLT64F0x2, Iop_CmpLE64F0x2, Iop_CmpUN64F0x2,
1226
1227      /* unary */
1228      Iop_Recip64F0x2, Iop_Sqrt64F0x2, Iop_RSqrt64F0x2,
1229
1230      /* --- pack / unpack --- */
1231
1232      /* 64 <-> 128 bit vector */
1233      Iop_V128to64,     // :: V128 -> I64, low half
1234      Iop_V128HIto64,   // :: V128 -> I64, high half
1235      Iop_64HLtoV128,   // :: (I64,I64) -> V128
1236
1237      Iop_64UtoV128,
1238      Iop_SetV128lo64,
1239
1240      /* 32 <-> 128 bit vector */
1241      Iop_32UtoV128,
1242      Iop_V128to32,     // :: V128 -> I32, lowest lane
1243      Iop_SetV128lo32,  // :: (V128,I32) -> V128
1244
1245      /* ------------------ 128-bit SIMD Integer. ------------------ */
1246
1247      /* BITWISE OPS */
1248      Iop_NotV128,
1249      Iop_AndV128, Iop_OrV128, Iop_XorV128,
1250
1251      /* VECTOR SHIFT (shift amt :: Ity_I8) */
1252      Iop_ShlV128, Iop_ShrV128,
1253
1254      /* MISC (vector integer cmp != 0) */
1255      Iop_CmpNEZ8x16, Iop_CmpNEZ16x8, Iop_CmpNEZ32x4, Iop_CmpNEZ64x2,
1256
1257      /* ADDITION (normal / unsigned sat / signed sat) */
1258      Iop_Add8x16,   Iop_Add16x8,   Iop_Add32x4,   Iop_Add64x2,
1259      Iop_QAdd8Ux16, Iop_QAdd16Ux8, Iop_QAdd32Ux4, Iop_QAdd64Ux2,
1260      Iop_QAdd8Sx16, Iop_QAdd16Sx8, Iop_QAdd32Sx4, Iop_QAdd64Sx2,
1261
1262      /* SUBTRACTION (normal / unsigned sat / signed sat) */
1263      Iop_Sub8x16,   Iop_Sub16x8,   Iop_Sub32x4,   Iop_Sub64x2,
1264      Iop_QSub8Ux16, Iop_QSub16Ux8, Iop_QSub32Ux4, Iop_QSub64Ux2,
1265      Iop_QSub8Sx16, Iop_QSub16Sx8, Iop_QSub32Sx4, Iop_QSub64Sx2,
1266
1267      /* MULTIPLICATION (normal / high half of signed/unsigned) */
1268      Iop_Mul8x16,  Iop_Mul16x8,    Iop_Mul32x4,
1269                    Iop_MulHi16Ux8, Iop_MulHi32Ux4,
1270                    Iop_MulHi16Sx8, Iop_MulHi32Sx4,
1271      /* (widening signed/unsigned of even lanes, with lowest lane=zero) */
1272      Iop_MullEven8Ux16, Iop_MullEven16Ux8,
1273      Iop_MullEven8Sx16, Iop_MullEven16Sx8,
1274      /* FIXME: document these */
1275      Iop_Mull8Ux8, Iop_Mull8Sx8,
1276      Iop_Mull16Ux4, Iop_Mull16Sx4,
1277      Iop_Mull32Ux2, Iop_Mull32Sx2,
1278      /* Vector Saturating Doubling Multiply Returning High Half and
1279         Vector Saturating Rounding Doubling Multiply Returning High Half */
1280      /* These IROp's multiply corresponding elements in two vectors, double
1281         the results, and place the most significant half of the final results
1282         in the destination vector. The results are truncated or rounded. If
1283         any of the results overflow, they are saturated. */
1284      Iop_QDMulHi16Sx8, Iop_QDMulHi32Sx4,
1285      Iop_QRDMulHi16Sx8, Iop_QRDMulHi32Sx4,
1286      /* Doubling saturating multiplication (long) (I64, I64) -> V128 */
1287      Iop_QDMulLong16Sx4, Iop_QDMulLong32Sx2,
1288      /* Polynomial multiplication treats its arguments as coefficients of
1289         polynomials over {0, 1}. */
1290      Iop_PolynomialMul8x16, /* (V128, V128) -> V128 */
1291      Iop_PolynomialMull8x8, /*   (I64, I64) -> V128 */
1292
1293      /* PAIRWISE operations */
1294      /* Iop_PwFoo16x4( [a,b,c,d], [e,f,g,h] ) =
1295            [Foo16(a,b), Foo16(c,d), Foo16(e,f), Foo16(g,h)] */
1296      Iop_PwAdd8x16, Iop_PwAdd16x8, Iop_PwAdd32x4,
1297      Iop_PwAdd32Fx2,
1298      /* Lengthening variant is unary. The resulting vector contains half as
1299         many elements as the operand, but they are twice as wide.
1300         Example:
1301            Iop_PwAddL16Ux4( [a,b,c,d] ) = [a+b,c+d]
1302               where a+b and c+d are unsigned 32-bit values. */
1303      Iop_PwAddL8Ux16, Iop_PwAddL16Ux8, Iop_PwAddL32Ux4,
1304      Iop_PwAddL8Sx16, Iop_PwAddL16Sx8, Iop_PwAddL32Sx4,
1305
1306      /* ABSOLUTE VALUE */
1307      Iop_Abs8x16, Iop_Abs16x8, Iop_Abs32x4,
1308
1309      /* AVERAGING: note: (arg1 + arg2 + 1) >>u 1 */
1310      Iop_Avg8Ux16, Iop_Avg16Ux8, Iop_Avg32Ux4,
1311      Iop_Avg8Sx16, Iop_Avg16Sx8, Iop_Avg32Sx4,
1312
1313      /* MIN/MAX */
1314      Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4,
1315      Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4,
1316      Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4,
1317      Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4,
1318
1319      /* COMPARISON */
1320      Iop_CmpEQ8x16,  Iop_CmpEQ16x8,  Iop_CmpEQ32x4,  Iop_CmpEQ64x2,
1321      Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2,
1322      Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4,
1323
1324      /* COUNT ones / leading zeroes / leading sign bits (not including topmost
1325         bit) */
1326      Iop_Cnt8x16,
1327      Iop_Clz8Sx16, Iop_Clz16Sx8, Iop_Clz32Sx4,
1328      Iop_Cls8Sx16, Iop_Cls16Sx8, Iop_Cls32Sx4,
1329
1330      /* VECTOR x SCALAR SHIFT (shift amt :: Ity_I8) */
1331      Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2,
1332      Iop_ShrN8x16, Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2,
1333      Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2,
1334
1335      /* VECTOR x VECTOR SHIFT / ROTATE */
1336      Iop_Shl8x16, Iop_Shl16x8, Iop_Shl32x4, Iop_Shl64x2,
1337      Iop_Shr8x16, Iop_Shr16x8, Iop_Shr32x4, Iop_Shr64x2,
1338      Iop_Sar8x16, Iop_Sar16x8, Iop_Sar32x4, Iop_Sar64x2,
1339      Iop_Sal8x16, Iop_Sal16x8, Iop_Sal32x4, Iop_Sal64x2,
1340      Iop_Rol8x16, Iop_Rol16x8, Iop_Rol32x4,
1341
1342      /* VECTOR x VECTOR SATURATING SHIFT */
1343      Iop_QShl8x16, Iop_QShl16x8, Iop_QShl32x4, Iop_QShl64x2,
1344      Iop_QSal8x16, Iop_QSal16x8, Iop_QSal32x4, Iop_QSal64x2,
1345      /* VECTOR x INTEGER SATURATING SHIFT */
1346      Iop_QShlN8Sx16, Iop_QShlN16Sx8, Iop_QShlN32Sx4, Iop_QShlN64Sx2,
1347      Iop_QShlN8x16, Iop_QShlN16x8, Iop_QShlN32x4, Iop_QShlN64x2,
1348      Iop_QSalN8x16, Iop_QSalN16x8, Iop_QSalN32x4, Iop_QSalN64x2,
1349
1350      /* NARROWING (binary)
1351         -- narrow 2xV128 into 1xV128, hi half from left arg */
1352      /* See comments above w.r.t. U vs S issues in saturated narrowing. */
1353      Iop_QNarrowBin16Sto8Ux16, Iop_QNarrowBin32Sto16Ux8,
1354      Iop_QNarrowBin16Sto8Sx16, Iop_QNarrowBin32Sto16Sx8,
1355      Iop_QNarrowBin16Uto8Ux16, Iop_QNarrowBin32Uto16Ux8,
1356      Iop_NarrowBin16to8x16, Iop_NarrowBin32to16x8,
1357
1358      /* NARROWING (unary) -- narrow V128 into I64 */
1359      Iop_NarrowUn16to8x8, Iop_NarrowUn32to16x4, Iop_NarrowUn64to32x2,
1360      /* Saturating narrowing from signed source to signed/unsigned destination */
1361      Iop_QNarrowUn16Sto8Sx8, Iop_QNarrowUn32Sto16Sx4, Iop_QNarrowUn64Sto32Sx2,
1362      Iop_QNarrowUn16Sto8Ux8, Iop_QNarrowUn32Sto16Ux4, Iop_QNarrowUn64Sto32Ux2,
1363      /* Saturating narrowing from unsigned source to unsigned destination */
1364      Iop_QNarrowUn16Uto8Ux8, Iop_QNarrowUn32Uto16Ux4, Iop_QNarrowUn64Uto32Ux2,
1365
1366      /* WIDENING -- sign or zero extend each element of the argument
1367         vector to the twice original size.  The resulting vector consists of
1368         the same number of elements but each element and the vector itself
1369         are twice as wide.
1370         All operations are I64->V128.
1371         Example
1372            Iop_Widen32Sto64x2( [a, b] ) = [c, d]
1373               where c = Iop_32Sto64(a) and d = Iop_32Sto64(b) */
1374      Iop_Widen8Uto16x8, Iop_Widen16Uto32x4, Iop_Widen32Uto64x2,
1375      Iop_Widen8Sto16x8, Iop_Widen16Sto32x4, Iop_Widen32Sto64x2,
1376
1377      /* INTERLEAVING */
1378      /* Interleave lanes from low or high halves of
1379         operands.  Most-significant result lane is from the left
1380         arg. */
1381      Iop_InterleaveHI8x16, Iop_InterleaveHI16x8,
1382      Iop_InterleaveHI32x4, Iop_InterleaveHI64x2,
1383      Iop_InterleaveLO8x16, Iop_InterleaveLO16x8,
1384      Iop_InterleaveLO32x4, Iop_InterleaveLO64x2,
1385      /* Interleave odd/even lanes of operands.  Most-significant result lane
1386         is from the left arg. */
1387      Iop_InterleaveOddLanes8x16, Iop_InterleaveEvenLanes8x16,
1388      Iop_InterleaveOddLanes16x8, Iop_InterleaveEvenLanes16x8,
1389      Iop_InterleaveOddLanes32x4, Iop_InterleaveEvenLanes32x4,
1390
1391      /* CONCATENATION -- build a new value by concatenating either
1392         the even or odd lanes of both operands. */
1393      Iop_CatOddLanes8x16, Iop_CatOddLanes16x8, Iop_CatOddLanes32x4,
1394      Iop_CatEvenLanes8x16, Iop_CatEvenLanes16x8, Iop_CatEvenLanes32x4,
1395
1396      /* GET elements of VECTOR
1397         GET is binop (V128, I8) -> I<elem_size> */
1398      /* Note: the arm back-end handles only constant second argument. */
1399      Iop_GetElem8x16, Iop_GetElem16x8, Iop_GetElem32x4, Iop_GetElem64x2,
1400
1401      /* DUPLICATING -- copy value to all lanes */
1402      Iop_Dup8x16,   Iop_Dup16x8,   Iop_Dup32x4,
1403
1404      /* EXTRACT -- copy 16-arg3 highest bytes from arg1 to 16-arg3 lowest bytes
1405         of result and arg3 lowest bytes of arg2 to arg3 highest bytes of
1406         result.
1407         It is a triop: (V128, V128, I8) -> V128 */
1408      /* Note: the ARM back end handles only constant arg3 in this operation. */
1409      Iop_ExtractV128,
1410
1411      /* REVERSE the order of elements in each Half-words, Words,
1412         Double-words */
1413      /* Examples:
1414            Reverse32_16x8([a,b,c,d,e,f,g,h]) = [b,a,d,c,f,e,h,g]
1415            Reverse64_16x8([a,b,c,d,e,f,g,h]) = [d,c,b,a,h,g,f,e] */
1416      Iop_Reverse16_8x16,
1417      Iop_Reverse32_8x16, Iop_Reverse32_16x8,
1418      Iop_Reverse64_8x16, Iop_Reverse64_16x8, Iop_Reverse64_32x4,
1419
1420      /* PERMUTING -- copy src bytes to dst,
1421         as indexed by control vector bytes:
1422            for i in 0 .. 15 . result[i] = argL[ argR[i] ]
1423         argR[i] values may only be in the range 0 .. 15, else behaviour
1424         is undefined. */
1425      Iop_Perm8x16,
1426      Iop_Perm32x4, /* ditto, except argR values are restricted to 0 .. 3 */
1427
1428      /* Vector Reciprocal Estimate and Vector Reciprocal Square Root Estimate
1429         See floating-point equivalents for details. */
1430      Iop_Recip32x4, Iop_Rsqrte32x4,
1431
1432      /* ------------------ 256-bit SIMD Integer. ------------------ */
1433
1434      /* Pack/unpack */
1435      Iop_V256to64_0,  // V256 -> I64, extract least significant lane
1436      Iop_V256to64_1,
1437      Iop_V256to64_2,
1438      Iop_V256to64_3,  // V256 -> I64, extract most significant lane
1439
1440      Iop_64x4toV256,  // (I64,I64,I64,I64)->V256
1441                       // first arg is most significant lane
1442
1443      Iop_V256toV128_0, // V256 -> V128, less significant lane
1444      Iop_V256toV128_1, // V256 -> V128, more significant lane
1445      Iop_V128HLtoV256, // (V128,V128)->V256, first arg is most signif
1446
1447      Iop_AndV256,
1448      Iop_OrV256,
1449      Iop_XorV256,
1450      Iop_NotV256,
1451
1452      /* MISC (vector integer cmp != 0) */
1453      Iop_CmpNEZ32x8, Iop_CmpNEZ64x4,
1454
1455      /* ------------------ 256-bit SIMD FP. ------------------ */
1456      Iop_Add64Fx4,
1457      Iop_Sub64Fx4,
1458      Iop_Mul64Fx4,
1459      Iop_Div64Fx4,
1460      Iop_Add32Fx8,
1461      Iop_Sub32Fx8,
1462      Iop_Mul32Fx8,
1463      Iop_Div32Fx8,
1464
1465      Iop_Sqrt32Fx8,
1466      Iop_Sqrt64Fx4,
1467      Iop_RSqrt32Fx8,
1468      Iop_Recip32Fx8,
1469
1470      Iop_Max32Fx8, Iop_Min32Fx8,
1471      Iop_Max64Fx4, Iop_Min64Fx4
1472   }
1473   IROp;
1474
1475/* Pretty-print an op. */
1476extern void ppIROp ( IROp );
1477
1478
1479/* Encoding of IEEE754-specified rounding modes.  This is the same as
1480   the encoding used by Intel IA32 to indicate x87 rounding mode.
1481   Note, various front and back ends rely on the actual numerical
1482   values of these, so do not change them. */
1483typedef
1484   enum {
1485      Irrm_NEAREST = 0,  // Round to nearest (ties to even)
1486      Irrm_NegINF  = 1,  // Round towards negative infinity
1487      Irrm_PosINF  = 2,  // Round towards positive infinity
1488      Irrm_ZERO    = 3   // Round towards zero (truncate)
1489   }
1490   IRRoundingMode;
1491
1492/* DFP encoding of IEEE754 2008 specified rounding modes extends the two bit
1493 * binary floating point rounding mode (IRRoundingMode) to three bits.  The
1494 * DFP rounding modes are a super set of the binary rounding modes.  The
1495 * encoding was chosen such that the mapping of the least significant two bits
1496 * of the IR to POWER encodings is same.  The upper IR encoding bit is just
1497 * a logical OR of the upper rounding mode bit from the POWER encoding.
1498 */
typedef
   enum {
      Irrm_DFP_NEAREST              = 0,  // Round to nearest, ties to even
      Irrm_DFP_NegINF               = 1,  // Round to negative infinity
      Irrm_DFP_PosINF               = 2,  // Round to positive infinity
      Irrm_DFP_ZERO                 = 3,  // Round toward zero
      Irrm_DFP_NEAREST_TIE_AWAY_0   = 4,  // Round to nearest, ties away from 0
      Irrm_DFP_PREPARE_SHORTER      = 5,  // Round to prepare for shorter
                                          // precision
      Irrm_DFP_AWAY_FROM_ZERO       = 6,  // Round to away from 0
      Irrm_DFP_NEAREST_TIE_TOWARD_0 = 7   // Round to nearest, ties towards 0
   }
   IRRoundingModeDFP;
1512
1513/* Floating point comparison result values, as created by Iop_CmpF64.
1514   This is also derived from what IA32 does. */
typedef
   enum {
      /* NOTE(review): the values appear to follow the x87 condition-code
         bit layout (C3=0x40, C2=0x04, C0=0x01; unordered sets all
         three, hence 0x45) -- confirm against the IA32 manuals. */
      Ircr_UN = 0x45,   // unordered: at least one operand is a NaN
      Ircr_LT = 0x01,
      Ircr_GT = 0x00,
      Ircr_EQ = 0x40
   }
   IRCmpF64Result;

/* F32 and F128 comparisons use the same result encoding. */
typedef IRCmpF64Result IRCmpF32Result;
typedef IRCmpF64Result IRCmpF128Result;
1526
1527/* ------------------ Expressions ------------------ */
1528
typedef struct _IRQop   IRQop;   /* forward declaration */
typedef struct _IRTriop IRTriop; /* forward declaration */


/* The different kinds of expressions.  Their meaning is explained below
   in the comments for IRExpr. */
typedef
   enum {
      Iex_Binder=0x15000, /* used only in pattern matching inside Vex */
      Iex_Get,            /* read guest register, fixed offset */
      Iex_GetI,           /* read guest register, non-fixed (circular) offset */
      Iex_RdTmp,          /* value of a temporary */
      Iex_Qop,            /* quaternary operation */
      Iex_Triop,          /* ternary operation */
      Iex_Binop,          /* binary operation */
      Iex_Unop,           /* unary operation */
      Iex_Load,           /* normal (non-load-linked) load from memory */
      Iex_Const,          /* constant-valued expression */
      Iex_Mux0X,          /* strict if-then-else */
      Iex_CCall           /* call to a pure helper C function */
   }
   IRExprTag;
1551
1552/* An expression.  Stored as a tagged union.  'tag' indicates what kind
1553   of expression this is.  'Iex' is the union that holds the fields.  If
1554   an IRExpr 'e' has e.tag equal to Iex_Load, then it's a load
1555   expression, and the fields can be accessed with
1556   'e.Iex.Load.<fieldname>'.
1557
1558   For each kind of expression, we show what it looks like when
1559   pretty-printed with ppIRExpr().
1560*/
typedef
   struct _IRExpr
   IRExpr;

struct _IRExpr {
   IRExprTag tag;
   union {
      /* Used only in pattern matching within Vex.  Should not be seen
         outside of Vex. */
      struct {
         Int binder;
      } Binder;

      /* Read a guest register, at a fixed offset in the guest state.
         ppIRExpr output: GET:<ty>(<offset>), eg. GET:I32(0)
      */
      struct {
         Int    offset;    /* Offset into the guest state */
         IRType ty;        /* Type of the value being read */
      } Get;

      /* Read a guest register at a non-fixed offset in the guest
         state.  This allows circular indexing into parts of the guest
         state, which is essential for modelling situations where the
         identity of guest registers is not known until run time.  One
         example is the x87 FP register stack.

         The part of the guest state to be treated as a circular array
         is described in the IRRegArray 'descr' field.  It holds the
         offset of the first element in the array, the type of each
         element, and the number of elements.

         The array index is indicated rather indirectly, in a way
         which makes optimisation easy: as the sum of variable part
         (the 'ix' field) and a constant offset (the 'bias' field).

         Since the indexing is circular, the actual array index to use
         is computed as (ix + bias) % num-of-elems-in-the-array.

         Here's an example.  The description

            (96:8xF64)[t39,-7]

         describes an array of 8 F64-typed values, the
         guest-state-offset of the first being 96.  This array is
         being indexed at (t39 - 7) % 8.

         It is important to get the array size/type exactly correct
         since IR optimisation looks closely at such info in order to
         establish aliasing/non-aliasing between separate GetI and
         PutI events, which is used to establish when they can be
         reordered, etc.  Putting incorrect info in will lead to
         obscure IR optimisation bugs.

            ppIRExpr output: GETI<descr>[<ix>,<bias>]
                         eg. GETI(128:8xI8)[t1,0]
      */
      struct {
         IRRegArray* descr; /* Part of guest state treated as circular */
         IRExpr*     ix;    /* Variable part of index into array */
         Int         bias;  /* Constant offset part of index into array */
      } GetI;

      /* The value held by a temporary.
         ppIRExpr output: t<tmp>, eg. t1
      */
      struct {
         IRTemp tmp;       /* The temporary number */
      } RdTmp;

      /* A quaternary operation.
         ppIRExpr output: <op>(<arg1>, <arg2>, <arg3>, <arg4>),
                      eg. MAddF64r32(t1, t2, t3, t4)
      */
      struct {
        IRQop* details;
      } Qop;

      /* A ternary operation.
         ppIRExpr output: <op>(<arg1>, <arg2>, <arg3>),
                      eg. MulF64(1, 2.0, 3.0)
      */
      struct {
        IRTriop* details;
      } Triop;

      /* A binary operation.
         ppIRExpr output: <op>(<arg1>, <arg2>), eg. Add32(t1,t2)
      */
      struct {
         IROp op;          /* op-code   */
         IRExpr* arg1;     /* operand 1 */
         IRExpr* arg2;     /* operand 2 */
      } Binop;

      /* A unary operation.
         ppIRExpr output: <op>(<arg>), eg. Neg8(t1)
      */
      struct {
         IROp    op;       /* op-code */
         IRExpr* arg;      /* operand */
      } Unop;

      /* A load from memory -- a normal load, not a load-linked.
         Load-Linkeds (and Store-Conditionals) are instead represented
         by IRStmt.LLSC since Load-Linkeds have side effects and so
         are not semantically valid IRExpr's.
         ppIRExpr output: LD<end>:<ty>(<addr>), eg. LDle:I32(t1)
      */
      struct {
         IREndness end;    /* Endian-ness of the load */
         IRType    ty;     /* Type of the loaded value */
         IRExpr*   addr;   /* Address being loaded from */
      } Load;

      /* A constant-valued expression.
         ppIRExpr output: <con>, eg. 0x4:I32
      */
      struct {
         IRConst* con;     /* The constant itself */
      } Const;

      /* A call to a pure (no side-effects) helper C function.

         With the 'cee' field, 'name' is the function's name.  It is
         only used for pretty-printing purposes.  The address to call
         (host address, of course) is stored in the 'addr' field
         inside 'cee'.

         The 'args' field is a NULL-terminated array of arguments.
         The stated return IRType, and the implied argument types,
         must match that of the function being called well enough so
         that the back end can actually generate correct code for the
         call.

         The called function **must** satisfy the following:

         * no side effects -- must be a pure function, the result of
           which depends only on the passed parameters.

         * it may not look at, nor modify, any of the guest state
           since that would hide guest state transitions from
           instrumenters

         * it may not access guest memory, since that would hide
           guest memory transactions from the instrumenters

         * it must not assume that arguments are being evaluated in a
           particular order. The order of evaluation is unspecified.

         This is restrictive, but makes the semantics clean, and does
         not interfere with IR optimisation.

         If you want to call a helper which can mess with guest state
         and/or memory, instead use Ist_Dirty.  This is a lot more
         flexible, but you have to give a bunch of details about what
         the helper does (and you better be telling the truth,
         otherwise any derived instrumentation will be wrong).  Also
         Ist_Dirty inhibits various IR optimisations and so can cause
         quite poor code to be generated.  Try to avoid it.

         ppIRExpr output: <cee>(<args>):<retty>
                      eg. foo{0x80489304}(t1, t2):I32
      */
      struct {
         IRCallee* cee;    /* Function to call. */
         IRType    retty;  /* Type of return value. */
         IRExpr**  args;   /* Vector of argument expressions. */
      }  CCall;

      /* A ternary if-then-else operator.  It returns expr0 if cond is
         zero, exprX otherwise.  Note that it is STRICT, ie. both
         expr0 and exprX are evaluated in all cases.

         ppIRExpr output: Mux0X(<cond>,<expr0>,<exprX>),
                         eg. Mux0X(t6,t7,t8)
      */
      struct {
         IRExpr* cond;     /* Condition */
         IRExpr* expr0;    /* Returned when cond is zero */
         IRExpr* exprX;    /* Returned when cond is nonzero */
      } Mux0X;
   } Iex;
};
1745
/* ------------------ A ternary expression ---------------------- */
/* Stored out-of-line and referenced from IRExpr via Iex.Triop.details
   (presumably to keep the IRExpr union small -- see Iex_Triop above). */
struct _IRTriop {
   IROp op;          /* op-code   */
   IRExpr* arg1;     /* operand 1 */
   IRExpr* arg2;     /* operand 2 */
   IRExpr* arg3;     /* operand 3 */
};
1753
/* ------------------ A quaternary expression ------------------- */
/* Stored out-of-line and referenced from IRExpr via Iex.Qop.details. */
struct _IRQop {
   IROp op;          /* op-code   */
   IRExpr* arg1;     /* operand 1 */
   IRExpr* arg2;     /* operand 2 */
   IRExpr* arg3;     /* operand 3 */
   IRExpr* arg4;     /* operand 4 */
};
1762
1763/* Expression constructors. */
1764extern IRExpr* IRExpr_Binder ( Int binder );
1765extern IRExpr* IRExpr_Get    ( Int off, IRType ty );
1766extern IRExpr* IRExpr_GetI   ( IRRegArray* descr, IRExpr* ix, Int bias );
1767extern IRExpr* IRExpr_RdTmp  ( IRTemp tmp );
1768extern IRExpr* IRExpr_Qop    ( IROp op, IRExpr* arg1, IRExpr* arg2,
1769                                        IRExpr* arg3, IRExpr* arg4 );
1770extern IRExpr* IRExpr_Triop  ( IROp op, IRExpr* arg1,
1771                                        IRExpr* arg2, IRExpr* arg3 );
1772extern IRExpr* IRExpr_Binop  ( IROp op, IRExpr* arg1, IRExpr* arg2 );
1773extern IRExpr* IRExpr_Unop   ( IROp op, IRExpr* arg );
1774extern IRExpr* IRExpr_Load   ( IREndness end, IRType ty, IRExpr* addr );
1775extern IRExpr* IRExpr_Const  ( IRConst* con );
1776extern IRExpr* IRExpr_CCall  ( IRCallee* cee, IRType retty, IRExpr** args );
1777extern IRExpr* IRExpr_Mux0X  ( IRExpr* cond, IRExpr* expr0, IRExpr* exprX );
1778
1779/* Deep-copy an IRExpr. */
1780extern IRExpr* deepCopyIRExpr ( IRExpr* );
1781
1782/* Pretty-print an IRExpr. */
1783extern void ppIRExpr ( IRExpr* );
1784
1785/* NULL-terminated IRExpr vector constructors, suitable for
1786   use as arg lists in clean/dirty helper calls. */
1787extern IRExpr** mkIRExprVec_0 ( void );
1788extern IRExpr** mkIRExprVec_1 ( IRExpr* );
1789extern IRExpr** mkIRExprVec_2 ( IRExpr*, IRExpr* );
1790extern IRExpr** mkIRExprVec_3 ( IRExpr*, IRExpr*, IRExpr* );
1791extern IRExpr** mkIRExprVec_4 ( IRExpr*, IRExpr*, IRExpr*, IRExpr* );
1792extern IRExpr** mkIRExprVec_5 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
1793                                IRExpr* );
1794extern IRExpr** mkIRExprVec_6 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
1795                                IRExpr*, IRExpr* );
1796extern IRExpr** mkIRExprVec_7 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
1797                                IRExpr*, IRExpr*, IRExpr* );
1798extern IRExpr** mkIRExprVec_8 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
1799                                IRExpr*, IRExpr*, IRExpr*, IRExpr*);
1800
1801/* IRExpr copiers:
1802   - shallowCopy: shallow-copy (ie. create a new vector that shares the
1803     elements with the original).
1804   - deepCopy: deep-copy (ie. create a completely new vector). */
1805extern IRExpr** shallowCopyIRExprVec ( IRExpr** );
1806extern IRExpr** deepCopyIRExprVec ( IRExpr** );
1807
1808/* Make a constant expression from the given host word taking into
1809   account (of course) the host word size. */
1810extern IRExpr* mkIRExpr_HWord ( HWord );
1811
1812/* Convenience function for constructing clean helper calls. */
1813extern
1814IRExpr* mkIRExprCCall ( IRType retty,
1815                        Int regparms, HChar* name, void* addr,
1816                        IRExpr** args );
1817
1818
1819/* Convenience functions for atoms (IRExprs which are either Iex_Tmp or
1820 * Iex_Const). */
1821static inline Bool isIRAtom ( IRExpr* e ) {
1822   return toBool(e->tag == Iex_RdTmp || e->tag == Iex_Const);
1823}
1824
1825/* Are these two IR atoms identical?  Causes an assertion
1826   failure if they are passed non-atoms. */
1827extern Bool eqIRAtom ( IRExpr*, IRExpr* );
1828
1829
1830/* ------------------ Jump kinds ------------------ */
1831
1832/* This describes hints which can be passed to the dispatcher at guest
1833   control-flow transfer points.
1834
1835   Re Ijk_TInval: the guest state _must_ have two pseudo-registers,
1836   guest_TISTART and guest_TILEN, which specify the start and length
1837   of the region to be invalidated.  These are both the size of a
1838   guest word.  It is the responsibility of the relevant toIR.c to
1839   ensure that these are filled in with suitable values before issuing
1840   a jump of kind Ijk_TInval.
1841
1842   Re Ijk_EmWarn and Ijk_EmFail: the guest state must have a
1843   pseudo-register guest_EMWARN, which is 32-bits regardless of the
1844   host or guest word size.  That register should be made to hold an
1845   EmWarn_* value to indicate the reason for the exit.
1846
1847   In the case of Ijk_EmFail, the exit is fatal (Vex-generated code
1848   cannot continue) and so the jump destination can be anything.
1849
1850   Re Ijk_Sys_ (syscall jumps): the guest state must have a
1851   pseudo-register guest_IP_AT_SYSCALL, which is the size of a guest
1852   word.  Front ends should set this to be the IP at the most recently
1853   executed kernel-entering (system call) instruction.  This makes it
1854   very much easier (viz, actually possible at all) to back up the
1855   guest to restart a syscall that has been interrupted by a signal.
1856*/
typedef
   enum {
      Ijk_INVALID=0x16000,
      Ijk_Boring,         /* not interesting; just goto next */
      Ijk_Call,           /* guest is doing a call */
      Ijk_Ret,            /* guest is doing a return */
      Ijk_ClientReq,      /* do guest client req before continuing */
      Ijk_Yield,          /* client is yielding to thread scheduler */
      Ijk_EmWarn,         /* report emulation warning before continuing */
      Ijk_EmFail,         /* emulation critical (FATAL) error; give up */
      Ijk_NoDecode,       /* next instruction cannot be decoded */
      Ijk_MapFail,        /* Vex-provided address translation failed */
      Ijk_TInval,         /* Invalidate translations before continuing. */
      Ijk_NoRedir,        /* Jump to un-redirected guest addr */
      Ijk_SigTRAP,        /* current instruction synths SIGTRAP */
      Ijk_SigSEGV,        /* current instruction synths SIGSEGV */
      Ijk_SigBUS,         /* current instruction synths SIGBUS */
      /* Unfortunately, various guest-dependent syscall kinds.  They
         all mean: do a syscall before continuing. */
      Ijk_Sys_syscall,    /* amd64 'syscall', ppc 'sc', arm 'svc #0' */
      Ijk_Sys_int32,      /* amd64/x86 'int $0x20' */
      Ijk_Sys_int128,     /* amd64/x86 'int $0x80' */
      Ijk_Sys_int129,     /* amd64/x86 'int $0x81' */
      Ijk_Sys_int130,     /* amd64/x86 'int $0x82' */
      Ijk_Sys_sysenter    /* x86 'sysenter'.  guest_EIP becomes
                             invalid at the point this happens. */
   }
   IRJumpKind;
1885
1886extern void ppIRJumpKind ( IRJumpKind );
1887
1888
1889/* ------------------ Dirty helper calls ------------------ */
1890
1891/* A dirty call is a flexible mechanism for calling (possibly
1892   conditionally) a helper function or procedure.  The helper function
1893   may read, write or modify client memory, and may read, write or
1894   modify client state.  It can take arguments and optionally return a
1895   value.  It may return different results and/or do different things
1896   when called repeatedly with the same arguments, by means of storing
1897   private state.
1898
1899   If a value is returned, it is assigned to the nominated return
1900   temporary.
1901
1902   Dirty calls are statements rather than expressions for obvious
1903   reasons.  If a dirty call is marked as writing guest state, any
1904   values derived from the written parts of the guest state are
1905   invalid.  Similarly, if the dirty call is stated as writing
1906   memory, any loaded values are invalidated by it.
1907
1908   In order that instrumentation is possible, the call must state, and
1909   state correctly:
1910
1911   * whether it reads, writes or modifies memory, and if so where
1912     (only one chunk can be stated)
1913
1914   * whether it reads, writes or modifies guest state, and if so which
1915     pieces (several pieces may be stated, and currently their extents
1916     must be known at translation-time).
1917
1918   Normally, code is generated to pass just the args to the helper.
1919   However, if .needsBBP is set, then an extra first argument is
1920   passed, which is the baseblock pointer, so that the callee can
1921   access the guest state.  It is invalid for .nFxState to be zero
1922   but .needsBBP to be True, since .nFxState==0 is a claim that the
1923   call does not access guest state.
1924
1925   IMPORTANT NOTE re GUARDS: Dirty calls are strict, very strict.  The
1926   arguments are evaluated REGARDLESS of the guard value.  The order of
1927   argument evaluation is unspecified. The guard expression is evaluated
1928   AFTER the arguments have been evaluated.
1929*/
1930
1931#define VEX_N_FXSTATE  7   /* enough for FXSAVE/FXRSTOR on x86 */
1932
1933/* Effects on resources (eg. registers, memory locations) */
/* Fix: removed the trailing comma after the final enumerator.  A
   trailing comma in an enumerator-list is a constraint violation in
   C89, warns under -Wpedantic, and is inconsistent with every other
   enum in this header. */
typedef
   enum {
      Ifx_None = 0x1700,    /* no effect */
      Ifx_Read,             /* reads the resource */
      Ifx_Write,            /* writes the resource */
      Ifx_Modify            /* modifies (reads and writes) the resource */
   }
   IREffect;
1942
1943/* Pretty-print an IREffect */
1944extern void ppIREffect ( IREffect );
1945
1946
typedef
   struct _IRDirty {
      /* What to call, and details of args/results.  .guard must be
         non-NULL.  If .tmp is not IRTemp_INVALID (that is, the call
         returns a result) then .guard must be demonstrably (at
         JIT-time) always true, that is, the call must be
         unconditional.  Conditional calls that assign .tmp are not
         allowed. */
      IRCallee* cee;    /* where to call */
      IRExpr*   guard;  /* :: Ity_Bit.  Controls whether call happens */
      IRExpr**  args;   /* arg list, ends in NULL */
      IRTemp    tmp;    /* to assign result to, or IRTemp_INVALID if none */

      /* Mem effects; we allow only one R/W/M region to be stated */
      IREffect  mFx;    /* indicates memory effects, if any */
      IRExpr*   mAddr;  /* of access, or NULL if mFx==Ifx_None */
      Int       mSize;  /* of access, or zero if mFx==Ifx_None */

      /* Guest state effects; up to N allowed */
      Bool needsBBP; /* True => also pass guest state ptr to callee */
      Int  nFxState; /* must be 0 .. VEX_N_FXSTATE */
      struct {
         IREffect fx:16;   /* read, write or modify?  Ifx_None is invalid. */
         UShort   offset;  /* guest state offset of the first chunk accessed */
         UShort   size;    /* size in bytes of each accessed chunk */
         UChar    nRepeats;  /* number of extra accesses; 0 => single access */
         UChar    repeatLen; /* stride between accesses; 0 => single access */
      } fxState[VEX_N_FXSTATE];
      /* The access can be repeated, as specified by nRepeats and
         repeatLen.  To describe only a single access, nRepeats and
         repeatLen should be zero.  Otherwise, repeatLen must be a
         multiple of size and greater than size. */
      /* Overall, the parts of the guest state denoted by (offset,
         size, nRepeats, repeatLen) are
               [offset, +size)
            and, if nRepeats > 0,
               for (i = 1; i <= nRepeats; i++)
                  [offset + i * repeatLen, +size)
         A convenient way to enumerate all segments is therefore
            for (i = 0; i < 1 + nRepeats; i++)
               [offset + i * repeatLen, +size)
      */
   }
   IRDirty;
1991
1992/* Pretty-print a dirty call */
1993extern void     ppIRDirty ( IRDirty* );
1994
1995/* Allocate an uninitialised dirty call */
1996extern IRDirty* emptyIRDirty ( void );
1997
1998/* Deep-copy a dirty call */
1999extern IRDirty* deepCopyIRDirty ( IRDirty* );
2000
2001/* A handy function which takes some of the tedium out of constructing
2002   dirty helper calls.  The called function impliedly does not return
2003   any value and has a constant-True guard.  The call is marked as
2004   accessing neither guest state nor memory (hence the "unsafe"
2005   designation) -- you can change this marking later if need be.  A
2006   suitable IRCallee is constructed from the supplied bits. */
2007extern
2008IRDirty* unsafeIRDirty_0_N ( Int regparms, HChar* name, void* addr,
2009                             IRExpr** args );
2010
2011/* Similarly, make a zero-annotation dirty call which returns a value,
2012   and assign that to the given temp. */
2013extern
2014IRDirty* unsafeIRDirty_1_N ( IRTemp dst,
2015                             Int regparms, HChar* name, void* addr,
2016                             IRExpr** args );
2017
2018
2019/* --------------- Memory Bus Events --------------- */
2020
2021typedef
2022   enum {
2023      Imbe_Fence=0x18000,
2024      /* Needed only on ARM.  It cancels a reservation made by a
2025         preceding Linked-Load, and needs to be handed through to the
2026         back end, just as LL and SC themselves are. */
2027      Imbe_CancelReservation
2028   }
2029   IRMBusEvent;
2030
2031extern void ppIRMBusEvent ( IRMBusEvent );
2032
2033
2034/* --------------- Compare and Swap --------------- */
2035
2036/* This denotes an atomic compare and swap operation, either
2037   a single-element one or a double-element one.
2038
2039   In the single-element case:
2040
2041     .addr is the memory address.
2042     .end  is the endianness with which memory is accessed
2043
2044     If .addr contains the same value as .expdLo, then .dataLo is
2045     written there, else there is no write.  In both cases, the
2046     original value at .addr is copied into .oldLo.
2047
2048     Types: .expdLo, .dataLo and .oldLo must all have the same type.
2049     It may be any integral type, viz: I8, I16, I32 or, for 64-bit
2050     guests, I64.
2051
2052     .oldHi must be IRTemp_INVALID, and .expdHi and .dataHi must
2053     be NULL.
2054
2055   In the double-element case:
2056
2057     .addr is the memory address.
2058     .end  is the endianness with which memory is accessed
2059
2060     The operation is the same:
2061
2062     If .addr contains the same value as .expdHi:.expdLo, then
2063     .dataHi:.dataLo is written there, else there is no write.  In
2064     both cases the original value at .addr is copied into
2065     .oldHi:.oldLo.
2066
2067     Types: .expdHi, .expdLo, .dataHi, .dataLo, .oldHi, .oldLo must
2068     all have the same type, which may be any integral type, viz: I8,
2069     I16, I32 or, for 64-bit guests, I64.
2070
2071     The double-element case is complicated by the issue of
2072     endianness.  In all cases, the two elements are understood to be
2073     located adjacently in memory, starting at the address .addr.
2074
2075       If .end is Iend_LE, then the .xxxLo component is at the lower
2076       address and the .xxxHi component is at the higher address, and
2077       each component is itself stored little-endianly.
2078
2079       If .end is Iend_BE, then the .xxxHi component is at the lower
2080       address and the .xxxLo component is at the higher address, and
2081       each component is itself stored big-endianly.
2082
2083   This allows representing more cases than most architectures can
2084   handle.  For example, x86 cannot do DCAS on 8- or 16-bit elements.
2085
2086   How to know if the CAS succeeded?
2087
2088   * if .oldLo == .expdLo (resp. .oldHi:.oldLo == .expdHi:.expdLo),
2089     then the CAS succeeded, .dataLo (resp. .dataHi:.dataLo) is now
2090     stored at .addr, and the original value there was .oldLo (resp
2091     .oldHi:.oldLo).
2092
2093   * if .oldLo != .expdLo (resp. .oldHi:.oldLo != .expdHi:.expdLo),
2094     then the CAS failed, and the original value at .addr was .oldLo
2095     (resp. .oldHi:.oldLo).
2096
2097   Hence it is easy to know whether or not the CAS succeeded.
2098*/
typedef
   struct {
      /* Full semantics are given in the long comment above.  For a
         single-element CAS, oldHi must be IRTemp_INVALID and
         expdHi/dataHi must be NULL. */
      IRTemp    oldHi;  /* old value of *addr is written here */
      IRTemp    oldLo;
      IREndness end;    /* endianness of the data in memory */
      IRExpr*   addr;   /* store address */
      IRExpr*   expdHi; /* expected old value at *addr */
      IRExpr*   expdLo;
      IRExpr*   dataHi; /* new value for *addr */
      IRExpr*   dataLo;
   }
   IRCAS;
2111
2112extern void ppIRCAS ( IRCAS* cas );
2113
2114extern IRCAS* mkIRCAS ( IRTemp oldHi, IRTemp oldLo,
2115                        IREndness end, IRExpr* addr,
2116                        IRExpr* expdHi, IRExpr* expdLo,
2117                        IRExpr* dataHi, IRExpr* dataLo );
2118
2119extern IRCAS* deepCopyIRCAS ( IRCAS* );
2120
2121
2122/* ------------------ Circular Array Put ------------------ */
typedef
   struct {
      /* See the comment on the GetI expression for the indexing rules;
         the effective index is (ix + bias) % number-of-elements. */
      IRRegArray* descr; /* Part of guest state treated as circular */
      IRExpr*     ix;    /* Variable part of index into array */
      Int         bias;  /* Constant offset part of index into array */
      IRExpr*     data;  /* The value to write */
   } IRPutI;
2130
2131extern void ppIRPutI ( IRPutI* puti );
2132
2133extern IRPutI* mkIRPutI ( IRRegArray* descr, IRExpr* ix,
2134                          Int bias, IRExpr* data );
2135
2136extern IRPutI* deepCopyIRPutI ( IRPutI* );
2137
2138
2139/* ------------------ Statements ------------------ */
2140
2141/* The different kinds of statements.  Their meaning is explained
2142   below in the comments for IRStmt.
2143
2144   Those marked META do not represent code, but rather extra
2145   information about the code.  These statements can be removed
2146   without affecting the functional behaviour of the code, however
2147   they are required by some IR consumers such as tools that
2148   instrument the code.
2149*/
2150
typedef
   enum {
      Ist_NoOp=0x19000, /* no-op; can be omitted without any effect */
      Ist_IMark,     /* META: marks start of one machine instruction */
      Ist_AbiHint,   /* META: ABI hint, eg. stack-redzone invalidation */
      Ist_Put,       /* write guest register, fixed offset */
      Ist_PutI,      /* write guest register, non-fixed (circular) offset */
      Ist_WrTmp,     /* assign a value to a temporary */
      Ist_Store,     /* normal (non-conditional) store to memory */
      Ist_CAS,       /* atomic compare-and-swap */
      Ist_LLSC,      /* load-linked or store-conditional */
      Ist_Dirty,     /* call to a dirty (side-effecting) helper */
      Ist_MBE,       /* META (maybe): memory bus event */
      Ist_Exit
   }
   IRStmtTag;
2167
/* A statement.  Stored as a tagged union.  'tag' indicates what kind
   of statement this is.  'Ist' is the union that holds the fields.
   If an IRStmt 'st' has st.tag equal to Ist_Store, then it's a store
2171   statement, and the fields can be accessed with
2172   'st.Ist.Store.<fieldname>'.
2173
2174   For each kind of statement, we show what it looks like when
2175   pretty-printed with ppIRStmt().
2176*/
2177typedef
2178   struct _IRStmt {
2179      IRStmtTag tag;
2180      union {
2181         /* A no-op (usually resulting from IR optimisation).  Can be
2182            omitted without any effect.
2183
2184            ppIRStmt output: IR-NoOp
2185         */
2186         struct {
2187	 } NoOp;
2188
2189         /* META: instruction mark.  Marks the start of the statements
2190            that represent a single machine instruction (the end of
2191            those statements is marked by the next IMark or the end of
2192            the IRSB).  Contains the address and length of the
2193            instruction.
2194
2195            It also contains a delta value.  The delta must be
2196            subtracted from a guest program counter value before
2197            attempting to establish, by comparison with the address
2198            and length values, whether or not that program counter
2199            value refers to this instruction.  For x86, amd64, ppc32,
2200            ppc64 and arm, the delta value is zero.  For Thumb
2201            instructions, the delta value is one.  This is because, on
2202            Thumb, guest PC values (guest_R15T) are encoded using the
2203            top 31 bits of the instruction address and a 1 in the lsb;
2204            hence they appear to be (numerically) 1 past the start of
2205            the instruction they refer to.  IOW, guest_R15T on ARM
2206            holds a standard ARM interworking address.
2207
2208            ppIRStmt output: ------ IMark(<addr>, <len>, <delta>) ------,
2209                         eg. ------ IMark(0x4000792, 5, 0) ------,
2210         */
2211         struct {
2212            Addr64 addr;   /* instruction address */
2213            Int    len;    /* instruction length */
2214            UChar  delta;  /* addr = program counter as encoded in guest state
2215                                     - delta */
2216         } IMark;
2217
2218         /* META: An ABI hint, which says something about this
2219            platform's ABI.
2220
2221            At the moment, the only AbiHint is one which indicates
2222            that a given chunk of address space, [base .. base+len-1],
2223            has become undefined.  This is used on amd64-linux and
2224            some ppc variants to pass stack-redzoning hints to whoever
2225            wants to see them.  It also indicates the address of the
2226            next (dynamic) instruction that will be executed.  This is
2227            to help Memcheck to origin tracking.
2228
2229            ppIRStmt output: ====== AbiHint(<base>, <len>, <nia>) ======
2230                         eg. ====== AbiHint(t1, 16, t2) ======
2231         */
2232         struct {
2233            IRExpr* base;     /* Start  of undefined chunk */
2234            Int     len;      /* Length of undefined chunk */
2235            IRExpr* nia;      /* Address of next (guest) insn */
2236         } AbiHint;
2237
2238         /* Write a guest register, at a fixed offset in the guest state.
2239            ppIRStmt output: PUT(<offset>) = <data>, eg. PUT(60) = t1
2240         */
2241         struct {
2242            Int     offset;   /* Offset into the guest state */
2243            IRExpr* data;     /* The value to write */
2244         } Put;
2245
2246         /* Write a guest register, at a non-fixed offset in the guest
2247            state.  See the comment for GetI expressions for more
2248            information.
2249
2250            ppIRStmt output: PUTI<descr>[<ix>,<bias>] = <data>,
2251                         eg. PUTI(64:8xF64)[t5,0] = t1
2252         */
2253         struct {
2254            IRPutI* details;
2255         } PutI;
2256
2257         /* Assign a value to a temporary.  Note that SSA rules require
2258            each tmp is only assigned to once.  IR sanity checking will
2259            reject any block containing a temporary which is not assigned
2260            to exactly once.
2261
2262            ppIRStmt output: t<tmp> = <data>, eg. t1 = 3
2263         */
2264         struct {
2265            IRTemp  tmp;   /* Temporary  (LHS of assignment) */
2266            IRExpr* data;  /* Expression (RHS of assignment) */
2267         } WrTmp;
2268
2269         /* Write a value to memory.  This is a normal store, not a
2270            Store-Conditional.  To represent a Store-Conditional,
2271            instead use IRStmt.LLSC.
2272            ppIRStmt output: ST<end>(<addr>) = <data>, eg. STle(t1) = t2
2273         */
2274         struct {
2275            IREndness end;    /* Endianness of the store */
2276            IRExpr*   addr;   /* store address */
2277            IRExpr*   data;   /* value to write */
2278         } Store;
2279
2280         /* Do an atomic compare-and-swap operation.  Semantics are
2281            described above on a comment at the definition of IRCAS.
2282
2283            ppIRStmt output:
2284               t<tmp> = CAS<end>(<addr> :: <expected> -> <new>)
2285            eg
2286               t1 = CASle(t2 :: t3->Add32(t3,1))
2287               which denotes a 32-bit atomic increment
2288               of a value at address t2
2289
2290            A double-element CAS may also be denoted, in which case <tmp>,
2291            <expected> and <new> are all pairs of items, separated by
2292            commas.
2293         */
2294         struct {
2295            IRCAS* details;
2296         } CAS;
2297
2298         /* Either Load-Linked or Store-Conditional, depending on
2299            STOREDATA.
2300
2301            If STOREDATA is NULL then this is a Load-Linked, meaning
2302            that data is loaded from memory as normal, but a
2303            'reservation' for the address is also lodged in the
2304            hardware.
2305
2306               result = Load-Linked(addr, end)
2307
2308            The data transfer type is the type of RESULT (I32, I64,
2309            etc).  ppIRStmt output:
2310
2311               result = LD<end>-Linked(<addr>), eg. LDbe-Linked(t1)
2312
2313            If STOREDATA is not NULL then this is a Store-Conditional,
2314            hence:
2315
2316               result = Store-Conditional(addr, storedata, end)
2317
2318            The data transfer type is the type of STOREDATA and RESULT
2319            has type Ity_I1. The store may fail or succeed depending
2320            on the state of a previously lodged reservation on this
2321            address.  RESULT is written 1 if the store succeeds and 0
2322            if it fails.  eg ppIRStmt output:
2323
2324               result = ( ST<end>-Cond(<addr>) = <storedata> )
2325               eg t3 = ( STbe-Cond(t1, t2) )
2326
2327            In all cases, the address must be naturally aligned for
2328            the transfer type -- any misaligned addresses should be
2329            caught by a dominating IR check and side exit.  This
2330            alignment restriction exists because on at least some
2331            LL/SC platforms (ppc), stwcx. etc will trap w/ SIGBUS on
2332            misaligned addresses, and we have to actually generate
2333            stwcx. on the host, and we don't want it trapping on the
2334            host.
2335
2336            Summary of rules for transfer type:
2337              STOREDATA == NULL (LL):
2338                transfer type = type of RESULT
2339              STOREDATA != NULL (SC):
2340                transfer type = type of STOREDATA, and RESULT :: Ity_I1
2341         */
2342         struct {
2343            IREndness end;
2344            IRTemp    result;
2345            IRExpr*   addr;
2346            IRExpr*   storedata; /* NULL => LL, non-NULL => SC */
2347         } LLSC;
2348
2349         /* Call (possibly conditionally) a C function that has side
2350            effects (ie. is "dirty").  See the comments above the
2351            IRDirty type declaration for more information.
2352
2353            ppIRStmt output:
2354               t<tmp> = DIRTY <guard> <effects>
2355                  ::: <callee>(<args>)
2356            eg.
2357               t1 = DIRTY t27 RdFX-gst(16,4) RdFX-gst(60,4)
2358                     ::: foo{0x380035f4}(t2)
2359         */
2360         struct {
2361            IRDirty* details;
2362         } Dirty;
2363
2364         /* A memory bus event - a fence, or acquisition/release of the
2365            hardware bus lock.  IR optimisation treats all these as fences
2366            across which no memory references may be moved.
2367            ppIRStmt output: MBusEvent-Fence,
2368                             MBusEvent-BusLock, MBusEvent-BusUnlock.
2369         */
2370         struct {
2371            IRMBusEvent event;
2372         } MBE;
2373
2374         /* Conditional exit from the middle of an IRSB.
2375            ppIRStmt output: if (<guard>) goto {<jk>} <dst>
2376                         eg. if (t69) goto {Boring} 0x4000AAA:I32
2377            If <guard> is true, the guest state is also updated by
2378            PUT-ing <dst> at <offsIP>.  This is done because a
2379            taken exit must update the guest program counter.
2380         */
2381         struct {
2382            IRExpr*    guard;    /* Conditional expression */
2383            IRConst*   dst;      /* Jump target (constant only) */
2384            IRJumpKind jk;       /* Jump kind */
2385            Int        offsIP;   /* Guest state offset for IP */
2386         } Exit;
2387      } Ist;
2388   }
2389   IRStmt;
2390
/* Statement constructors.  Each allocates and returns a new IRStmt
   with the corresponding tag; the arguments mirror the fields of the
   matching member of the Ist union above. */
extern IRStmt* IRStmt_NoOp    ( void );
extern IRStmt* IRStmt_IMark   ( Addr64 addr, Int len, UChar delta );
extern IRStmt* IRStmt_AbiHint ( IRExpr* base, Int len, IRExpr* nia );
extern IRStmt* IRStmt_Put     ( Int off, IRExpr* data );
extern IRStmt* IRStmt_PutI    ( IRPutI* details );
extern IRStmt* IRStmt_WrTmp   ( IRTemp tmp, IRExpr* data );
extern IRStmt* IRStmt_Store   ( IREndness end, IRExpr* addr, IRExpr* data );
extern IRStmt* IRStmt_CAS     ( IRCAS* details );
extern IRStmt* IRStmt_LLSC    ( IREndness end, IRTemp result,
                                IRExpr* addr, IRExpr* storedata );
extern IRStmt* IRStmt_Dirty   ( IRDirty* details );
extern IRStmt* IRStmt_MBE     ( IRMBusEvent event );
/* NOTE: the argument order here (jk before dst) differs from the
   field order in the Ist.Exit struct (dst before jk) -- take care
   when constructing Exit statements. */
extern IRStmt* IRStmt_Exit    ( IRExpr* guard, IRJumpKind jk, IRConst* dst,
                                Int offsIP );

/* Deep-copy an IRStmt, including all sub-expressions. */
extern IRStmt* deepCopyIRStmt ( IRStmt* );

/* Pretty-print an IRStmt. */
extern void ppIRStmt ( IRStmt* );
2412
2413
2414/* ------------------ Basic Blocks ------------------ */
2415
2416/* Type environments: a bunch of statements, expressions, etc, are
2417   incomplete without an environment indicating the type of each
2418   IRTemp.  So this provides one.  IR temporaries are really just
2419   unsigned ints and so this provides an array, 0 .. n_types_used-1 of
2420   them.
2421*/
typedef
   struct {
      IRType* types;       /* Array giving the type of each IRTemp;
                              entries 0 .. types_used-1 are valid. */
      Int     types_size;  /* Allocated capacity of 'types' */
      Int     types_used;  /* Number of entries in use, ie. the number
                              of temporaries allocated so far */
   }
   IRTypeEnv;
2429
/* Obtain a new IRTemp of the given type, registering it in the
   given type environment. */
extern IRTemp newIRTemp ( IRTypeEnv*, IRType );

/* Deep-copy a type environment, including its types array. */
extern IRTypeEnv* deepCopyIRTypeEnv ( IRTypeEnv* );

/* Pretty-print a type environment. */
extern void ppIRTypeEnv ( IRTypeEnv* );
2438
2439
2440/* Code blocks, which in proper compiler terminology are superblocks
2441   (single entry, multiple exit code sequences) contain:
2442
2443   - A table giving a type for each temp (the "type environment")
2444   - An expandable array of statements
2445   - An expression of type 32 or 64 bits, depending on the
2446     guest's word size, indicating the next destination if the block
2447     executes all the way to the end, without a side exit
2448   - An indication of any special actions (JumpKind) needed
2449     for this final jump.
2450   - Offset of the IP field in the guest state.  This will be
2451     updated before the final jump is done.
2452
2453   "IRSB" stands for "IR Super Block".
2454*/
typedef
   struct {
      IRTypeEnv* tyenv;      /* Types of all temporaries used in 'stmts' */
      IRStmt**   stmts;      /* Expandable array of statements */
      Int        stmts_size; /* Allocated capacity of 'stmts' */
      Int        stmts_used; /* Number of entries of 'stmts' in use */
      IRExpr*    next;       /* Destination if the block executes all the
                                way to the end without a side exit;
                                guest-word sized (32 or 64 bit) */
      IRJumpKind jumpkind;   /* Any special actions needed for the
                                final jump */
      Int        offsIP;     /* Guest state offset of the IP field,
                                updated before the final jump is done */
   }
   IRSB;
2466
/* Allocate a new, uninitialised IRSB.  The caller must fill in all
   fields before use. */
extern IRSB* emptyIRSB ( void );

/* Deep-copy an IRSB, including its type environment and all
   statements. */
extern IRSB* deepCopyIRSB ( IRSB* );

/* Deep-copy an IRSB, except for the statements list, which is set
   to be a new, empty list of statements. */
extern IRSB* deepCopyIRSBExceptStmts ( IRSB* );

/* Pretty-print an IRSB. */
extern void ppIRSB ( IRSB* );

/* Append an IRStmt to an IRSB, expanding the statements array if
   necessary. */
extern void addStmtToIRSB ( IRSB*, IRStmt* );
2482
2483
2484/*---------------------------------------------------------------*/
2485/*--- Helper functions for the IR                             ---*/
2486/*---------------------------------------------------------------*/
2487
/* For messing with IR type environments: make a new, empty one. */
extern IRTypeEnv* emptyIRTypeEnv  ( void );

/* What is the type of this expression?  The IRTemp/IRExpr variants
   need the type environment to resolve the types of temporaries. */
extern IRType typeOfIRConst ( IRConst* );
extern IRType typeOfIRTemp  ( IRTypeEnv*, IRTemp );
extern IRType typeOfIRExpr  ( IRTypeEnv*, IRExpr* );

/* Sanity check a BB of IR.  'caller' is a tag used in any error
   messages; 'require_flatness' additionally insists every statement
   is flat. */
extern void sanityCheckIRSB ( IRSB*  bb,
                              HChar* caller,
                              Bool   require_flatness,
                              IRType guest_word_size );
extern Bool isFlatIRStmt ( IRStmt* );

/* Is this any value actually in the enumeration 'IRType' ? */
extern Bool isPlausibleIRType ( IRType ty );
2505
2506#endif /* ndef __LIBVEX_IR_H */
2507
2508
2509/*---------------------------------------------------------------*/
2510/*---                                             libvex_ir.h ---*/
2511/*---------------------------------------------------------------*/
2512