1
2/*---------------------------------------------------------------*/
3/*--- begin                                       libvex_ir.h ---*/
4/*---------------------------------------------------------------*/
5
6/*
7   This file is part of Valgrind, a dynamic binary instrumentation
8   framework.
9
10   Copyright (C) 2004-2015 OpenWorks LLP
11      info@open-works.net
12
13   This program is free software; you can redistribute it and/or
14   modify it under the terms of the GNU General Public License as
15   published by the Free Software Foundation; either version 2 of the
16   License, or (at your option) any later version.
17
18   This program is distributed in the hope that it will be useful, but
19   WITHOUT ANY WARRANTY; without even the implied warranty of
20   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21   General Public License for more details.
22
23   You should have received a copy of the GNU General Public License
24   along with this program; if not, write to the Free Software
25   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26   02110-1301, USA.
27
28   The GNU General Public License is contained in the file COPYING.
29
30   Neither the names of the U.S. Department of Energy nor the
31   University of California nor the names of its contributors may be
32   used to endorse or promote products derived from this software
33   without prior written permission.
34*/
35
36#ifndef __LIBVEX_IR_H
37#define __LIBVEX_IR_H
38
39#include "libvex_basictypes.h"
40
41
42/*---------------------------------------------------------------*/
43/*--- High-level IR description                               ---*/
44/*---------------------------------------------------------------*/
45
46/* Vex IR is an architecture-neutral intermediate representation.
47   Unlike some IRs in systems similar to Vex, it is not like assembly
48   language (ie. a list of instructions).  Rather, it is more like the
49   IR that might be used in a compiler.
50
51   Code blocks
52   ~~~~~~~~~~~
53   The code is broken into small code blocks ("superblocks", type:
54   'IRSB').  Each code block typically represents from 1 to perhaps 50
55   instructions.  IRSBs are single-entry, multiple-exit code blocks.
56   Each IRSB contains three things:
57   - a type environment, which indicates the type of each temporary
58     value present in the IRSB
59   - a list of statements, which represent code
60   - a jump that exits from the end the IRSB
61   Because the blocks are multiple-exit, there can be additional
62   conditional exit statements that cause control to leave the IRSB
63   before the final exit.  Also because of this, IRSBs can cover
64   multiple non-consecutive sequences of code (up to 3).  These are
65   recorded in the type VexGuestExtents (see libvex.h).
66
67   Statements and expressions
68   ~~~~~~~~~~~~~~~~~~~~~~~~~~
69   Statements (type 'IRStmt') represent operations with side-effects,
70   eg.  guest register writes, stores, and assignments to temporaries.
71   Expressions (type 'IRExpr') represent operations without
72   side-effects, eg. arithmetic operations, loads, constants.
73   Expressions can contain sub-expressions, forming expression trees,
74   eg. (3 + (4 * load(addr1)).
75
76   Storage of guest state
77   ~~~~~~~~~~~~~~~~~~~~~~
78   The "guest state" contains the guest registers of the guest machine
79   (ie.  the machine that we are simulating).  It is stored by default
80   in a block of memory supplied by the user of the VEX library,
81   generally referred to as the guest state (area).  To operate on
82   these registers, one must first read ("Get") them from the guest
83   state into a temporary value.  Afterwards, one can write ("Put")
84   them back into the guest state.
85
86   Get and Put are characterised by a byte offset into the guest
87   state, a small integer which effectively gives the identity of the
88   referenced guest register, and a type, which indicates the size of
89   the value to be transferred.
90
91   The basic "Get" and "Put" operations are sufficient to model normal
92   fixed registers on the guest.  Selected areas of the guest state
93   can be treated as a circular array of registers (type:
94   'IRRegArray'), which can be indexed at run-time.  This is done with
95   the "GetI" and "PutI" primitives.  This is necessary to describe
96   rotating register files, for example the x87 FPU stack, SPARC
97   register windows, and the Itanium register files.
98
99   Examples, and flattened vs. unflattened code
100   ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
101   For example, consider this x86 instruction:
102
103     addl %eax, %ebx
104
105   One Vex IR translation for this code would be this:
106
107     ------ IMark(0x24F275, 7, 0) ------
108     t3 = GET:I32(0)             # get %eax, a 32-bit integer
109     t2 = GET:I32(12)            # get %ebx, a 32-bit integer
110     t1 = Add32(t3,t2)           # addl
111     PUT(0) = t1                 # put %eax
112
113   (For simplicity, this ignores the effects on the condition codes, and
114   the update of the instruction pointer.)
115
116   The "IMark" is an IR statement that doesn't represent actual code.
117   Instead it indicates the address and length of the original
118   instruction.  The numbers 0 and 12 are offsets into the guest state
119   for %eax and %ebx.  The full list of offsets for an architecture
120   <ARCH> can be found in the type VexGuest<ARCH>State in the file
121   VEX/pub/libvex_guest_<ARCH>.h.
122
123   The five statements in this example are:
124   - the IMark
125   - three assignments to temporaries
126   - one register write (put)
127
128   The six expressions in this example are:
129   - two register reads (gets)
130   - one arithmetic (add) operation
131   - three temporaries (two nested within the Add32, one in the PUT)
132
133   The above IR is "flattened", ie. all sub-expressions are "atoms",
134   either constants or temporaries.  An equivalent, unflattened version
135   would be:
136
137     PUT(0) = Add32(GET:I32(0), GET:I32(12))
138
139   IR is guaranteed to be flattened at instrumentation-time.  This makes
140   instrumentation easier.  Equivalent flattened and unflattened IR
141   typically results in the same generated code.
142
143   Another example, this one showing loads and stores:
144
145     addl %edx,4(%eax)
146
147   This becomes (again ignoring condition code and instruction pointer
148   updates):
149
150     ------ IMark(0x4000ABA, 3, 0) ------
151     t3 = Add32(GET:I32(0),0x4:I32)
152     t2 = LDle:I32(t3)
153     t1 = GET:I32(8)
154     t0 = Add32(t2,t1)
155     STle(t3) = t0
156
157   The "le" in "LDle" and "STle" is short for "little-endian".
158
159   No need for deallocations
160   ~~~~~~~~~~~~~~~~~~~~~~~~~
161   Although there are allocation functions for various data structures
162   in this file, there are no deallocation functions.  This is because
163   Vex uses a memory allocation scheme that automatically reclaims the
164   memory used by allocated structures once translation is completed.
165   This makes things easier for tools that instruments/transforms code
166   blocks.
167
168   SSAness and typing
169   ~~~~~~~~~~~~~~~~~~
170   The IR is fully typed.  For every IRSB (IR block) it is possible to
171   say unambiguously whether or not it is correctly typed.
172   Incorrectly typed IR has no meaning and the VEX will refuse to
173   process it.  At various points during processing VEX typechecks the
174   IR and aborts if any violations are found.  This seems overkill but
175   makes it a great deal easier to build a reliable JIT.
176
177   IR also has the SSA property.  SSA stands for Static Single
178   Assignment, and what it means is that each IR temporary may be
179   assigned to only once.  This idea became widely used in compiler
180   construction in the mid to late 90s.  It makes many IR-level
181   transformations/code improvements easier, simpler and faster.
182   Whenever it typechecks an IR block, VEX also checks the SSA
183   property holds, and will abort if not so.  So SSAness is
184   mechanically and rigidly enforced.
185*/
186
187/*---------------------------------------------------------------*/
188/*--- Type definitions for the IR                             ---*/
189/*---------------------------------------------------------------*/
190
191/* General comments about naming schemes:
192
193   All publically visible functions contain the name of the primary
194   type on which they operate (IRFoo, IRBar, etc).  Hence you should
195   be able to identify these functions by grepping for "IR[A-Z]".
196
197   For some type 'IRFoo':
198
199   - ppIRFoo is the printing method for IRFoo, printing it to the
200     output channel specified in the LibVEX_Initialise call.
201
202   - eqIRFoo is a structural equality predicate for IRFoos.
203
204   - deepCopyIRFoo is a deep copy constructor for IRFoos.
205     It recursively traverses the entire argument tree and
206     produces a complete new tree.  All types have a deep copy
207     constructor.
208
209   - shallowCopyIRFoo is the shallow copy constructor for IRFoos.
210     It creates a new top-level copy of the supplied object,
211     but does not copy any sub-objects.  Only some types have a
212     shallow copy constructor.
213*/
214
215/* ------------------ Types ------------------ */
216
217/* A type indicates the size of a value, and whether it's an integer, a
218   float, or a vector (SIMD) value. */
219typedef
220   enum {
221      Ity_INVALID=0x1100,
222      Ity_I1,
223      Ity_I8,
224      Ity_I16,
225      Ity_I32,
226      Ity_I64,
227      Ity_I128,  /* 128-bit scalar */
228      Ity_F16,   /* 16 bit float */
229      Ity_F32,   /* IEEE 754 float */
230      Ity_F64,   /* IEEE 754 double */
231      Ity_D32,   /* 32-bit Decimal floating point */
232      Ity_D64,   /* 64-bit Decimal floating point */
233      Ity_D128,  /* 128-bit Decimal floating point */
234      Ity_F128,  /* 128-bit floating point; implementation defined */
235      Ity_V128,  /* 128-bit SIMD */
236      Ity_V256   /* 256-bit SIMD */
237   }
238   IRType;
239
240/* Pretty-print an IRType */
241extern void ppIRType ( IRType );
242
243/* Get the size (in bytes) of an IRType */
244extern Int sizeofIRType ( IRType );
245
246/* Translate 1/2/4/8 into Ity_I{8,16,32,64} respectively.  Asserts on
247   any other input. */
248extern IRType integerIRTypeOfSize ( Int szB );
249
250
251/* ------------------ Endianness ------------------ */
252
253/* IREndness is used in load IRExprs and store IRStmts. */
254typedef
255   enum {
256      Iend_LE=0x1200, /* little endian */
257      Iend_BE          /* big endian */
258   }
259   IREndness;
260
261
262/* ------------------ Constants ------------------ */
263
264/* IRConsts are used within 'Const' and 'Exit' IRExprs. */
265
266/* The various kinds of constant. */
267typedef
268   enum {
269      Ico_U1=0x1300,
270      Ico_U8,
271      Ico_U16,
272      Ico_U32,
273      Ico_U64,
274      Ico_F32,   /* 32-bit IEEE754 floating */
275      Ico_F32i,  /* 32-bit unsigned int to be interpreted literally
276                    as a IEEE754 single value. */
277      Ico_F64,   /* 64-bit IEEE754 floating */
278      Ico_F64i,  /* 64-bit unsigned int to be interpreted literally
279                    as a IEEE754 double value. */
280      Ico_V128,  /* 128-bit restricted vector constant, with 1 bit
281                    (repeated 8 times) for each of the 16 x 1-byte lanes */
282      Ico_V256   /* 256-bit restricted vector constant, with 1 bit
283                    (repeated 8 times) for each of the 32 x 1-byte lanes */
284   }
285   IRConstTag;
286
287/* A constant.  Stored as a tagged union.  'tag' indicates what kind of
288   constant this is.  'Ico' is the union that holds the fields.  If an
289   IRConst 'c' has c.tag equal to Ico_U32, then it's a 32-bit constant,
290   and its value can be accessed with 'c.Ico.U32'. */
291typedef
292   struct _IRConst {
293      IRConstTag tag;
294      union {
295         Bool   U1;
296         UChar  U8;
297         UShort U16;
298         UInt   U32;
299         ULong  U64;
300         Float  F32;
301         UInt   F32i;
302         Double F64;
303         ULong  F64i;
304         UShort V128;   /* 16-bit value; see Ico_V128 comment above */
305         UInt   V256;   /* 32-bit value; see Ico_V256 comment above */
306      } Ico;
307   }
308   IRConst;
309
310/* IRConst constructors */
311extern IRConst* IRConst_U1   ( Bool );
312extern IRConst* IRConst_U8   ( UChar );
313extern IRConst* IRConst_U16  ( UShort );
314extern IRConst* IRConst_U32  ( UInt );
315extern IRConst* IRConst_U64  ( ULong );
316extern IRConst* IRConst_F32  ( Float );
317extern IRConst* IRConst_F32i ( UInt );
318extern IRConst* IRConst_F64  ( Double );
319extern IRConst* IRConst_F64i ( ULong );
320extern IRConst* IRConst_V128 ( UShort );
321extern IRConst* IRConst_V256 ( UInt );
322
323/* Deep-copy an IRConst */
324extern IRConst* deepCopyIRConst ( const IRConst* );
325
326/* Pretty-print an IRConst */
327extern void ppIRConst ( const IRConst* );
328
329/* Compare two IRConsts for equality */
330extern Bool eqIRConst ( const IRConst*, const IRConst* );
331
332
333/* ------------------ Call targets ------------------ */
334
335/* Describes a helper function to call.  The name part is purely for
336   pretty printing and not actually used.  regparms=n tells the back
337   end that the callee has been declared
338   "__attribute__((regparm(n)))", although indirectly using the
339   VEX_REGPARM(n) macro.  On some targets (x86) the back end will need
340   to construct a non-standard sequence to call a function declared
341   like this.
342
343   mcx_mask is a sop to Memcheck.  It indicates which args should be
344   considered 'always defined' when lazily computing definedness of
345   the result.  Bit 0 of mcx_mask corresponds to args[0], bit 1 to
346   args[1], etc.  If a bit is set, the corresponding arg is excluded
347   (hence "x" in "mcx") from definedness checking.
348*/
349
350typedef
351   struct {
352      Int          regparms;
353      const HChar* name;
354      void*        addr;
355      UInt         mcx_mask;
356   }
357   IRCallee;
358
359/* Create an IRCallee. */
360extern IRCallee* mkIRCallee ( Int regparms, const HChar* name, void* addr );
361
362/* Deep-copy an IRCallee. */
363extern IRCallee* deepCopyIRCallee ( const IRCallee* );
364
365/* Pretty-print an IRCallee. */
366extern void ppIRCallee ( const IRCallee* );
367
368
369/* ------------------ Guest state arrays ------------------ */
370
371/* This describes a section of the guest state that we want to
372   be able to index at run time, so as to be able to describe
373   indexed or rotating register files on the guest. */
374typedef
375   struct {
376      Int    base;   /* guest state offset of start of indexed area */
377      IRType elemTy; /* type of each element in the indexed area */
378      Int    nElems; /* number of elements in the indexed area */
379   }
380   IRRegArray;
381
382extern IRRegArray* mkIRRegArray ( Int, IRType, Int );
383
384extern IRRegArray* deepCopyIRRegArray ( const IRRegArray* );
385
386extern void ppIRRegArray ( const IRRegArray* );
387extern Bool eqIRRegArray ( const IRRegArray*, const IRRegArray* );
388
389
390/* ------------------ Temporaries ------------------ */
391
392/* This represents a temporary, eg. t1.  The IR optimiser relies on the
393   fact that IRTemps are 32-bit ints.  Do not change them to be ints of
394   any other size. */
395typedef UInt IRTemp;
396
397/* Pretty-print an IRTemp. */
398extern void ppIRTemp ( IRTemp );
399
400#define IRTemp_INVALID ((IRTemp)0xFFFFFFFF)
401
402
403/* --------------- Primops (arity 1,2,3 and 4) --------------- */
404
405/* Primitive operations that are used in Unop, Binop, Triop and Qop
406   IRExprs.  Once we take into account integer, floating point and SIMD
407   operations of all the different sizes, there are quite a lot of them.
408   Most instructions supported by the architectures that Vex supports
409   (x86, PPC, etc) are represented.  Some more obscure ones (eg. cpuid)
410   are not;  they are instead handled with dirty helpers that emulate
411   their functionality.  Such obscure ones are thus not directly visible
412   in the IR, but their effects on guest state (memory and registers)
413   are made visible via the annotations in IRDirty structures.
414*/
415typedef
416   enum {
417      /* -- Do not change this ordering.  The IR generators rely on
418            (eg) Iop_Add64 == IopAdd8 + 3. -- */
419
420      Iop_INVALID=0x1400,
421      Iop_Add8,  Iop_Add16,  Iop_Add32,  Iop_Add64,
422      Iop_Sub8,  Iop_Sub16,  Iop_Sub32,  Iop_Sub64,
423      /* Signless mul.  MullS/MullU is elsewhere. */
424      Iop_Mul8,  Iop_Mul16,  Iop_Mul32,  Iop_Mul64,
425      Iop_Or8,   Iop_Or16,   Iop_Or32,   Iop_Or64,
426      Iop_And8,  Iop_And16,  Iop_And32,  Iop_And64,
427      Iop_Xor8,  Iop_Xor16,  Iop_Xor32,  Iop_Xor64,
428      Iop_Shl8,  Iop_Shl16,  Iop_Shl32,  Iop_Shl64,
429      Iop_Shr8,  Iop_Shr16,  Iop_Shr32,  Iop_Shr64,
430      Iop_Sar8,  Iop_Sar16,  Iop_Sar32,  Iop_Sar64,
431      /* Integer comparisons. */
432      Iop_CmpEQ8,  Iop_CmpEQ16,  Iop_CmpEQ32,  Iop_CmpEQ64,
433      Iop_CmpNE8,  Iop_CmpNE16,  Iop_CmpNE32,  Iop_CmpNE64,
434      /* Tags for unary ops */
435      Iop_Not8,  Iop_Not16,  Iop_Not32,  Iop_Not64,
436
437      /* Exactly like CmpEQ8/16/32/64, but carrying the additional
438         hint that these compute the success/failure of a CAS
439         operation, and hence are almost certainly applied to two
440         copies of the same value, which in turn has implications for
441         Memcheck's instrumentation. */
442      Iop_CasCmpEQ8, Iop_CasCmpEQ16, Iop_CasCmpEQ32, Iop_CasCmpEQ64,
443      Iop_CasCmpNE8, Iop_CasCmpNE16, Iop_CasCmpNE32, Iop_CasCmpNE64,
444
445      /* Exactly like CmpNE8/16/32/64, but carrying the additional
446         hint that these needs expensive definedness tracking. */
447      Iop_ExpCmpNE8, Iop_ExpCmpNE16, Iop_ExpCmpNE32, Iop_ExpCmpNE64,
448
449      /* -- Ordering not important after here. -- */
450
451      /* Widening multiplies */
452      Iop_MullS8, Iop_MullS16, Iop_MullS32, Iop_MullS64,
453      Iop_MullU8, Iop_MullU16, Iop_MullU32, Iop_MullU64,
454
455      /* Wierdo integer stuff */
456      Iop_Clz64, Iop_Clz32,   /* count leading zeroes */
457      Iop_Ctz64, Iop_Ctz32,   /* count trailing zeros */
458      /* Ctz64/Ctz32/Clz64/Clz32 are UNDEFINED when given arguments of
459         zero.  You must ensure they are never given a zero argument.
460      */
461
462      /* Standard integer comparisons */
463      Iop_CmpLT32S, Iop_CmpLT64S,
464      Iop_CmpLE32S, Iop_CmpLE64S,
465      Iop_CmpLT32U, Iop_CmpLT64U,
466      Iop_CmpLE32U, Iop_CmpLE64U,
467
468      /* As a sop to Valgrind-Memcheck, the following are useful. */
469      Iop_CmpNEZ8, Iop_CmpNEZ16,  Iop_CmpNEZ32,  Iop_CmpNEZ64,
470      Iop_CmpwNEZ32, Iop_CmpwNEZ64, /* all-0s -> all-Os; other -> all-1s */
471      Iop_Left8, Iop_Left16, Iop_Left32, Iop_Left64, /*  \x -> x | -x */
472      Iop_Max32U, /* unsigned max */
473
474      /* PowerPC-style 3-way integer comparisons.  Without them it is
475         difficult to simulate PPC efficiently.
476         op(x,y) | x < y  = 0x8 else
477                 | x > y  = 0x4 else
478                 | x == y = 0x2
479      */
480      Iop_CmpORD32U, Iop_CmpORD64U,
481      Iop_CmpORD32S, Iop_CmpORD64S,
482
483      /* Division */
484      /* TODO: clarify semantics wrt rounding, negative values, whatever */
485      Iop_DivU32,   // :: I32,I32 -> I32 (simple div, no mod)
486      Iop_DivS32,   // ditto, signed
487      Iop_DivU64,   // :: I64,I64 -> I64 (simple div, no mod)
488      Iop_DivS64,   // ditto, signed
489      Iop_DivU64E,  // :: I64,I64 -> I64 (dividend is 64-bit arg (hi)
490                    //                    concat with 64 0's (low))
491      Iop_DivS64E,  // ditto, signed
492      Iop_DivU32E,  // :: I32,I32 -> I32 (dividend is 32-bit arg (hi)
493                    // concat with 32 0's (low))
494      Iop_DivS32E,  // ditto, signed
495
496      Iop_DivModU64to32, // :: I64,I32 -> I64
497                         // of which lo half is div and hi half is mod
498      Iop_DivModS64to32, // ditto, signed
499
500      Iop_DivModU128to64, // :: V128,I64 -> V128
501                          // of which lo half is div and hi half is mod
502      Iop_DivModS128to64, // ditto, signed
503
504      Iop_DivModS64to64, // :: I64,I64 -> I128
505                         // of which lo half is div and hi half is mod
506
507      /* Integer conversions.  Some of these are redundant (eg
508         Iop_64to8 is the same as Iop_64to32 and then Iop_32to8), but
509         having a complete set reduces the typical dynamic size of IR
510         and makes the instruction selectors easier to write. */
511
512      /* Widening conversions */
513      Iop_8Uto16, Iop_8Uto32,  Iop_8Uto64,
514                  Iop_16Uto32, Iop_16Uto64,
515                               Iop_32Uto64,
516      Iop_8Sto16, Iop_8Sto32,  Iop_8Sto64,
517                  Iop_16Sto32, Iop_16Sto64,
518                               Iop_32Sto64,
519
520      /* Narrowing conversions */
521      Iop_64to8, Iop_32to8, Iop_64to16,
522      /* 8 <-> 16 bit conversions */
523      Iop_16to8,      // :: I16 -> I8, low half
524      Iop_16HIto8,    // :: I16 -> I8, high half
525      Iop_8HLto16,    // :: (I8,I8) -> I16
526      /* 16 <-> 32 bit conversions */
527      Iop_32to16,     // :: I32 -> I16, low half
528      Iop_32HIto16,   // :: I32 -> I16, high half
529      Iop_16HLto32,   // :: (I16,I16) -> I32
530      /* 32 <-> 64 bit conversions */
531      Iop_64to32,     // :: I64 -> I32, low half
532      Iop_64HIto32,   // :: I64 -> I32, high half
533      Iop_32HLto64,   // :: (I32,I32) -> I64
534      /* 64 <-> 128 bit conversions */
535      Iop_128to64,    // :: I128 -> I64, low half
536      Iop_128HIto64,  // :: I128 -> I64, high half
537      Iop_64HLto128,  // :: (I64,I64) -> I128
538      /* 1-bit stuff */
539      Iop_Not1,   /* :: Ity_Bit -> Ity_Bit */
540      Iop_32to1,  /* :: Ity_I32 -> Ity_Bit, just select bit[0] */
541      Iop_64to1,  /* :: Ity_I64 -> Ity_Bit, just select bit[0] */
542      Iop_1Uto8,  /* :: Ity_Bit -> Ity_I8,  unsigned widen */
543      Iop_1Uto32, /* :: Ity_Bit -> Ity_I32, unsigned widen */
544      Iop_1Uto64, /* :: Ity_Bit -> Ity_I64, unsigned widen */
545      Iop_1Sto8,  /* :: Ity_Bit -> Ity_I8,  signed widen */
546      Iop_1Sto16, /* :: Ity_Bit -> Ity_I16, signed widen */
547      Iop_1Sto32, /* :: Ity_Bit -> Ity_I32, signed widen */
548      Iop_1Sto64, /* :: Ity_Bit -> Ity_I64, signed widen */
549
550      /* ------ Floating point.  We try to be IEEE754 compliant. ------ */
551
552      /* --- Simple stuff as mandated by 754. --- */
553
554      /* Binary operations, with rounding. */
555      /* :: IRRoundingMode(I32) x F64 x F64 -> F64 */
556      Iop_AddF64, Iop_SubF64, Iop_MulF64, Iop_DivF64,
557
558      /* :: IRRoundingMode(I32) x F32 x F32 -> F32 */
559      Iop_AddF32, Iop_SubF32, Iop_MulF32, Iop_DivF32,
560
561      /* Variants of the above which produce a 64-bit result but which
562         round their result to a IEEE float range first. */
563      /* :: IRRoundingMode(I32) x F64 x F64 -> F64 */
564      Iop_AddF64r32, Iop_SubF64r32, Iop_MulF64r32, Iop_DivF64r32,
565
566      /* Unary operations, without rounding. */
567      /* :: F64 -> F64 */
568      Iop_NegF64, Iop_AbsF64,
569
570      /* :: F32 -> F32 */
571      Iop_NegF32, Iop_AbsF32,
572
573      /* Unary operations, with rounding. */
574      /* :: IRRoundingMode(I32) x F64 -> F64 */
575      Iop_SqrtF64,
576
577      /* :: IRRoundingMode(I32) x F32 -> F32 */
578      Iop_SqrtF32,
579
580      /* Comparison, yielding GT/LT/EQ/UN(ordered), as per the following:
581            0x45 Unordered
582            0x01 LT
583            0x00 GT
584            0x40 EQ
585         This just happens to be the Intel encoding.  The values
586         are recorded in the type IRCmpF64Result.
587      */
588      /* :: F64 x F64 -> IRCmpF64Result(I32) */
589      Iop_CmpF64,
590      Iop_CmpF32,
591      Iop_CmpF128,
592
593      /* --- Int to/from FP conversions. --- */
594
595      /* For the most part, these take a first argument :: Ity_I32 (as
596         IRRoundingMode) which is an indication of the rounding mode
597         to use, as per the following encoding ("the standard
598         encoding"):
599            00b  to nearest (the default)
600            01b  to -infinity
601            10b  to +infinity
602            11b  to zero
603         This just happens to be the Intel encoding.  For reference only,
604         the PPC encoding is:
605            00b  to nearest (the default)
606            01b  to zero
607            10b  to +infinity
608            11b  to -infinity
609         Any PPC -> IR front end will have to translate these PPC
610         encodings, as encoded in the guest state, to the standard
611         encodings, to pass to the primops.
612         For reference only, the ARM VFP encoding is:
613            00b  to nearest
614            01b  to +infinity
615            10b  to -infinity
616            11b  to zero
617         Again, this will have to be converted to the standard encoding
618         to pass to primops.
619
620         If one of these conversions gets an out-of-range condition,
621         or a NaN, as an argument, the result is host-defined.  On x86
622         the "integer indefinite" value 0x80..00 is produced.  On PPC
623         it is either 0x80..00 or 0x7F..FF depending on the sign of
624         the argument.
625
626         On ARMvfp, when converting to a signed integer result, the
627         overflow result is 0x80..00 for negative args and 0x7F..FF
628         for positive args.  For unsigned integer results it is
629         0x00..00 and 0xFF..FF respectively.
630
631         Rounding is required whenever the destination type cannot
632         represent exactly all values of the source type.
633      */
634      Iop_F64toI16S, /* IRRoundingMode(I32) x F64 -> signed I16 */
635      Iop_F64toI32S, /* IRRoundingMode(I32) x F64 -> signed I32 */
636      Iop_F64toI64S, /* IRRoundingMode(I32) x F64 -> signed I64 */
637      Iop_F64toI64U, /* IRRoundingMode(I32) x F64 -> unsigned I64 */
638
639      Iop_F64toI32U, /* IRRoundingMode(I32) x F64 -> unsigned I32 */
640
641      Iop_I32StoF64, /*                       signed I32 -> F64 */
642      Iop_I64StoF64, /* IRRoundingMode(I32) x signed I64 -> F64 */
643      Iop_I64UtoF64, /* IRRoundingMode(I32) x unsigned I64 -> F64 */
644      Iop_I64UtoF32, /* IRRoundingMode(I32) x unsigned I64 -> F32 */
645
646      Iop_I32UtoF32, /* IRRoundingMode(I32) x unsigned I32 -> F32 */
647      Iop_I32UtoF64, /*                       unsigned I32 -> F64 */
648
649      Iop_F32toI32S, /* IRRoundingMode(I32) x F32 -> signed I32 */
650      Iop_F32toI64S, /* IRRoundingMode(I32) x F32 -> signed I64 */
651      Iop_F32toI32U, /* IRRoundingMode(I32) x F32 -> unsigned I32 */
652      Iop_F32toI64U, /* IRRoundingMode(I32) x F32 -> unsigned I64 */
653
654      Iop_I32StoF32, /* IRRoundingMode(I32) x signed I32 -> F32 */
655      Iop_I64StoF32, /* IRRoundingMode(I32) x signed I64 -> F32 */
656
657      /* Conversion between floating point formats */
658      Iop_F32toF64,  /*                       F32 -> F64 */
659      Iop_F64toF32,  /* IRRoundingMode(I32) x F64 -> F32 */
660
661      /* Reinterpretation.  Take an F64 and produce an I64 with
662         the same bit pattern, or vice versa. */
663      Iop_ReinterpF64asI64, Iop_ReinterpI64asF64,
664      Iop_ReinterpF32asI32, Iop_ReinterpI32asF32,
665
666      /* Support for 128-bit floating point */
667      Iop_F64HLtoF128,/* (high half of F128,low half of F128) -> F128 */
668      Iop_F128HItoF64,/* F128 -> high half of F128 into a F64 register */
669      Iop_F128LOtoF64,/* F128 -> low  half of F128 into a F64 register */
670
671      /* :: IRRoundingMode(I32) x F128 x F128 -> F128 */
672      Iop_AddF128, Iop_SubF128, Iop_MulF128, Iop_DivF128,
673
674      /* :: F128 -> F128 */
675      Iop_NegF128, Iop_AbsF128,
676
677      /* :: IRRoundingMode(I32) x F128 -> F128 */
678      Iop_SqrtF128,
679
680      Iop_I32StoF128, /*                signed I32  -> F128 */
681      Iop_I64StoF128, /*                signed I64  -> F128 */
682      Iop_I32UtoF128, /*              unsigned I32  -> F128 */
683      Iop_I64UtoF128, /*              unsigned I64  -> F128 */
684      Iop_F32toF128,  /*                       F32  -> F128 */
685      Iop_F64toF128,  /*                       F64  -> F128 */
686
687      Iop_F128toI32S, /* IRRoundingMode(I32) x F128 -> signed I32  */
688      Iop_F128toI64S, /* IRRoundingMode(I32) x F128 -> signed I64  */
689      Iop_F128toI32U, /* IRRoundingMode(I32) x F128 -> unsigned I32  */
690      Iop_F128toI64U, /* IRRoundingMode(I32) x F128 -> unsigned I64  */
691      Iop_F128toF64,  /* IRRoundingMode(I32) x F128 -> F64         */
692      Iop_F128toF32,  /* IRRoundingMode(I32) x F128 -> F32         */
693
694      /* --- guest x86/amd64 specifics, not mandated by 754. --- */
695
696      /* Binary ops, with rounding. */
697      /* :: IRRoundingMode(I32) x F64 x F64 -> F64 */
698      Iop_AtanF64,       /* FPATAN,  arctan(arg1/arg2)       */
699      Iop_Yl2xF64,       /* FYL2X,   arg1 * log2(arg2)       */
700      Iop_Yl2xp1F64,     /* FYL2XP1, arg1 * log2(arg2+1.0)   */
701      Iop_PRemF64,       /* FPREM,   non-IEEE remainder(arg1/arg2)    */
702      Iop_PRemC3210F64,  /* C3210 flags resulting from FPREM, :: I32 */
703      Iop_PRem1F64,      /* FPREM1,  IEEE remainder(arg1/arg2)    */
704      Iop_PRem1C3210F64, /* C3210 flags resulting from FPREM1, :: I32 */
705      Iop_ScaleF64,      /* FSCALE,  arg1 * (2^RoundTowardsZero(arg2)) */
706      /* Note that on x86 guest, PRem1{C3210} has the same behaviour
707         as the IEEE mandated RemF64, except it is limited in the
708         range of its operand.  Hence the partialness. */
709
710      /* Unary ops, with rounding. */
711      /* :: IRRoundingMode(I32) x F64 -> F64 */
712      Iop_SinF64,    /* FSIN */
713      Iop_CosF64,    /* FCOS */
714      Iop_TanF64,    /* FTAN */
715      Iop_2xm1F64,   /* (2^arg - 1.0) */
716      Iop_RoundF128toInt, /* F128 value to nearest integral value (still
717                             as F128) */
718      Iop_RoundF64toInt, /* F64 value to nearest integral value (still
719                            as F64) */
720      Iop_RoundF32toInt, /* F32 value to nearest integral value (still
721                            as F32) */
722
723      /* --- guest s390 specifics, not mandated by 754. --- */
724
725      /* Fused multiply-add/sub */
726      /* :: IRRoundingMode(I32) x F32 x F32 x F32 -> F32
727            (computes arg2 * arg3 +/- arg4) */
728      Iop_MAddF32, Iop_MSubF32,
729
730      /* --- guest ppc32/64 specifics, not mandated by 754. --- */
731
732      /* Ternary operations, with rounding. */
733      /* Fused multiply-add/sub, with 112-bit intermediate
734         precision for ppc.
735         Also used to implement fused multiply-add/sub for s390. */
736      /* :: IRRoundingMode(I32) x F64 x F64 x F64 -> F64
737            (computes arg2 * arg3 +/- arg4) */
738      Iop_MAddF64, Iop_MSubF64,
739
740      /* Variants of the above which produce a 64-bit result but which
741         round their result to a IEEE float range first. */
742      /* :: IRRoundingMode(I32) x F64 x F64 x F64 -> F64 */
743      Iop_MAddF64r32, Iop_MSubF64r32,
744
745      /* :: F64 -> F64 */
746      Iop_RSqrtEst5GoodF64, /* reciprocal square root estimate, 5 good bits */
747      Iop_RoundF64toF64_NEAREST, /* frin */
748      Iop_RoundF64toF64_NegINF,  /* frim */
749      Iop_RoundF64toF64_PosINF,  /* frip */
750      Iop_RoundF64toF64_ZERO,    /* friz */
751
752      /* :: F64 -> F32 */
753      Iop_TruncF64asF32, /* do F64->F32 truncation as per 'fsts' */
754
755      /* :: IRRoundingMode(I32) x F64 -> F64 */
756      Iop_RoundF64toF32, /* round F64 to nearest F32 value (still as F64) */
757      /* NB: pretty much the same as Iop_F64toF32, except no change
758         of type. */
759
760      /* --- guest arm64 specifics, not mandated by 754. --- */
761
762      Iop_RecpExpF64,  /* FRECPX d  :: IRRoundingMode(I32) x F64 -> F64 */
763      Iop_RecpExpF32,  /* FRECPX s  :: IRRoundingMode(I32) x F32 -> F32 */
764
765      /* ------------------ 16-bit scalar FP ------------------ */
766
767      Iop_F16toF64,  /*                       F16 -> F64 */
768      Iop_F64toF16,  /* IRRoundingMode(I32) x F64 -> F16 */
769
770      Iop_F16toF32,  /*                       F16 -> F32 */
771      Iop_F32toF16,  /* IRRoundingMode(I32) x F32 -> F16 */
772
773      /* ------------------ 32-bit SIMD Integer ------------------ */
774
775      /* 32x1 saturating add/sub (ok, well, not really SIMD :) */
776      Iop_QAdd32S,
777      Iop_QSub32S,
778
779      /* 16x2 add/sub, also signed/unsigned saturating variants */
780      Iop_Add16x2, Iop_Sub16x2,
781      Iop_QAdd16Sx2, Iop_QAdd16Ux2,
782      Iop_QSub16Sx2, Iop_QSub16Ux2,
783
784      /* 16x2 signed/unsigned halving add/sub.  For each lane, these
785         compute bits 16:1 of (eg) sx(argL) + sx(argR),
786         or zx(argL) - zx(argR) etc. */
787      Iop_HAdd16Ux2, Iop_HAdd16Sx2,
788      Iop_HSub16Ux2, Iop_HSub16Sx2,
789
790      /* 8x4 add/sub, also signed/unsigned saturating variants */
791      Iop_Add8x4, Iop_Sub8x4,
792      Iop_QAdd8Sx4, Iop_QAdd8Ux4,
793      Iop_QSub8Sx4, Iop_QSub8Ux4,
794
795      /* 8x4 signed/unsigned halving add/sub.  For each lane, these
796         compute bits 8:1 of (eg) sx(argL) + sx(argR),
797         or zx(argL) - zx(argR) etc. */
798      Iop_HAdd8Ux4, Iop_HAdd8Sx4,
799      Iop_HSub8Ux4, Iop_HSub8Sx4,
800
801      /* 8x4 sum of absolute unsigned differences. */
802      Iop_Sad8Ux4,
803
804      /* MISC (vector integer cmp != 0) */
805      Iop_CmpNEZ16x2, Iop_CmpNEZ8x4,
806
807      /* ------------------ 64-bit SIMD FP ------------------------ */
808
809      /* Convertion to/from int */
810      Iop_I32UtoFx2,  Iop_I32StoFx2,    /* I32x4 -> F32x4 */
811      Iop_FtoI32Ux2_RZ,  Iop_FtoI32Sx2_RZ,    /* F32x4 -> I32x4 */
812      /* Fixed32 format is floating-point number with fixed number of fraction
813         bits. The number of fraction bits is passed as a second argument of
814         type I8. */
815      Iop_F32ToFixed32Ux2_RZ, Iop_F32ToFixed32Sx2_RZ, /* fp -> fixed-point */
816      Iop_Fixed32UToF32x2_RN, Iop_Fixed32SToF32x2_RN, /* fixed-point -> fp */
817
818      /* Binary operations */
819      Iop_Max32Fx2,      Iop_Min32Fx2,
820      /* Pairwise Min and Max. See integer pairwise operations for more
821         details. */
822      Iop_PwMax32Fx2,    Iop_PwMin32Fx2,
823      /* Note: For the following compares, the arm front-end assumes a
824         nan in a lane of either argument returns zero for that lane. */
825      Iop_CmpEQ32Fx2, Iop_CmpGT32Fx2, Iop_CmpGE32Fx2,
826
827      /* Vector Reciprocal Estimate finds an approximate reciprocal of each
828      element in the operand vector, and places the results in the destination
829      vector.  */
830      Iop_RecipEst32Fx2,
831
832      /* Vector Reciprocal Step computes (2.0 - arg1 * arg2).
833         Note, that if one of the arguments is zero and another one is infinity
834         of arbitrary sign the result of the operation is 2.0. */
835      Iop_RecipStep32Fx2,
836
837      /* Vector Reciprocal Square Root Estimate finds an approximate reciprocal
838         square root of each element in the operand vector. */
839      Iop_RSqrtEst32Fx2,
840
841      /* Vector Reciprocal Square Root Step computes (3.0 - arg1 * arg2) / 2.0.
842         Note, that of one of the arguments is zero and another one is infiinty
843         of arbitrary sign the result of the operation is 1.5. */
844      Iop_RSqrtStep32Fx2,
845
846      /* Unary */
847      Iop_Neg32Fx2, Iop_Abs32Fx2,
848
849      /* ------------------ 64-bit SIMD Integer. ------------------ */
850
851      /* MISC (vector integer cmp != 0) */
852      Iop_CmpNEZ8x8, Iop_CmpNEZ16x4, Iop_CmpNEZ32x2,
853
854      /* ADDITION (normal / unsigned sat / signed sat) */
855      Iop_Add8x8,   Iop_Add16x4,   Iop_Add32x2,
856      Iop_QAdd8Ux8, Iop_QAdd16Ux4, Iop_QAdd32Ux2, Iop_QAdd64Ux1,
857      Iop_QAdd8Sx8, Iop_QAdd16Sx4, Iop_QAdd32Sx2, Iop_QAdd64Sx1,
858
859      /* PAIRWISE operations */
860      /* Iop_PwFoo16x4( [a,b,c,d], [e,f,g,h] ) =
861            [Foo16(a,b), Foo16(c,d), Foo16(e,f), Foo16(g,h)] */
862      Iop_PwAdd8x8,  Iop_PwAdd16x4,  Iop_PwAdd32x2,
863      Iop_PwMax8Sx8, Iop_PwMax16Sx4, Iop_PwMax32Sx2,
864      Iop_PwMax8Ux8, Iop_PwMax16Ux4, Iop_PwMax32Ux2,
865      Iop_PwMin8Sx8, Iop_PwMin16Sx4, Iop_PwMin32Sx2,
866      Iop_PwMin8Ux8, Iop_PwMin16Ux4, Iop_PwMin32Ux2,
867      /* Longening variant is unary. The resulting vector contains two times
868         less elements than operand, but they are two times wider.
869         Example:
870            Iop_PAddL16Ux4( [a,b,c,d] ) = [a+b,c+d]
871               where a+b and c+d are unsigned 32-bit values. */
872      Iop_PwAddL8Ux8, Iop_PwAddL16Ux4, Iop_PwAddL32Ux2,
873      Iop_PwAddL8Sx8, Iop_PwAddL16Sx4, Iop_PwAddL32Sx2,
874
875      /* SUBTRACTION (normal / unsigned sat / signed sat) */
876      Iop_Sub8x8,   Iop_Sub16x4,   Iop_Sub32x2,
877      Iop_QSub8Ux8, Iop_QSub16Ux4, Iop_QSub32Ux2, Iop_QSub64Ux1,
878      Iop_QSub8Sx8, Iop_QSub16Sx4, Iop_QSub32Sx2, Iop_QSub64Sx1,
879
880      /* ABSOLUTE VALUE */
881      Iop_Abs8x8, Iop_Abs16x4, Iop_Abs32x2,
882
883      /* MULTIPLICATION (normal / high half of signed/unsigned / plynomial ) */
884      Iop_Mul8x8, Iop_Mul16x4, Iop_Mul32x2,
885      Iop_Mul32Fx2,
886      Iop_MulHi16Ux4,
887      Iop_MulHi16Sx4,
888      /* Plynomial multiplication treats it's arguments as coefficients of
889         polynoms over {0, 1}. */
890      Iop_PolynomialMul8x8,
891
892      /* Vector Saturating Doubling Multiply Returning High Half and
893         Vector Saturating Rounding Doubling Multiply Returning High Half */
894      /* These IROp's multiply corresponding elements in two vectors, double
895         the results, and place the most significant half of the final results
896         in the destination vector. The results are truncated or rounded. If
897         any of the results overflow, they are saturated. */
898      Iop_QDMulHi16Sx4, Iop_QDMulHi32Sx2,
899      Iop_QRDMulHi16Sx4, Iop_QRDMulHi32Sx2,
900
901      /* AVERAGING: note: (arg1 + arg2 + 1) >>u 1 */
902      Iop_Avg8Ux8,
903      Iop_Avg16Ux4,
904
905      /* MIN/MAX */
906      Iop_Max8Sx8, Iop_Max16Sx4, Iop_Max32Sx2,
907      Iop_Max8Ux8, Iop_Max16Ux4, Iop_Max32Ux2,
908      Iop_Min8Sx8, Iop_Min16Sx4, Iop_Min32Sx2,
909      Iop_Min8Ux8, Iop_Min16Ux4, Iop_Min32Ux2,
910
911      /* COMPARISON */
912      Iop_CmpEQ8x8,  Iop_CmpEQ16x4,  Iop_CmpEQ32x2,
913      Iop_CmpGT8Ux8, Iop_CmpGT16Ux4, Iop_CmpGT32Ux2,
914      Iop_CmpGT8Sx8, Iop_CmpGT16Sx4, Iop_CmpGT32Sx2,
915
916      /* COUNT ones / leading zeroes / leading sign bits (not including topmost
917         bit) */
918      Iop_Cnt8x8,
919      Iop_Clz8x8, Iop_Clz16x4, Iop_Clz32x2,
920      Iop_Cls8x8, Iop_Cls16x4, Iop_Cls32x2,
921      Iop_Clz64x2,
922
923      /* VECTOR x VECTOR SHIFT / ROTATE */
924      Iop_Shl8x8, Iop_Shl16x4, Iop_Shl32x2,
925      Iop_Shr8x8, Iop_Shr16x4, Iop_Shr32x2,
926      Iop_Sar8x8, Iop_Sar16x4, Iop_Sar32x2,
927      Iop_Sal8x8, Iop_Sal16x4, Iop_Sal32x2, Iop_Sal64x1,
928
929      /* VECTOR x SCALAR SHIFT (shift amt :: Ity_I8) */
930      Iop_ShlN8x8, Iop_ShlN16x4, Iop_ShlN32x2,
931      Iop_ShrN8x8, Iop_ShrN16x4, Iop_ShrN32x2,
932      Iop_SarN8x8, Iop_SarN16x4, Iop_SarN32x2,
933
934      /* VECTOR x VECTOR SATURATING SHIFT */
935      Iop_QShl8x8, Iop_QShl16x4, Iop_QShl32x2, Iop_QShl64x1,
936      Iop_QSal8x8, Iop_QSal16x4, Iop_QSal32x2, Iop_QSal64x1,
937      /* VECTOR x INTEGER SATURATING SHIFT */
938      Iop_QShlNsatSU8x8,  Iop_QShlNsatSU16x4,
939      Iop_QShlNsatSU32x2, Iop_QShlNsatSU64x1,
940      Iop_QShlNsatUU8x8,  Iop_QShlNsatUU16x4,
941      Iop_QShlNsatUU32x2, Iop_QShlNsatUU64x1,
942      Iop_QShlNsatSS8x8,  Iop_QShlNsatSS16x4,
943      Iop_QShlNsatSS32x2, Iop_QShlNsatSS64x1,
944
945      /* NARROWING (binary)
946         -- narrow 2xI64 into 1xI64, hi half from left arg */
947      /* For saturated narrowing, I believe there are 4 variants of
948         the basic arithmetic operation, depending on the signedness
949         of argument and result.  Here are examples that exemplify
950         what I mean:
951
952         QNarrow16Uto8U ( UShort x )  if (x >u 255) x = 255;
953                                      return x[7:0];
954
955         QNarrow16Sto8S ( Short x )   if (x <s -128) x = -128;
956                                      if (x >s  127) x = 127;
957                                      return x[7:0];
958
959         QNarrow16Uto8S ( UShort x )  if (x >u 127) x = 127;
960                                      return x[7:0];
961
962         QNarrow16Sto8U ( Short x )   if (x <s 0)   x = 0;
963                                      if (x >s 255) x = 255;
964                                      return x[7:0];
965      */
966      Iop_QNarrowBin16Sto8Ux8,
967      Iop_QNarrowBin16Sto8Sx8, Iop_QNarrowBin32Sto16Sx4,
968      Iop_NarrowBin16to8x8,    Iop_NarrowBin32to16x4,
969
970      /* INTERLEAVING */
971      /* Interleave lanes from low or high halves of
972         operands.  Most-significant result lane is from the left
973         arg. */
974      Iop_InterleaveHI8x8, Iop_InterleaveHI16x4, Iop_InterleaveHI32x2,
975      Iop_InterleaveLO8x8, Iop_InterleaveLO16x4, Iop_InterleaveLO32x2,
976      /* Interleave odd/even lanes of operands.  Most-significant result lane
977         is from the left arg.  Note that Interleave{Odd,Even}Lanes32x2 are
978         identical to Interleave{HI,LO}32x2 and so are omitted.*/
979      Iop_InterleaveOddLanes8x8, Iop_InterleaveEvenLanes8x8,
980      Iop_InterleaveOddLanes16x4, Iop_InterleaveEvenLanes16x4,
981
982      /* CONCATENATION -- build a new value by concatenating either
983         the even or odd lanes of both operands.  Note that
984         Cat{Odd,Even}Lanes32x2 are identical to Interleave{HI,LO}32x2
985         and so are omitted. */
986      Iop_CatOddLanes8x8, Iop_CatOddLanes16x4,
987      Iop_CatEvenLanes8x8, Iop_CatEvenLanes16x4,
988
989      /* GET / SET elements of VECTOR
990         GET is binop (I64, I8) -> I<elem_size>
991         SET is triop (I64, I8, I<elem_size>) -> I64 */
992      /* Note: the arm back-end handles only constant second argument */
993      Iop_GetElem8x8, Iop_GetElem16x4, Iop_GetElem32x2,
994      Iop_SetElem8x8, Iop_SetElem16x4, Iop_SetElem32x2,
995
996      /* DUPLICATING -- copy value to all lanes */
997      Iop_Dup8x8,   Iop_Dup16x4,   Iop_Dup32x2,
998
999      /* SLICE -- produces the lowest 64 bits of (arg1:arg2) >> (8 * arg3).
1000         arg3 is a shift amount in bytes and may be between 0 and 8
1001         inclusive.  When 0, the result is arg2; when 8, the result is arg1.
1002         Not all back ends handle all values.  The arm32 and arm64 back
1003         ends handle only immediate arg3 values. */
1004      Iop_Slice64,  // (I64, I64, I8) -> I64
1005
1006      /* REVERSE the order of chunks in vector lanes.  Chunks must be
1007         smaller than the vector lanes (obviously) and so may be 8-,
1008         16- and 32-bit in size. */
1009      /* Examples:
1010            Reverse8sIn16_x4([a,b,c,d,e,f,g,h]) = [b,a,d,c,f,e,h,g]
1011            Reverse8sIn32_x2([a,b,c,d,e,f,g,h]) = [d,c,b,a,h,g,f,e]
1012            Reverse8sIn64_x1([a,b,c,d,e,f,g,h]) = [h,g,f,e,d,c,b,a] */
1013      Iop_Reverse8sIn16_x4,
1014      Iop_Reverse8sIn32_x2, Iop_Reverse16sIn32_x2,
1015      Iop_Reverse8sIn64_x1, Iop_Reverse16sIn64_x1, Iop_Reverse32sIn64_x1,
1016
1017      /* PERMUTING -- copy src bytes to dst,
1018         as indexed by control vector bytes:
1019            for i in 0 .. 7 . result[i] = argL[ argR[i] ]
1020         argR[i] values may only be in the range 0 .. 7, else behaviour
1021         is undefined. */
1022      Iop_Perm8x8,
1023
1024      /* MISC CONVERSION -- get high bits of each byte lane, a la
1025         x86/amd64 pmovmskb */
1026      Iop_GetMSBs8x8, /* I64 -> I8 */
1027
1028      /* Vector Reciprocal Estimate and Vector Reciprocal Square Root Estimate
1029         See floating-point equivalents for details. */
1030      Iop_RecipEst32Ux2, Iop_RSqrtEst32Ux2,
1031
1032      /* ------------------ Decimal Floating Point ------------------ */
1033
1034      /* ARITHMETIC INSTRUCTIONS   64-bit
1035	 ----------------------------------
1036	 IRRoundingMode(I32) X D64 X D64 -> D64
1037      */
1038      Iop_AddD64, Iop_SubD64, Iop_MulD64, Iop_DivD64,
1039
1040      /* ARITHMETIC INSTRUCTIONS  128-bit
1041	 ----------------------------------
1042	 IRRoundingMode(I32) X D128 X D128 -> D128
1043      */
1044      Iop_AddD128, Iop_SubD128, Iop_MulD128, Iop_DivD128,
1045
1046      /* SHIFT SIGNIFICAND INSTRUCTIONS
1047       *    The DFP significand is shifted by the number of digits specified
1048       *    by the U8 operand.  Digits shifted out of the leftmost digit are
1049       *    lost. Zeros are supplied to the vacated positions on the right.
1050       *    The sign of the result is the same as the sign of the original
1051       *    operand.
1052       *
1053       * D64 x U8  -> D64    left shift and right shift respectively */
1054      Iop_ShlD64, Iop_ShrD64,
1055
1056      /* D128 x U8  -> D128  left shift and right shift respectively */
1057      Iop_ShlD128, Iop_ShrD128,
1058
1059
1060      /* FORMAT CONVERSION INSTRUCTIONS
1061       *   D32 -> D64
1062       */
1063      Iop_D32toD64,
1064
1065      /*   D64 -> D128 */
1066      Iop_D64toD128,
1067
1068      /*   I32S -> D128 */
1069      Iop_I32StoD128,
1070
1071      /*   I32U -> D128 */
1072      Iop_I32UtoD128,
1073
1074      /*   I64S -> D128 */
1075      Iop_I64StoD128,
1076
1077      /*   I64U -> D128 */
1078      Iop_I64UtoD128,
1079
1080      /*   IRRoundingMode(I32) x D64 -> D32 */
1081      Iop_D64toD32,
1082
1083      /*   IRRoundingMode(I32) x D128 -> D64 */
1084      Iop_D128toD64,
1085
1086      /*   I32S -> D64 */
1087      Iop_I32StoD64,
1088
1089      /*   I32U -> D64 */
1090      Iop_I32UtoD64,
1091
1092      /*   IRRoundingMode(I32) x I64 -> D64 */
1093      Iop_I64StoD64,
1094
1095      /*   IRRoundingMode(I32) x I64 -> D64 */
1096      Iop_I64UtoD64,
1097
1098      /*   IRRoundingMode(I32) x D64 -> I32 */
1099      Iop_D64toI32S,
1100
1101      /*   IRRoundingMode(I32) x D64 -> I32 */
1102      Iop_D64toI32U,
1103
1104      /*   IRRoundingMode(I32) x D64 -> I64 */
1105      Iop_D64toI64S,
1106
1107      /*   IRRoundingMode(I32) x D64 -> I64 */
1108      Iop_D64toI64U,
1109
1110      /*   IRRoundingMode(I32) x D128 -> I32 */
1111      Iop_D128toI32S,
1112
1113      /*   IRRoundingMode(I32) x D128 -> I32 */
1114      Iop_D128toI32U,
1115
1116      /*   IRRoundingMode(I32) x D128 -> I64 */
1117      Iop_D128toI64S,
1118
1119      /*   IRRoundingMode(I32) x D128 -> I64 */
1120      Iop_D128toI64U,
1121
1122      /*   IRRoundingMode(I32) x F32 -> D32 */
1123      Iop_F32toD32,
1124
1125      /*   IRRoundingMode(I32) x F32 -> D64 */
1126      Iop_F32toD64,
1127
1128      /*   IRRoundingMode(I32) x F32 -> D128 */
1129      Iop_F32toD128,
1130
1131      /*   IRRoundingMode(I32) x F64 -> D32 */
1132      Iop_F64toD32,
1133
1134      /*   IRRoundingMode(I32) x F64 -> D64 */
1135      Iop_F64toD64,
1136
1137      /*   IRRoundingMode(I32) x F64 -> D128 */
1138      Iop_F64toD128,
1139
1140      /*   IRRoundingMode(I32) x F128 -> D32 */
1141      Iop_F128toD32,
1142
1143      /*   IRRoundingMode(I32) x F128 -> D64 */
1144      Iop_F128toD64,
1145
1146      /*   IRRoundingMode(I32) x F128 -> D128 */
1147      Iop_F128toD128,
1148
1149      /*   IRRoundingMode(I32) x D32 -> F32 */
1150      Iop_D32toF32,
1151
1152      /*   IRRoundingMode(I32) x D32 -> F64 */
1153      Iop_D32toF64,
1154
1155      /*   IRRoundingMode(I32) x D32 -> F128 */
1156      Iop_D32toF128,
1157
1158      /*   IRRoundingMode(I32) x D64 -> F32 */
1159      Iop_D64toF32,
1160
1161      /*   IRRoundingMode(I32) x D64 -> F64 */
1162      Iop_D64toF64,
1163
1164      /*   IRRoundingMode(I32) x D64 -> F128 */
1165      Iop_D64toF128,
1166
1167      /*   IRRoundingMode(I32) x D128 -> F32 */
1168      Iop_D128toF32,
1169
1170      /*   IRRoundingMode(I32) x D128 -> F64 */
1171      Iop_D128toF64,
1172
1173      /*   IRRoundingMode(I32) x D128 -> F128 */
1174      Iop_D128toF128,
1175
1176      /* ROUNDING INSTRUCTIONS
1177       * IRRoundingMode(I32) x D64 -> D64
1178       * The D64 operand, if a finite number, it is rounded to a
1179       * floating point integer value, i.e. no fractional part.
1180       */
1181      Iop_RoundD64toInt,
1182
1183      /* IRRoundingMode(I32) x D128 -> D128 */
1184      Iop_RoundD128toInt,
1185
1186      /* COMPARE INSTRUCTIONS
1187       * D64 x D64 -> IRCmpD64Result(I32) */
1188      Iop_CmpD64,
1189
1190      /* D128 x D128 -> IRCmpD128Result(I32) */
1191      Iop_CmpD128,
1192
1193      /* COMPARE BIASED EXPONENET INSTRUCTIONS
1194       * D64 x D64 -> IRCmpD64Result(I32) */
1195      Iop_CmpExpD64,
1196
1197      /* D128 x D128 -> IRCmpD128Result(I32) */
1198      Iop_CmpExpD128,
1199
1200      /* QUANTIZE AND ROUND INSTRUCTIONS
1201       * The source operand is converted and rounded to the form with the
1202       * immediate exponent specified by the rounding and exponent parameter.
1203       *
1204       * The second operand is converted and rounded to the form
1205       * of the first operand's exponent and the rounded based on the specified
1206       * rounding mode parameter.
1207       *
1208       * IRRoundingMode(I32) x D64 x D64-> D64 */
1209      Iop_QuantizeD64,
1210
1211      /* IRRoundingMode(I32) x D128 x D128 -> D128 */
1212      Iop_QuantizeD128,
1213
1214      /* IRRoundingMode(I32) x I8 x D64 -> D64
1215       *    The Decimal Floating point operand is rounded to the requested
1216       *    significance given by the I8 operand as specified by the rounding
1217       *    mode.
1218       */
1219      Iop_SignificanceRoundD64,
1220
1221      /* IRRoundingMode(I32) x I8 x D128 -> D128 */
1222      Iop_SignificanceRoundD128,
1223
1224      /* EXTRACT AND INSERT INSTRUCTIONS
1225       * D64 -> I64
1226       *    The exponent of the D32 or D64 operand is extracted.  The
1227       *    extracted exponent is converted to a 64-bit signed binary integer.
1228       */
1229      Iop_ExtractExpD64,
1230
1231      /* D128 -> I64 */
1232      Iop_ExtractExpD128,
1233
1234      /* D64 -> I64
1235       * The number of significand digits of the D64 operand is extracted.
1236       * The number is stored as a 64-bit signed binary integer.
1237       */
1238      Iop_ExtractSigD64,
1239
1240      /* D128 -> I64 */
1241      Iop_ExtractSigD128,
1242
1243      /* I64 x D64  -> D64
1244       *    The exponent is specified by the first I64 operand the signed
1245       *    significand is given by the second I64 value.  The result is a D64
1246       *    value consisting of the specified significand and exponent whose
1247       *    sign is that of the specified significand.
1248       */
1249      Iop_InsertExpD64,
1250
1251      /* I64 x D128 -> D128 */
1252      Iop_InsertExpD128,
1253
1254      /* Support for 128-bit DFP type */
1255      Iop_D64HLtoD128, Iop_D128HItoD64, Iop_D128LOtoD64,
1256
1257      /*  I64 -> I64
1258       *     Convert 50-bit densely packed BCD string to 60 bit BCD string
1259       */
1260      Iop_DPBtoBCD,
1261
1262      /* I64 -> I64
1263       *     Convert 60 bit BCD string to 50-bit densely packed BCD string
1264       */
1265      Iop_BCDtoDPB,
1266
1267      /* BCD arithmetic instructions, (V128, V128) -> V128
1268       * The BCD format is the same as that used in the BCD<->DPB conversion
1269       * routines, except using 124 digits (vs 60) plus the trailing 4-bit
1270       * signed code. */
1271      Iop_BCDAdd, Iop_BCDSub,
1272
1273      /* Conversion I64 -> D64 */
1274      Iop_ReinterpI64asD64,
1275
1276      /* Conversion D64 -> I64 */
1277      Iop_ReinterpD64asI64,
1278
1279      /* ------------------ 128-bit SIMD FP. ------------------ */
1280
1281      /* --- 32x4 vector FP --- */
1282
1283      /* ternary :: IRRoundingMode(I32) x V128 x V128 -> V128 */
1284      Iop_Add32Fx4, Iop_Sub32Fx4, Iop_Mul32Fx4, Iop_Div32Fx4,
1285
1286      /* binary */
1287      Iop_Max32Fx4, Iop_Min32Fx4,
1288      Iop_Add32Fx2, Iop_Sub32Fx2,
1289      /* Note: For the following compares, the ppc and arm front-ends assume a
1290         nan in a lane of either argument returns zero for that lane. */
1291      Iop_CmpEQ32Fx4, Iop_CmpLT32Fx4, Iop_CmpLE32Fx4, Iop_CmpUN32Fx4,
1292      Iop_CmpGT32Fx4, Iop_CmpGE32Fx4,
1293
1294      /* Pairwise Max and Min. See integer pairwise operations for details. */
1295      Iop_PwMax32Fx4, Iop_PwMin32Fx4,
1296
1297      /* unary */
1298      Iop_Abs32Fx4,
1299      Iop_Neg32Fx4,
1300
1301      /* binary :: IRRoundingMode(I32) x V128 -> V128 */
1302      Iop_Sqrt32Fx4,
1303
1304      /* Vector Reciprocal Estimate finds an approximate reciprocal of each
1305         element in the operand vector, and places the results in the
1306         destination vector.  */
1307      Iop_RecipEst32Fx4,
1308
1309      /* Vector Reciprocal Step computes (2.0 - arg1 * arg2).
1310         Note, that if one of the arguments is zero and another one is infinity
1311         of arbitrary sign the result of the operation is 2.0. */
1312      Iop_RecipStep32Fx4,
1313
1314      /* Vector Reciprocal Square Root Estimate finds an approximate reciprocal
1315         square root of each element in the operand vector. */
1316      Iop_RSqrtEst32Fx4,
1317
1318      /* Vector Reciprocal Square Root Step computes (3.0 - arg1 * arg2) / 2.0.
1319         Note, that of one of the arguments is zero and another one is infiinty
1320         of arbitrary sign the result of the operation is 1.5. */
1321      Iop_RSqrtStep32Fx4,
1322
1323      /* --- Int to/from FP conversion --- */
1324      /* Unlike the standard fp conversions, these irops take no
1325         rounding mode argument. Instead the irop trailers _R{M,P,N,Z}
1326         indicate the mode: {-inf, +inf, nearest, zero} respectively. */
1327      Iop_I32UtoFx4,     Iop_I32StoFx4,       /* I32x4 -> F32x4       */
1328      Iop_FtoI32Ux4_RZ,  Iop_FtoI32Sx4_RZ,    /* F32x4 -> I32x4       */
1329      Iop_QFtoI32Ux4_RZ, Iop_QFtoI32Sx4_RZ,   /* F32x4 -> I32x4 (saturating) */
1330      Iop_RoundF32x4_RM, Iop_RoundF32x4_RP,   /* round to fp integer  */
1331      Iop_RoundF32x4_RN, Iop_RoundF32x4_RZ,   /* round to fp integer  */
1332      /* Fixed32 format is floating-point number with fixed number of fraction
1333         bits. The number of fraction bits is passed as a second argument of
1334         type I8. */
1335      Iop_F32ToFixed32Ux4_RZ, Iop_F32ToFixed32Sx4_RZ, /* fp -> fixed-point */
1336      Iop_Fixed32UToF32x4_RN, Iop_Fixed32SToF32x4_RN, /* fixed-point -> fp */
1337
1338      /* --- Single to/from half conversion --- */
1339      /* FIXME: what kind of rounding in F32x4 -> F16x4 case? */
1340      Iop_F32toF16x4, Iop_F16toF32x4,         /* F32x4 <-> F16x4      */
1341
1342      /* --- 32x4 lowest-lane-only scalar FP --- */
1343
1344      /* In binary cases, upper 3/4 is copied from first operand.  In
1345         unary cases, upper 3/4 is copied from the operand. */
1346
1347      /* binary */
1348      Iop_Add32F0x4, Iop_Sub32F0x4, Iop_Mul32F0x4, Iop_Div32F0x4,
1349      Iop_Max32F0x4, Iop_Min32F0x4,
1350      Iop_CmpEQ32F0x4, Iop_CmpLT32F0x4, Iop_CmpLE32F0x4, Iop_CmpUN32F0x4,
1351
1352      /* unary */
1353      Iop_RecipEst32F0x4, Iop_Sqrt32F0x4, Iop_RSqrtEst32F0x4,
1354
1355      /* --- 64x2 vector FP --- */
1356
1357      /* ternary :: IRRoundingMode(I32) x V128 x V128 -> V128 */
1358      Iop_Add64Fx2, Iop_Sub64Fx2, Iop_Mul64Fx2, Iop_Div64Fx2,
1359
1360      /* binary */
1361      Iop_Max64Fx2, Iop_Min64Fx2,
1362      Iop_CmpEQ64Fx2, Iop_CmpLT64Fx2, Iop_CmpLE64Fx2, Iop_CmpUN64Fx2,
1363
1364      /* unary */
1365      Iop_Abs64Fx2,
1366      Iop_Neg64Fx2,
1367
1368      /* binary :: IRRoundingMode(I32) x V128 -> V128 */
1369      Iop_Sqrt64Fx2,
1370
1371      /* see 32Fx4 variants for description */
1372      Iop_RecipEst64Fx2,    // unary
1373      Iop_RecipStep64Fx2,   // binary
1374      Iop_RSqrtEst64Fx2,    // unary
1375      Iop_RSqrtStep64Fx2,   // binary
1376
1377      /* --- 64x2 lowest-lane-only scalar FP --- */
1378
1379      /* In binary cases, upper half is copied from first operand.  In
1380         unary cases, upper half is copied from the operand. */
1381
1382      /* binary */
1383      Iop_Add64F0x2, Iop_Sub64F0x2, Iop_Mul64F0x2, Iop_Div64F0x2,
1384      Iop_Max64F0x2, Iop_Min64F0x2,
1385      Iop_CmpEQ64F0x2, Iop_CmpLT64F0x2, Iop_CmpLE64F0x2, Iop_CmpUN64F0x2,
1386
1387      /* unary */
1388      Iop_Sqrt64F0x2,
1389
1390      /* --- pack / unpack --- */
1391
1392      /* 64 <-> 128 bit vector */
1393      Iop_V128to64,     // :: V128 -> I64, low half
1394      Iop_V128HIto64,   // :: V128 -> I64, high half
1395      Iop_64HLtoV128,   // :: (I64,I64) -> V128
1396
1397      Iop_64UtoV128,
1398      Iop_SetV128lo64,
1399
1400      /* Copies lower 64/32/16/8 bits, zeroes out the rest. */
1401      Iop_ZeroHI64ofV128,    // :: V128 -> V128
1402      Iop_ZeroHI96ofV128,    // :: V128 -> V128
1403      Iop_ZeroHI112ofV128,   // :: V128 -> V128
1404      Iop_ZeroHI120ofV128,   // :: V128 -> V128
1405
1406      /* 32 <-> 128 bit vector */
1407      Iop_32UtoV128,
1408      Iop_V128to32,     // :: V128 -> I32, lowest lane
1409      Iop_SetV128lo32,  // :: (V128,I32) -> V128
1410
1411      /* ------------------ 128-bit SIMD Integer. ------------------ */
1412
1413      /* BITWISE OPS */
1414      Iop_NotV128,
1415      Iop_AndV128, Iop_OrV128, Iop_XorV128,
1416
1417      /* VECTOR SHIFT (shift amt :: Ity_I8) */
1418      Iop_ShlV128, Iop_ShrV128,
1419
1420      /* MISC (vector integer cmp != 0) */
1421      Iop_CmpNEZ8x16, Iop_CmpNEZ16x8, Iop_CmpNEZ32x4, Iop_CmpNEZ64x2,
1422
1423      /* ADDITION (normal / U->U sat / S->S sat) */
1424      Iop_Add8x16,    Iop_Add16x8,    Iop_Add32x4,    Iop_Add64x2,
1425      Iop_QAdd8Ux16,  Iop_QAdd16Ux8,  Iop_QAdd32Ux4,  Iop_QAdd64Ux2,
1426      Iop_QAdd8Sx16,  Iop_QAdd16Sx8,  Iop_QAdd32Sx4,  Iop_QAdd64Sx2,
1427
1428      /* ADDITION, ARM64 specific saturating variants. */
1429      /* Unsigned widen left arg, signed widen right arg, add, saturate S->S.
1430         This corresponds to SUQADD. */
1431      Iop_QAddExtUSsatSS8x16, Iop_QAddExtUSsatSS16x8,
1432      Iop_QAddExtUSsatSS32x4, Iop_QAddExtUSsatSS64x2,
1433      /* Signed widen left arg, unsigned widen right arg, add, saturate U->U.
1434         This corresponds to USQADD. */
1435      Iop_QAddExtSUsatUU8x16, Iop_QAddExtSUsatUU16x8,
1436      Iop_QAddExtSUsatUU32x4, Iop_QAddExtSUsatUU64x2,
1437
1438      /* SUBTRACTION (normal / unsigned sat / signed sat) */
1439      Iop_Sub8x16,   Iop_Sub16x8,   Iop_Sub32x4,   Iop_Sub64x2,
1440      Iop_QSub8Ux16, Iop_QSub16Ux8, Iop_QSub32Ux4, Iop_QSub64Ux2,
1441      Iop_QSub8Sx16, Iop_QSub16Sx8, Iop_QSub32Sx4, Iop_QSub64Sx2,
1442
1443      /* MULTIPLICATION (normal / high half of signed/unsigned) */
1444      Iop_Mul8x16,  Iop_Mul16x8,    Iop_Mul32x4,
1445                    Iop_MulHi16Ux8, Iop_MulHi32Ux4,
1446                    Iop_MulHi16Sx8, Iop_MulHi32Sx4,
1447      /* (widening signed/unsigned of even lanes, with lowest lane=zero) */
1448      Iop_MullEven8Ux16, Iop_MullEven16Ux8, Iop_MullEven32Ux4,
1449      Iop_MullEven8Sx16, Iop_MullEven16Sx8, Iop_MullEven32Sx4,
1450
1451      /* Widening multiplies, all of the form (I64, I64) -> V128 */
1452      Iop_Mull8Ux8, Iop_Mull8Sx8,
1453      Iop_Mull16Ux4, Iop_Mull16Sx4,
1454      Iop_Mull32Ux2, Iop_Mull32Sx2,
1455
1456      /* Signed doubling saturating widening multiplies, (I64, I64) -> V128 */
1457      Iop_QDMull16Sx4, Iop_QDMull32Sx2,
1458
1459      /* Vector Saturating Doubling Multiply Returning High Half and
1460         Vector Saturating Rounding Doubling Multiply Returning High Half.
1461         These IROps multiply corresponding elements in two vectors, double
1462         the results, and place the most significant half of the final results
1463         in the destination vector.  The results are truncated or rounded.  If
1464         any of the results overflow, they are saturated.  To be more precise,
1465         for each lane, the computed result is:
1466           QDMulHi:
1467             hi-half( sign-extend(laneL) *q sign-extend(laneR) *q 2 )
1468           QRDMulHi:
1469             hi-half( sign-extend(laneL) *q sign-extend(laneR) *q 2
1470                      +q (1 << (lane-width-in-bits - 1)) )
1471      */
1472      Iop_QDMulHi16Sx8,  Iop_QDMulHi32Sx4,  /* (V128, V128) -> V128 */
1473      Iop_QRDMulHi16Sx8, Iop_QRDMulHi32Sx4, /* (V128, V128) -> V128 */
1474
1475      /* Polynomial multiplication treats its arguments as
1476         coefficients of polynomials over {0, 1}. */
1477      Iop_PolynomialMul8x16, /* (V128, V128) -> V128 */
1478      Iop_PolynomialMull8x8, /*   (I64, I64) -> V128 */
1479
1480      /* Vector Polynomial multiplication add.   (V128, V128) -> V128
1481
1482       *** Below is the algorithm for the instructions. These Iops could
1483           be emulated to get this functionality, but the emulation would
1484           be long and messy.
1485
1486        Example for polynomial multiply add for vector of bytes
1487        do i = 0 to 15
1488            prod[i].bit[0:14] <- 0
1489            srcA <- VR[argL].byte[i]
1490            srcB <- VR[argR].byte[i]
1491            do j = 0 to 7
1492                do k = 0 to j
1493                    gbit <- srcA.bit[k] & srcB.bit[j-k]
1494                    prod[i].bit[j] <- prod[i].bit[j] ^ gbit
1495                end
1496            end
1497
1498            do j = 8 to 14
1499                do k = j-7 to 7
1500                     gbit <- (srcA.bit[k] & srcB.bit[j-k])
1501                     prod[i].bit[j] <- prod[i].bit[j] ^ gbit
1502                end
1503            end
1504        end
1505
1506        do i = 0 to 7
1507            VR[dst].hword[i] <- 0b0 || (prod[2×i] ^ prod[2×i+1])
1508        end
1509      */
1510      Iop_PolynomialMulAdd8x16, Iop_PolynomialMulAdd16x8,
1511      Iop_PolynomialMulAdd32x4, Iop_PolynomialMulAdd64x2,
1512
1513      /* PAIRWISE operations */
1514      /* Iop_PwFoo16x4( [a,b,c,d], [e,f,g,h] ) =
1515            [Foo16(a,b), Foo16(c,d), Foo16(e,f), Foo16(g,h)] */
1516      Iop_PwAdd8x16, Iop_PwAdd16x8, Iop_PwAdd32x4,
1517      Iop_PwAdd32Fx2,
1518      /* Longening variant is unary. The resulting vector contains two times
1519         less elements than operand, but they are two times wider.
1520         Example:
1521            Iop_PwAddL16Ux4( [a,b,c,d] ) = [a+b,c+d]
1522               where a+b and c+d are unsigned 32-bit values. */
1523      Iop_PwAddL8Ux16, Iop_PwAddL16Ux8, Iop_PwAddL32Ux4,
1524      Iop_PwAddL8Sx16, Iop_PwAddL16Sx8, Iop_PwAddL32Sx4,
1525
1526      /* Other unary pairwise ops */
1527
1528      /* Vector bit matrix transpose.  (V128) -> V128 */
1529      /* For each doubleword element of the source vector, an 8-bit x 8-bit
1530       * matrix transpose is performed. */
1531      Iop_PwBitMtxXpose64x2,
1532
1533      /* ABSOLUTE VALUE */
1534      Iop_Abs8x16, Iop_Abs16x8, Iop_Abs32x4, Iop_Abs64x2,
1535
1536      /* AVERAGING: note: (arg1 + arg2 + 1) >>u 1 */
1537      Iop_Avg8Ux16, Iop_Avg16Ux8, Iop_Avg32Ux4,
1538      Iop_Avg8Sx16, Iop_Avg16Sx8, Iop_Avg32Sx4,
1539
1540      /* MIN/MAX */
1541      Iop_Max8Sx16, Iop_Max16Sx8, Iop_Max32Sx4, Iop_Max64Sx2,
1542      Iop_Max8Ux16, Iop_Max16Ux8, Iop_Max32Ux4, Iop_Max64Ux2,
1543      Iop_Min8Sx16, Iop_Min16Sx8, Iop_Min32Sx4, Iop_Min64Sx2,
1544      Iop_Min8Ux16, Iop_Min16Ux8, Iop_Min32Ux4, Iop_Min64Ux2,
1545
1546      /* COMPARISON */
1547      Iop_CmpEQ8x16,  Iop_CmpEQ16x8,  Iop_CmpEQ32x4,  Iop_CmpEQ64x2,
1548      Iop_CmpGT8Sx16, Iop_CmpGT16Sx8, Iop_CmpGT32Sx4, Iop_CmpGT64Sx2,
1549      Iop_CmpGT8Ux16, Iop_CmpGT16Ux8, Iop_CmpGT32Ux4, Iop_CmpGT64Ux2,
1550
1551      /* COUNT ones / leading zeroes / leading sign bits (not including topmost
1552         bit) */
1553      Iop_Cnt8x16,
1554      Iop_Clz8x16, Iop_Clz16x8, Iop_Clz32x4,
1555      Iop_Cls8x16, Iop_Cls16x8, Iop_Cls32x4,
1556
1557      /* VECTOR x SCALAR SHIFT (shift amt :: Ity_I8) */
1558      Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4, Iop_ShlN64x2,
1559      Iop_ShrN8x16, Iop_ShrN16x8, Iop_ShrN32x4, Iop_ShrN64x2,
1560      Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4, Iop_SarN64x2,
1561
1562      /* VECTOR x VECTOR SHIFT / ROTATE */
1563      /* FIXME: I'm pretty sure the ARM32 front/back ends interpret these
1564         differently from all other targets.  The intention is that
1565         the shift amount (2nd arg) is interpreted as unsigned and
1566         only the lowest log2(lane-bits) bits are relevant.  But the
1567         ARM32 versions treat the shift amount as an 8 bit signed
1568         number.  The ARM32 uses should be replaced by the relevant
1569         vector x vector bidirectional shifts instead. */
1570      Iop_Shl8x16, Iop_Shl16x8, Iop_Shl32x4, Iop_Shl64x2,
1571      Iop_Shr8x16, Iop_Shr16x8, Iop_Shr32x4, Iop_Shr64x2,
1572      Iop_Sar8x16, Iop_Sar16x8, Iop_Sar32x4, Iop_Sar64x2,
1573      Iop_Sal8x16, Iop_Sal16x8, Iop_Sal32x4, Iop_Sal64x2,
1574      Iop_Rol8x16, Iop_Rol16x8, Iop_Rol32x4, Iop_Rol64x2,
1575
1576      /* VECTOR x VECTOR SATURATING SHIFT */
1577      Iop_QShl8x16, Iop_QShl16x8, Iop_QShl32x4, Iop_QShl64x2,
1578      Iop_QSal8x16, Iop_QSal16x8, Iop_QSal32x4, Iop_QSal64x2,
1579      /* VECTOR x INTEGER SATURATING SHIFT */
1580      Iop_QShlNsatSU8x16, Iop_QShlNsatSU16x8,
1581      Iop_QShlNsatSU32x4, Iop_QShlNsatSU64x2,
1582      Iop_QShlNsatUU8x16, Iop_QShlNsatUU16x8,
1583      Iop_QShlNsatUU32x4, Iop_QShlNsatUU64x2,
1584      Iop_QShlNsatSS8x16, Iop_QShlNsatSS16x8,
1585      Iop_QShlNsatSS32x4, Iop_QShlNsatSS64x2,
1586
1587      /* VECTOR x VECTOR BIDIRECTIONAL SATURATING (& MAYBE ROUNDING) SHIFT */
1588      /* All of type (V128, V128) -> V256. */
1589      /* The least significant 8 bits of each lane of the second
1590         operand are used as the shift amount, and interpreted signedly.
1591         Positive values mean a shift left, negative a shift right.  The
1592         result is signedly or unsignedly saturated.  There are also
1593         rounding variants, which add 2^(shift_amount-1) to the value before
1594         shifting, but only in the shift-right case.  Vacated positions
1595         are filled with zeroes.  IOW, it's either SHR or SHL, but not SAR.
1596
1597         These operations return 129 bits: one bit ("Q") indicating whether
1598         saturation occurred, and the shift result.  The result type is V256,
1599         of which the lower V128 is the shift result, and Q occupies the
1600         least significant bit of the upper V128.  All other bits of the
1601         upper V128 are zero. */
1602      // Unsigned saturation, no rounding
1603      Iop_QandUQsh8x16, Iop_QandUQsh16x8,
1604      Iop_QandUQsh32x4, Iop_QandUQsh64x2,
1605      // Signed saturation, no rounding
1606      Iop_QandSQsh8x16, Iop_QandSQsh16x8,
1607      Iop_QandSQsh32x4, Iop_QandSQsh64x2,
1608
1609      // Unsigned saturation, rounding
1610      Iop_QandUQRsh8x16, Iop_QandUQRsh16x8,
1611      Iop_QandUQRsh32x4, Iop_QandUQRsh64x2,
1612      // Signed saturation, rounding
1613      Iop_QandSQRsh8x16, Iop_QandSQRsh16x8,
1614      Iop_QandSQRsh32x4, Iop_QandSQRsh64x2,
1615
1616      /* VECTOR x VECTOR BIDIRECTIONAL (& MAYBE ROUNDING) SHIFT */
1617      /* All of type (V128, V128) -> V128 */
1618      /* The least significant 8 bits of each lane of the second
1619         operand are used as the shift amount, and interpreted signedly.
1620         Positive values mean a shift left, negative a shift right.
1621         There are also rounding variants, which add 2^(shift_amount-1)
1622         to the value before shifting, but only in the shift-right case.
1623
1624         For left shifts, the vacated places are filled with zeroes.
1625         For right shifts, the vacated places are filled with zeroes
1626         for the U variants and sign bits for the S variants. */
1627      // Signed and unsigned, non-rounding
1628      Iop_Sh8Sx16, Iop_Sh16Sx8, Iop_Sh32Sx4, Iop_Sh64Sx2,
1629      Iop_Sh8Ux16, Iop_Sh16Ux8, Iop_Sh32Ux4, Iop_Sh64Ux2,
1630
1631      // Signed and unsigned, rounding
1632      Iop_Rsh8Sx16, Iop_Rsh16Sx8, Iop_Rsh32Sx4, Iop_Rsh64Sx2,
1633      Iop_Rsh8Ux16, Iop_Rsh16Ux8, Iop_Rsh32Ux4, Iop_Rsh64Ux2,
1634
1635      /* The least significant 8 bits of each lane of the second
1636         operand are used as the shift amount, and interpreted signedly.
1637         Positive values mean a shift left, negative a shift right.  The
1638         result is signedly or unsignedly saturated.  There are also
1639         rounding variants, which add 2^(shift_amount-1) to the value before
1640         shifting, but only in the shift-right case.  Vacated positions
1641         are filled with zeroes.  IOW, it's either SHR or SHL, but not SAR.
1642      */
1643
1644      /* VECTOR x SCALAR SATURATING (& MAYBE ROUNDING) NARROWING SHIFT RIGHT */
1645      /* All of type (V128, I8) -> V128 */
1646      /* The first argument is shifted right, then narrowed to half the width
1647         by saturating it.  The second argument is a scalar shift amount that
1648         applies to all lanes, and must be a value in the range 1 to lane_width.
1649         The shift may be done signedly (Sar variants) or unsignedly (Shr
1650         variants).  The saturation is done according to the two signedness
1651         indicators at the end of the name.  For example 64Sto32U means a
1652         signed 64 bit value is saturated into an unsigned 32 bit value.
1653         Additionally, the QRS variants do rounding, that is, they add the
1654         value (1 << (shift_amount-1)) to each source lane before shifting.
1655
1656         These operations return 65 bits: one bit ("Q") indicating whether
1657         saturation occurred, and the shift result.  The result type is V128,
1658         of which the lower half is the shift result, and Q occupies the
1659         least significant bit of the upper half.  All other bits of the
1660         upper half are zero. */
1661      // No rounding, sat U->U
1662      Iop_QandQShrNnarrow16Uto8Ux8,
1663      Iop_QandQShrNnarrow32Uto16Ux4, Iop_QandQShrNnarrow64Uto32Ux2,
1664      // No rounding, sat S->S
1665      Iop_QandQSarNnarrow16Sto8Sx8,
1666      Iop_QandQSarNnarrow32Sto16Sx4, Iop_QandQSarNnarrow64Sto32Sx2,
1667      // No rounding, sat S->U
1668      Iop_QandQSarNnarrow16Sto8Ux8,
1669      Iop_QandQSarNnarrow32Sto16Ux4, Iop_QandQSarNnarrow64Sto32Ux2,
1670
1671      // Rounding, sat U->U
1672      Iop_QandQRShrNnarrow16Uto8Ux8,
1673      Iop_QandQRShrNnarrow32Uto16Ux4, Iop_QandQRShrNnarrow64Uto32Ux2,
1674      // Rounding, sat S->S
1675      Iop_QandQRSarNnarrow16Sto8Sx8,
1676      Iop_QandQRSarNnarrow32Sto16Sx4, Iop_QandQRSarNnarrow64Sto32Sx2,
1677      // Rounding, sat S->U
1678      Iop_QandQRSarNnarrow16Sto8Ux8,
1679      Iop_QandQRSarNnarrow32Sto16Ux4, Iop_QandQRSarNnarrow64Sto32Ux2,
1680
1681      /* NARROWING (binary)
1682         -- narrow 2xV128 into 1xV128, hi half from left arg */
1683      /* See comments above w.r.t. U vs S issues in saturated narrowing. */
1684      Iop_QNarrowBin16Sto8Ux16, Iop_QNarrowBin32Sto16Ux8,
1685      Iop_QNarrowBin16Sto8Sx16, Iop_QNarrowBin32Sto16Sx8,
1686      Iop_QNarrowBin16Uto8Ux16, Iop_QNarrowBin32Uto16Ux8,
1687      Iop_NarrowBin16to8x16, Iop_NarrowBin32to16x8,
1688      Iop_QNarrowBin64Sto32Sx4, Iop_QNarrowBin64Uto32Ux4,
1689      Iop_NarrowBin64to32x4,
1690
1691      /* NARROWING (unary) -- narrow V128 into I64 */
1692      Iop_NarrowUn16to8x8, Iop_NarrowUn32to16x4, Iop_NarrowUn64to32x2,
1693      /* Saturating narrowing from signed source to signed/unsigned
1694         destination */
1695      Iop_QNarrowUn16Sto8Sx8, Iop_QNarrowUn32Sto16Sx4, Iop_QNarrowUn64Sto32Sx2,
1696      Iop_QNarrowUn16Sto8Ux8, Iop_QNarrowUn32Sto16Ux4, Iop_QNarrowUn64Sto32Ux2,
1697      /* Saturating narrowing from unsigned source to unsigned destination */
1698      Iop_QNarrowUn16Uto8Ux8, Iop_QNarrowUn32Uto16Ux4, Iop_QNarrowUn64Uto32Ux2,
1699
1700      /* WIDENING -- sign or zero extend each element of the argument
1701         vector to the twice original size.  The resulting vector consists of
1702         the same number of elements but each element and the vector itself
1703         are twice as wide.
1704         All operations are I64->V128.
1705         Example
1706            Iop_Widen32Sto64x2( [a, b] ) = [c, d]
1707               where c = Iop_32Sto64(a) and d = Iop_32Sto64(b) */
1708      Iop_Widen8Uto16x8, Iop_Widen16Uto32x4, Iop_Widen32Uto64x2,
1709      Iop_Widen8Sto16x8, Iop_Widen16Sto32x4, Iop_Widen32Sto64x2,
1710
1711      /* INTERLEAVING */
1712      /* Interleave lanes from low or high halves of
1713         operands.  Most-significant result lane is from the left
1714         arg. */
1715      Iop_InterleaveHI8x16, Iop_InterleaveHI16x8,
1716      Iop_InterleaveHI32x4, Iop_InterleaveHI64x2,
1717      Iop_InterleaveLO8x16, Iop_InterleaveLO16x8,
1718      Iop_InterleaveLO32x4, Iop_InterleaveLO64x2,
1719      /* Interleave odd/even lanes of operands.  Most-significant result lane
1720         is from the left arg. */
1721      Iop_InterleaveOddLanes8x16, Iop_InterleaveEvenLanes8x16,
1722      Iop_InterleaveOddLanes16x8, Iop_InterleaveEvenLanes16x8,
1723      Iop_InterleaveOddLanes32x4, Iop_InterleaveEvenLanes32x4,
1724
1725      /* CONCATENATION -- build a new value by concatenating either
1726         the even or odd lanes of both operands.  Note that
1727         Cat{Odd,Even}Lanes64x2 are identical to Interleave{HI,LO}64x2
1728         and so are omitted. */
1729      Iop_CatOddLanes8x16, Iop_CatOddLanes16x8, Iop_CatOddLanes32x4,
1730      Iop_CatEvenLanes8x16, Iop_CatEvenLanes16x8, Iop_CatEvenLanes32x4,
1731
1732      /* GET elements of VECTOR
1733         GET is binop (V128, I8) -> I<elem_size> */
1734      /* Note: the arm back-end handles only constant second argument. */
1735      Iop_GetElem8x16, Iop_GetElem16x8, Iop_GetElem32x4, Iop_GetElem64x2,
1736
1737      /* DUPLICATING -- copy value to all lanes */
1738      Iop_Dup8x16,   Iop_Dup16x8,   Iop_Dup32x4,
1739
1740      /* SLICE -- produces the lowest 128 bits of (arg1:arg2) >> (8 * arg3).
1741         arg3 is a shift amount in bytes and may be between 0 and 16
1742         inclusive.  When 0, the result is arg2; when 16, the result is arg1.
1743         Not all back ends handle all values.  The arm64 back
1744         end handles only immediate arg3 values. */
1745      Iop_SliceV128,  // (V128, V128, I8) -> V128
1746
1747      /* REVERSE the order of chunks in vector lanes.  Chunks must be
1748         smaller than the vector lanes (obviously) and so may be 8-,
1749         16- and 32-bit in size.  See definitions of 64-bit SIMD
1750         versions above for examples. */
1751      Iop_Reverse8sIn16_x8,
1752      Iop_Reverse8sIn32_x4, Iop_Reverse16sIn32_x4,
1753      Iop_Reverse8sIn64_x2, Iop_Reverse16sIn64_x2, Iop_Reverse32sIn64_x2,
1754      Iop_Reverse1sIn8_x16, /* Reverse bits in each byte lane. */
1755
1756      /* PERMUTING -- copy src bytes to dst,
1757         as indexed by control vector bytes:
1758            for i in 0 .. 15 . result[i] = argL[ argR[i] ]
1759         argR[i] values may only be in the range 0 .. 15, else behaviour
1760         is undefined. */
1761      Iop_Perm8x16,
1762      Iop_Perm32x4, /* ditto, except argR values are restricted to 0 .. 3 */
1763
1764      /* MISC CONVERSION -- get high bits of each byte lane, a la
1765         x86/amd64 pmovmskb */
1766      Iop_GetMSBs8x16, /* V128 -> I16 */
1767
1768      /* Vector Reciprocal Estimate and Vector Reciprocal Square Root Estimate
1769         See floating-point equivalents for details. */
1770      Iop_RecipEst32Ux4, Iop_RSqrtEst32Ux4,
1771
1772      /* ------------------ 256-bit SIMD Integer. ------------------ */
1773
1774      /* Pack/unpack */
1775      Iop_V256to64_0,  // V256 -> I64, extract least significant lane
1776      Iop_V256to64_1,
1777      Iop_V256to64_2,
1778      Iop_V256to64_3,  // V256 -> I64, extract most significant lane
1779
1780      Iop_64x4toV256,  // (I64,I64,I64,I64)->V256
1781                       // first arg is most significant lane
1782
1783      Iop_V256toV128_0, // V256 -> V128, less significant lane
1784      Iop_V256toV128_1, // V256 -> V128, more significant lane
1785      Iop_V128HLtoV256, // (V128,V128)->V256, first arg is most signif
1786
1787      Iop_AndV256,
1788      Iop_OrV256,
1789      Iop_XorV256,
1790      Iop_NotV256,
1791
1792      /* MISC (vector integer cmp != 0) */
1793      Iop_CmpNEZ8x32, Iop_CmpNEZ16x16, Iop_CmpNEZ32x8, Iop_CmpNEZ64x4,
1794
1795      Iop_Add8x32,    Iop_Add16x16,    Iop_Add32x8,    Iop_Add64x4,
1796      Iop_Sub8x32,    Iop_Sub16x16,    Iop_Sub32x8,    Iop_Sub64x4,
1797
1798      Iop_CmpEQ8x32,  Iop_CmpEQ16x16,  Iop_CmpEQ32x8,  Iop_CmpEQ64x4,
1799      Iop_CmpGT8Sx32, Iop_CmpGT16Sx16, Iop_CmpGT32Sx8, Iop_CmpGT64Sx4,
1800
1801      Iop_ShlN16x16, Iop_ShlN32x8, Iop_ShlN64x4,
1802      Iop_ShrN16x16, Iop_ShrN32x8, Iop_ShrN64x4,
1803      Iop_SarN16x16, Iop_SarN32x8,
1804
1805      Iop_Max8Sx32, Iop_Max16Sx16, Iop_Max32Sx8,
1806      Iop_Max8Ux32, Iop_Max16Ux16, Iop_Max32Ux8,
1807      Iop_Min8Sx32, Iop_Min16Sx16, Iop_Min32Sx8,
1808      Iop_Min8Ux32, Iop_Min16Ux16, Iop_Min32Ux8,
1809
1810      Iop_Mul16x16, Iop_Mul32x8,
1811      Iop_MulHi16Ux16, Iop_MulHi16Sx16,
1812
1813      Iop_QAdd8Ux32, Iop_QAdd16Ux16,
1814      Iop_QAdd8Sx32, Iop_QAdd16Sx16,
1815      Iop_QSub8Ux32, Iop_QSub16Ux16,
1816      Iop_QSub8Sx32, Iop_QSub16Sx16,
1817
1818      Iop_Avg8Ux32, Iop_Avg16Ux16,
1819
1820      Iop_Perm32x8,
1821
1822      /* (V128, V128) -> V128 */
1823      Iop_CipherV128, Iop_CipherLV128, Iop_CipherSV128,
1824      Iop_NCipherV128, Iop_NCipherLV128,
1825
1826      /* Hash instructions, Federal Information Processing Standards
1827       * Publication 180-3 Secure Hash Standard. */
1828      /* (V128, I8) -> V128; The I8 input arg is (ST | SIX), where ST and
1829       * SIX are fields from the insn. See ISA 2.07 description of
1830       * vshasigmad and vshasigmaw insns.*/
1831      Iop_SHA512, Iop_SHA256,
1832
1833      /* ------------------ 256-bit SIMD FP. ------------------ */
1834
1835      /* ternary :: IRRoundingMode(I32) x V256 x V256 -> V256 */
1836      Iop_Add64Fx4, Iop_Sub64Fx4, Iop_Mul64Fx4, Iop_Div64Fx4,
1837      Iop_Add32Fx8, Iop_Sub32Fx8, Iop_Mul32Fx8, Iop_Div32Fx8,
1838
1839      Iop_Sqrt32Fx8,
1840      Iop_Sqrt64Fx4,
1841      Iop_RSqrtEst32Fx8,
1842      Iop_RecipEst32Fx8,
1843
1844      Iop_Max32Fx8, Iop_Min32Fx8,
1845      Iop_Max64Fx4, Iop_Min64Fx4,
1846      Iop_LAST      /* must be the last enumerator */
1847   }
1848   IROp;
1849
1850/* Pretty-print an op. */
1851extern void ppIROp ( IROp );
1852
1853/* For a given operand return the types of its arguments and its result. */
1854extern void typeOfPrimop ( IROp op,
1855                           /*OUTs*/ IRType* t_dst, IRType* t_arg1,
1856                           IRType* t_arg2, IRType* t_arg3, IRType* t_arg4 );
1857
1858/* Encoding of IEEE754-specified rounding modes.
1859   Note, various front and back ends rely on the actual numerical
1860   values of these, so do not change them. */
1861typedef
1862   enum {
1863      Irrm_NEAREST              = 0,  // Round to nearest, ties to even
1864      Irrm_NegINF               = 1,  // Round to negative infinity
1865      Irrm_PosINF               = 2,  // Round to positive infinity
1866      Irrm_ZERO                 = 3,  // Round toward zero
1867      Irrm_NEAREST_TIE_AWAY_0   = 4,  // Round to nearest, ties away from 0
1868      Irrm_PREPARE_SHORTER      = 5,  // Round to prepare for shorter
1869                                      // precision
1870      Irrm_AWAY_FROM_ZERO       = 6,  // Round to away from 0
1871      Irrm_NEAREST_TIE_TOWARD_0 = 7   // Round to nearest, ties towards 0
1872   }
1873   IRRoundingMode;
1874
1875/* Binary floating point comparison result values.
1876   This is also derived from what IA32 does. */
1877typedef
1878   enum {
1879      Ircr_UN = 0x45,
1880      Ircr_LT = 0x01,
1881      Ircr_GT = 0x00,
1882      Ircr_EQ = 0x40
1883   }
1884   IRCmpFResult;
1885
1886typedef IRCmpFResult IRCmpF32Result;
1887typedef IRCmpFResult IRCmpF64Result;
1888typedef IRCmpFResult IRCmpF128Result;
1889
1890/* Decimal floating point result values. */
1891typedef IRCmpFResult IRCmpDResult;
1892typedef IRCmpDResult IRCmpD64Result;
1893typedef IRCmpDResult IRCmpD128Result;
1894
1895/* ------------------ Expressions ------------------ */
1896
1897typedef struct _IRQop   IRQop;   /* forward declaration */
1898typedef struct _IRTriop IRTriop; /* forward declaration */
1899
1900
1901/* The different kinds of expressions.  Their meaning is explained below
1902   in the comments for IRExpr. */
1903typedef
1904   enum {
1905      Iex_Binder=0x1900,
1906      Iex_Get,
1907      Iex_GetI,
1908      Iex_RdTmp,
1909      Iex_Qop,
1910      Iex_Triop,
1911      Iex_Binop,
1912      Iex_Unop,
1913      Iex_Load,
1914      Iex_Const,
1915      Iex_ITE,
1916      Iex_CCall,
1917      Iex_VECRET,
1918      Iex_BBPTR
1919   }
1920   IRExprTag;
1921
1922/* An expression.  Stored as a tagged union.  'tag' indicates what kind
1923   of expression this is.  'Iex' is the union that holds the fields.  If
1924   an IRExpr 'e' has e.tag equal to Iex_Load, then it's a load
1925   expression, and the fields can be accessed with
1926   'e.Iex.Load.<fieldname>'.
1927
1928   For each kind of expression, we show what it looks like when
1929   pretty-printed with ppIRExpr().
1930*/
1931typedef
1932   struct _IRExpr
1933   IRExpr;
1934
1935struct _IRExpr {
1936   IRExprTag tag;
1937   union {
1938      /* Used only in pattern matching within Vex.  Should not be seen
1939         outside of Vex. */
1940      struct {
1941         Int binder;
1942      } Binder;
1943
1944      /* Read a guest register, at a fixed offset in the guest state.
1945         ppIRExpr output: GET:<ty>(<offset>), eg. GET:I32(0)
1946      */
1947      struct {
1948         Int    offset;    /* Offset into the guest state */
1949         IRType ty;        /* Type of the value being read */
1950      } Get;
1951
1952      /* Read a guest register at a non-fixed offset in the guest
1953         state.  This allows circular indexing into parts of the guest
1954         state, which is essential for modelling situations where the
1955         identity of guest registers is not known until run time.  One
1956         example is the x87 FP register stack.
1957
1958         The part of the guest state to be treated as a circular array
1959         is described in the IRRegArray 'descr' field.  It holds the
1960         offset of the first element in the array, the type of each
1961         element, and the number of elements.
1962
1963         The array index is indicated rather indirectly, in a way
1964         which makes optimisation easy: as the sum of variable part
1965         (the 'ix' field) and a constant offset (the 'bias' field).
1966
1967         Since the indexing is circular, the actual array index to use
1968         is computed as (ix + bias) % num-of-elems-in-the-array.
1969
1970         Here's an example.  The description
1971
1972            (96:8xF64)[t39,-7]
1973
1974         describes an array of 8 F64-typed values, the
1975         guest-state-offset of the first being 96.  This array is
1976         being indexed at (t39 - 7) % 8.
1977
1978         It is important to get the array size/type exactly correct
1979         since IR optimisation looks closely at such info in order to
1980         establish aliasing/non-aliasing between seperate GetI and
1981         PutI events, which is used to establish when they can be
1982         reordered, etc.  Putting incorrect info in will lead to
1983         obscure IR optimisation bugs.
1984
1985            ppIRExpr output: GETI<descr>[<ix>,<bias]
1986                         eg. GETI(128:8xI8)[t1,0]
1987      */
1988      struct {
1989         IRRegArray* descr; /* Part of guest state treated as circular */
1990         IRExpr*     ix;    /* Variable part of index into array */
1991         Int         bias;  /* Constant offset part of index into array */
1992      } GetI;
1993
1994      /* The value held by a temporary.
1995         ppIRExpr output: t<tmp>, eg. t1
1996      */
1997      struct {
1998         IRTemp tmp;       /* The temporary number */
1999      } RdTmp;
2000
2001      /* A quaternary operation.
2002         ppIRExpr output: <op>(<arg1>, <arg2>, <arg3>, <arg4>),
2003                      eg. MAddF64r32(t1, t2, t3, t4)
2004      */
2005      struct {
2006        IRQop* details;
2007      } Qop;
2008
2009      /* A ternary operation.
2010         ppIRExpr output: <op>(<arg1>, <arg2>, <arg3>),
2011                      eg. MulF64(1, 2.0, 3.0)
2012      */
2013      struct {
2014        IRTriop* details;
2015      } Triop;
2016
2017      /* A binary operation.
2018         ppIRExpr output: <op>(<arg1>, <arg2>), eg. Add32(t1,t2)
2019      */
2020      struct {
2021         IROp op;          /* op-code   */
2022         IRExpr* arg1;     /* operand 1 */
2023         IRExpr* arg2;     /* operand 2 */
2024      } Binop;
2025
2026      /* A unary operation.
2027         ppIRExpr output: <op>(<arg>), eg. Neg8(t1)
2028      */
2029      struct {
2030         IROp    op;       /* op-code */
2031         IRExpr* arg;      /* operand */
2032      } Unop;
2033
2034      /* A load from memory -- a normal load, not a load-linked.
2035         Load-Linkeds (and Store-Conditionals) are instead represented
2036         by IRStmt.LLSC since Load-Linkeds have side effects and so
2037         are not semantically valid IRExpr's.
2038         ppIRExpr output: LD<end>:<ty>(<addr>), eg. LDle:I32(t1)
2039      */
2040      struct {
2041         IREndness end;    /* Endian-ness of the load */
2042         IRType    ty;     /* Type of the loaded value */
2043         IRExpr*   addr;   /* Address being loaded from */
2044      } Load;
2045
2046      /* A constant-valued expression.
2047         ppIRExpr output: <con>, eg. 0x4:I32
2048      */
2049      struct {
2050         IRConst* con;     /* The constant itself */
2051      } Const;
2052
2053      /* A call to a pure (no side-effects) helper C function.
2054
2055         With the 'cee' field, 'name' is the function's name.  It is
2056         only used for pretty-printing purposes.  The address to call
2057         (host address, of course) is stored in the 'addr' field
2058         inside 'cee'.
2059
2060         The 'args' field is a NULL-terminated array of arguments.
2061         The stated return IRType, and the implied argument types,
2062         must match that of the function being called well enough so
2063         that the back end can actually generate correct code for the
2064         call.
2065
2066         The called function **must** satisfy the following:
2067
2068         * no side effects -- must be a pure function, the result of
2069           which depends only on the passed parameters.
2070
2071         * it may not look at, nor modify, any of the guest state
2072           since that would hide guest state transitions from
2073           instrumenters
2074
2075         * it may not access guest memory, since that would hide
2076           guest memory transactions from the instrumenters
2077
2078         * it must not assume that arguments are being evaluated in a
2079           particular order. The oder of evaluation is unspecified.
2080
2081         This is restrictive, but makes the semantics clean, and does
2082         not interfere with IR optimisation.
2083
2084         If you want to call a helper which can mess with guest state
2085         and/or memory, instead use Ist_Dirty.  This is a lot more
2086         flexible, but you have to give a bunch of details about what
2087         the helper does (and you better be telling the truth,
2088         otherwise any derived instrumentation will be wrong).  Also
2089         Ist_Dirty inhibits various IR optimisations and so can cause
2090         quite poor code to be generated.  Try to avoid it.
2091
2092         In principle it would be allowable to have the arg vector
2093         contain an IRExpr_VECRET(), although not IRExpr_BBPTR(). However,
2094         at the moment there is no requirement for clean helper calls to
2095         be able to return V128 or V256 values.  Hence this is not allowed.
2096
2097         ppIRExpr output: <cee>(<args>):<retty>
2098                      eg. foo{0x80489304}(t1, t2):I32
2099      */
2100      struct {
2101         IRCallee* cee;    /* Function to call. */
2102         IRType    retty;  /* Type of return value. */
2103         IRExpr**  args;   /* Vector of argument expressions. */
2104      }  CCall;
2105
2106      /* A ternary if-then-else operator.  It returns iftrue if cond is
2107         nonzero, iffalse otherwise.  Note that it is STRICT, ie. both
2108         iftrue and iffalse are evaluated in all cases.
2109
2110         ppIRExpr output: ITE(<cond>,<iftrue>,<iffalse>),
2111                         eg. ITE(t6,t7,t8)
2112      */
2113      struct {
2114         IRExpr* cond;     /* Condition */
2115         IRExpr* iftrue;   /* True expression */
2116         IRExpr* iffalse;  /* False expression */
2117      } ITE;
2118   } Iex;
2119};
2120
2121/* Expression auxiliaries: a ternary expression. */
2122struct _IRTriop {
2123   IROp op;          /* op-code   */
2124   IRExpr* arg1;     /* operand 1 */
2125   IRExpr* arg2;     /* operand 2 */
2126   IRExpr* arg3;     /* operand 3 */
2127};
2128
2129/* Expression auxiliaries: a quarternary expression. */
2130struct _IRQop {
2131   IROp op;          /* op-code   */
2132   IRExpr* arg1;     /* operand 1 */
2133   IRExpr* arg2;     /* operand 2 */
2134   IRExpr* arg3;     /* operand 3 */
2135   IRExpr* arg4;     /* operand 4 */
2136};
2137
2138
2139/* Two special kinds of IRExpr, which can ONLY be used in
2140   argument lists for dirty helper calls (IRDirty.args) and in NO
2141   OTHER PLACES.  And then only in very limited ways.  */
2142
2143/* Denotes an argument which (in the helper) takes a pointer to a
2144   (naturally aligned) V128 or V256, into which the helper is expected
2145   to write its result.  Use of IRExpr_VECRET() is strictly
2146   controlled.  If the helper returns a V128 or V256 value then
2147   IRExpr_VECRET() must appear exactly once in the arg list, although
2148   it can appear anywhere, and the helper must have a C 'void' return
2149   type.  If the helper returns any other type, IRExpr_VECRET() may
2150   not appear in the argument list. */
2151
2152/* Denotes an void* argument which is passed to the helper, which at
2153   run time will point to the thread's guest state area.  This can
2154   only appear at most once in an argument list, and it may not appear
2155   at all in argument lists for clean helper calls. */
2156
2157static inline Bool is_IRExpr_VECRET_or_BBPTR ( const IRExpr* e ) {
2158   return e->tag == Iex_VECRET || e->tag == Iex_BBPTR;
2159}
2160
2161
2162/* Expression constructors. */
2163extern IRExpr* IRExpr_Binder ( Int binder );
2164extern IRExpr* IRExpr_Get    ( Int off, IRType ty );
2165extern IRExpr* IRExpr_GetI   ( IRRegArray* descr, IRExpr* ix, Int bias );
2166extern IRExpr* IRExpr_RdTmp  ( IRTemp tmp );
2167extern IRExpr* IRExpr_Qop    ( IROp op, IRExpr* arg1, IRExpr* arg2,
2168                                        IRExpr* arg3, IRExpr* arg4 );
2169extern IRExpr* IRExpr_Triop  ( IROp op, IRExpr* arg1,
2170                                        IRExpr* arg2, IRExpr* arg3 );
2171extern IRExpr* IRExpr_Binop  ( IROp op, IRExpr* arg1, IRExpr* arg2 );
2172extern IRExpr* IRExpr_Unop   ( IROp op, IRExpr* arg );
2173extern IRExpr* IRExpr_Load   ( IREndness end, IRType ty, IRExpr* addr );
2174extern IRExpr* IRExpr_Const  ( IRConst* con );
2175extern IRExpr* IRExpr_CCall  ( IRCallee* cee, IRType retty, IRExpr** args );
2176extern IRExpr* IRExpr_ITE    ( IRExpr* cond, IRExpr* iftrue, IRExpr* iffalse );
2177extern IRExpr* IRExpr_VECRET ( void );
2178extern IRExpr* IRExpr_BBPTR  ( void );
2179
2180/* Deep-copy an IRExpr. */
2181extern IRExpr* deepCopyIRExpr ( const IRExpr* );
2182
2183/* Pretty-print an IRExpr. */
2184extern void ppIRExpr ( const IRExpr* );
2185
2186/* NULL-terminated IRExpr vector constructors, suitable for
2187   use as arg lists in clean/dirty helper calls. */
2188extern IRExpr** mkIRExprVec_0 ( void );
2189extern IRExpr** mkIRExprVec_1 ( IRExpr* );
2190extern IRExpr** mkIRExprVec_2 ( IRExpr*, IRExpr* );
2191extern IRExpr** mkIRExprVec_3 ( IRExpr*, IRExpr*, IRExpr* );
2192extern IRExpr** mkIRExprVec_4 ( IRExpr*, IRExpr*, IRExpr*, IRExpr* );
2193extern IRExpr** mkIRExprVec_5 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
2194                                IRExpr* );
2195extern IRExpr** mkIRExprVec_6 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
2196                                IRExpr*, IRExpr* );
2197extern IRExpr** mkIRExprVec_7 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
2198                                IRExpr*, IRExpr*, IRExpr* );
2199extern IRExpr** mkIRExprVec_8 ( IRExpr*, IRExpr*, IRExpr*, IRExpr*,
2200                                IRExpr*, IRExpr*, IRExpr*, IRExpr*);
2201
2202/* IRExpr copiers:
2203   - shallowCopy: shallow-copy (ie. create a new vector that shares the
2204     elements with the original).
2205   - deepCopy: deep-copy (ie. create a completely new vector). */
2206extern IRExpr** shallowCopyIRExprVec ( IRExpr** );
2207extern IRExpr** deepCopyIRExprVec ( IRExpr *const * );
2208
2209/* Make a constant expression from the given host word taking into
2210   account (of course) the host word size. */
2211extern IRExpr* mkIRExpr_HWord ( HWord );
2212
2213/* Convenience function for constructing clean helper calls. */
2214extern
2215IRExpr* mkIRExprCCall ( IRType retty,
2216                        Int regparms, const HChar* name, void* addr,
2217                        IRExpr** args );
2218
2219
2220/* Convenience functions for atoms (IRExprs which are either Iex_Tmp or
2221 * Iex_Const). */
2222static inline Bool isIRAtom ( const IRExpr* e ) {
2223   return toBool(e->tag == Iex_RdTmp || e->tag == Iex_Const);
2224}
2225
2226/* Are these two IR atoms identical?  Causes an assertion
2227   failure if they are passed non-atoms. */
2228extern Bool eqIRAtom ( const IRExpr*, const IRExpr* );
2229
2230
2231/* ------------------ Jump kinds ------------------ */
2232
2233/* This describes hints which can be passed to the dispatcher at guest
2234   control-flow transfer points.
2235
2236   Re Ijk_InvalICache and Ijk_FlushDCache: the guest state _must_ have
2237   two pseudo-registers, guest_CMSTART and guest_CMLEN, which specify
2238   the start and length of the region to be invalidated.  CM stands
2239   for "Cache Management".  These are both the size of a guest word.
2240   It is the responsibility of the relevant toIR.c to ensure that
2241   these are filled in with suitable values before issuing a jump of
2242   kind Ijk_InvalICache or Ijk_FlushDCache.
2243
2244   Ijk_InvalICache requests invalidation of translations taken from
2245   the requested range.  Ijk_FlushDCache requests flushing of the D
2246   cache for the specified range.
2247
2248   Re Ijk_EmWarn and Ijk_EmFail: the guest state must have a
2249   pseudo-register guest_EMNOTE, which is 32-bits regardless of the
2250   host or guest word size.  That register should be made to hold a
2251   VexEmNote value to indicate the reason for the exit.
2252
2253   In the case of Ijk_EmFail, the exit is fatal (Vex-generated code
2254   cannot continue) and so the jump destination can be anything.
2255
2256   Re Ijk_Sys_ (syscall jumps): the guest state must have a
2257   pseudo-register guest_IP_AT_SYSCALL, which is the size of a guest
2258   word.  Front ends should set this to be the IP at the most recently
2259   executed kernel-entering (system call) instruction.  This makes it
2260   very much easier (viz, actually possible at all) to back up the
2261   guest to restart a syscall that has been interrupted by a signal.
2262*/
2263typedef
2264   enum {
2265      Ijk_INVALID=0x1A00,
2266      Ijk_Boring,         /* not interesting; just goto next */
2267      Ijk_Call,           /* guest is doing a call */
2268      Ijk_Ret,            /* guest is doing a return */
2269      Ijk_ClientReq,      /* do guest client req before continuing */
2270      Ijk_Yield,          /* client is yielding to thread scheduler */
2271      Ijk_EmWarn,         /* report emulation warning before continuing */
2272      Ijk_EmFail,         /* emulation critical (FATAL) error; give up */
2273      Ijk_NoDecode,       /* current instruction cannot be decoded */
2274      Ijk_MapFail,        /* Vex-provided address translation failed */
2275      Ijk_InvalICache,    /* Inval icache for range [CMSTART, +CMLEN) */
2276      Ijk_FlushDCache,    /* Flush dcache for range [CMSTART, +CMLEN) */
2277      Ijk_NoRedir,        /* Jump to un-redirected guest addr */
2278      Ijk_SigILL,         /* current instruction synths SIGILL */
2279      Ijk_SigTRAP,        /* current instruction synths SIGTRAP */
2280      Ijk_SigSEGV,        /* current instruction synths SIGSEGV */
2281      Ijk_SigBUS,         /* current instruction synths SIGBUS */
2282      Ijk_SigFPE_IntDiv,  /* current instruction synths SIGFPE - IntDiv */
2283      Ijk_SigFPE_IntOvf,  /* current instruction synths SIGFPE - IntOvf */
2284      /* Unfortunately, various guest-dependent syscall kinds.  They
2285	 all mean: do a syscall before continuing. */
2286      Ijk_Sys_syscall,    /* amd64/x86 'syscall', ppc 'sc', arm 'svc #0' */
2287      Ijk_Sys_int32,      /* amd64/x86 'int $0x20' */
2288      Ijk_Sys_int128,     /* amd64/x86 'int $0x80' */
2289      Ijk_Sys_int129,     /* amd64/x86 'int $0x81' */
2290      Ijk_Sys_int130,     /* amd64/x86 'int $0x82' */
2291      Ijk_Sys_int145,     /* amd64/x86 'int $0x91' */
2292      Ijk_Sys_int210,     /* amd64/x86 'int $0xD2' */
2293      Ijk_Sys_sysenter    /* x86 'sysenter'.  guest_EIP becomes
2294                             invalid at the point this happens. */
2295   }
2296   IRJumpKind;
2297
2298extern void ppIRJumpKind ( IRJumpKind );
2299
2300
2301/* ------------------ Dirty helper calls ------------------ */
2302
2303/* A dirty call is a flexible mechanism for calling (possibly
2304   conditionally) a helper function or procedure.  The helper function
2305   may read, write or modify client memory, and may read, write or
2306   modify client state.  It can take arguments and optionally return a
2307   value.  It may return different results and/or do different things
2308   when called repeatedly with the same arguments, by means of storing
2309   private state.
2310
2311   If a value is returned, it is assigned to the nominated return
2312   temporary.
2313
2314   Dirty calls are statements rather than expressions for obvious
2315   reasons.  If a dirty call is marked as writing guest state, any
2316   pre-existing values derived from the written parts of the guest
2317   state are invalid.  Similarly, if the dirty call is stated as
2318   writing memory, any pre-existing loaded values are invalidated by
2319   it.
2320
2321   In order that instrumentation is possible, the call must state, and
2322   state correctly:
2323
2324   * Whether it reads, writes or modifies memory, and if so where.
2325
2326   * Whether it reads, writes or modifies guest state, and if so which
2327     pieces.  Several pieces may be stated, and their extents must be
2328     known at translation-time.  Each piece is allowed to repeat some
2329     number of times at a fixed interval, if required.
2330
2331   Normally, code is generated to pass just the args to the helper.
2332   However, if IRExpr_BBPTR() is present in the argument list (at most
2333   one instance is allowed), then the baseblock pointer is passed for
2334   that arg, so that the callee can access the guest state.  It is
2335   invalid for .nFxState to be zero but IRExpr_BBPTR() to be present,
2336   since .nFxState==0 is a claim that the call does not access guest
2337   state.
2338
2339   IMPORTANT NOTE re GUARDS: Dirty calls are strict, very strict.  The
2340   arguments and 'mFx' are evaluated REGARDLESS of the guard value.
2341   The order of argument evaluation is unspecified.  The guard
2342   expression is evaluated AFTER the arguments and 'mFx' have been
2343   evaluated.  'mFx' is expected (by Memcheck) to be a defined value
2344   even if the guard evaluates to false.
2345*/
2346
2347#define VEX_N_FXSTATE  7   /* enough for FXSAVE/FXRSTOR on x86 */
2348
2349/* Effects on resources (eg. registers, memory locations) */
2350typedef
2351   enum {
2352      Ifx_None=0x1B00,      /* no effect */
2353      Ifx_Read,             /* reads the resource */
2354      Ifx_Write,            /* writes the resource */
2355      Ifx_Modify,           /* modifies the resource */
2356   }
2357   IREffect;
2358
2359/* Pretty-print an IREffect */
2360extern void ppIREffect ( IREffect );
2361
2362typedef
2363   struct _IRDirty {
2364      /* What to call, and details of args/results.  .guard must be
2365         non-NULL.  If .tmp is not IRTemp_INVALID, then the call
2366         returns a result which is placed in .tmp.  If at runtime the
2367         guard evaluates to false, .tmp has an 0x555..555 bit pattern
2368         written to it.  Hence conditional calls that assign .tmp are
2369         allowed. */
2370      IRCallee* cee;    /* where to call */
2371      IRExpr*   guard;  /* :: Ity_Bit.  Controls whether call happens */
2372      /* The args vector may contain IRExpr_BBPTR() and/or
2373         IRExpr_VECRET(), in both cases, at most once. */
2374      IRExpr**  args;   /* arg vector, ends in NULL. */
2375      IRTemp    tmp;    /* to assign result to, or IRTemp_INVALID if none */
2376
2377      /* Mem effects; we allow only one R/W/M region to be stated */
2378      IREffect  mFx;    /* indicates memory effects, if any */
2379      IRExpr*   mAddr;  /* of access, or NULL if mFx==Ifx_None */
2380      Int       mSize;  /* of access, or zero if mFx==Ifx_None */
2381
2382      /* Guest state effects; up to N allowed */
2383      Int  nFxState; /* must be 0 .. VEX_N_FXSTATE */
2384      struct {
2385         IREffect fx:16;   /* read, write or modify?  Ifx_None is invalid. */
2386         UShort   offset;
2387         UShort   size;
2388         UChar    nRepeats;
2389         UChar    repeatLen;
2390      } fxState[VEX_N_FXSTATE];
2391      /* The access can be repeated, as specified by nRepeats and
2392         repeatLen.  To describe only a single access, nRepeats and
2393         repeatLen should be zero.  Otherwise, repeatLen must be a
2394         multiple of size and greater than size. */
2395      /* Overall, the parts of the guest state denoted by (offset,
2396         size, nRepeats, repeatLen) is
2397               [offset, +size)
2398            and, if nRepeats > 0,
2399               for (i = 1; i <= nRepeats; i++)
2400                  [offset + i * repeatLen, +size)
2401         A convenient way to enumerate all segments is therefore
2402            for (i = 0; i < 1 + nRepeats; i++)
2403               [offset + i * repeatLen, +size)
2404      */
2405   }
2406   IRDirty;
2407
2408/* Pretty-print a dirty call */
2409extern void     ppIRDirty ( const IRDirty* );
2410
2411/* Allocate an uninitialised dirty call */
2412extern IRDirty* emptyIRDirty ( void );
2413
2414/* Deep-copy a dirty call */
2415extern IRDirty* deepCopyIRDirty ( const IRDirty* );
2416
2417/* A handy function which takes some of the tedium out of constructing
2418   dirty helper calls.  The called function impliedly does not return
2419   any value and has a constant-True guard.  The call is marked as
2420   accessing neither guest state nor memory (hence the "unsafe"
2421   designation) -- you can change this marking later if need be.  A
2422   suitable IRCallee is constructed from the supplied bits. */
2423extern
2424IRDirty* unsafeIRDirty_0_N ( Int regparms, const HChar* name, void* addr,
2425                             IRExpr** args );
2426
2427/* Similarly, make a zero-annotation dirty call which returns a value,
2428   and assign that to the given temp. */
2429extern
2430IRDirty* unsafeIRDirty_1_N ( IRTemp dst,
2431                             Int regparms, const HChar* name, void* addr,
2432                             IRExpr** args );
2433
2434
2435/* --------------- Memory Bus Events --------------- */
2436
2437typedef
2438   enum {
2439      Imbe_Fence=0x1C00,
2440      /* Needed only on ARM.  It cancels a reservation made by a
2441         preceding Linked-Load, and needs to be handed through to the
2442         back end, just as LL and SC themselves are. */
2443      Imbe_CancelReservation
2444   }
2445   IRMBusEvent;
2446
2447extern void ppIRMBusEvent ( IRMBusEvent );
2448
2449
2450/* --------------- Compare and Swap --------------- */
2451
2452/* This denotes an atomic compare and swap operation, either
2453   a single-element one or a double-element one.
2454
2455   In the single-element case:
2456
2457     .addr is the memory address.
2458     .end  is the endianness with which memory is accessed
2459
2460     If .addr contains the same value as .expdLo, then .dataLo is
2461     written there, else there is no write.  In both cases, the
2462     original value at .addr is copied into .oldLo.
2463
2464     Types: .expdLo, .dataLo and .oldLo must all have the same type.
2465     It may be any integral type, viz: I8, I16, I32 or, for 64-bit
2466     guests, I64.
2467
2468     .oldHi must be IRTemp_INVALID, and .expdHi and .dataHi must
2469     be NULL.
2470
2471   In the double-element case:
2472
2473     .addr is the memory address.
2474     .end  is the endianness with which memory is accessed
2475
2476     The operation is the same:
2477
2478     If .addr contains the same value as .expdHi:.expdLo, then
2479     .dataHi:.dataLo is written there, else there is no write.  In
2480     both cases the original value at .addr is copied into
2481     .oldHi:.oldLo.
2482
2483     Types: .expdHi, .expdLo, .dataHi, .dataLo, .oldHi, .oldLo must
2484     all have the same type, which may be any integral type, viz: I8,
2485     I16, I32 or, for 64-bit guests, I64.
2486
2487     The double-element case is complicated by the issue of
2488     endianness.  In all cases, the two elements are understood to be
2489     located adjacently in memory, starting at the address .addr.
2490
2491       If .end is Iend_LE, then the .xxxLo component is at the lower
2492       address and the .xxxHi component is at the higher address, and
2493       each component is itself stored little-endianly.
2494
2495       If .end is Iend_BE, then the .xxxHi component is at the lower
2496       address and the .xxxLo component is at the higher address, and
2497       each component is itself stored big-endianly.
2498
2499   This allows representing more cases than most architectures can
2500   handle.  For example, x86 cannot do DCAS on 8- or 16-bit elements.
2501
2502   How to know if the CAS succeeded?
2503
2504   * if .oldLo == .expdLo (resp. .oldHi:.oldLo == .expdHi:.expdLo),
2505     then the CAS succeeded, .dataLo (resp. .dataHi:.dataLo) is now
2506     stored at .addr, and the original value there was .oldLo (resp
2507     .oldHi:.oldLo).
2508
2509   * if .oldLo != .expdLo (resp. .oldHi:.oldLo != .expdHi:.expdLo),
2510     then the CAS failed, and the original value at .addr was .oldLo
2511     (resp. .oldHi:.oldLo).
2512
2513   Hence it is easy to know whether or not the CAS succeeded.
2514*/
2515typedef
2516   struct {
2517      IRTemp    oldHi;  /* old value of *addr is written here */
2518      IRTemp    oldLo;
2519      IREndness end;    /* endianness of the data in memory */
2520      IRExpr*   addr;   /* store address */
2521      IRExpr*   expdHi; /* expected old value at *addr */
2522      IRExpr*   expdLo;
2523      IRExpr*   dataHi; /* new value for *addr */
2524      IRExpr*   dataLo;
2525   }
2526   IRCAS;
2527
2528extern void ppIRCAS ( const IRCAS* cas );
2529
2530extern IRCAS* mkIRCAS ( IRTemp oldHi, IRTemp oldLo,
2531                        IREndness end, IRExpr* addr,
2532                        IRExpr* expdHi, IRExpr* expdLo,
2533                        IRExpr* dataHi, IRExpr* dataLo );
2534
2535extern IRCAS* deepCopyIRCAS ( const IRCAS* );
2536
2537
2538/* ------------------ Circular Array Put ------------------ */
2539
2540typedef
2541   struct {
2542      IRRegArray* descr; /* Part of guest state treated as circular */
2543      IRExpr*     ix;    /* Variable part of index into array */
2544      Int         bias;  /* Constant offset part of index into array */
2545      IRExpr*     data;  /* The value to write */
2546   } IRPutI;
2547
2548extern void ppIRPutI ( const IRPutI* puti );
2549
2550extern IRPutI* mkIRPutI ( IRRegArray* descr, IRExpr* ix,
2551                          Int bias, IRExpr* data );
2552
2553extern IRPutI* deepCopyIRPutI ( const IRPutI* );
2554
2555
2556/* --------------- Guarded loads and stores --------------- */
2557
2558/* Conditional stores are straightforward.  They are the same as
2559   normal stores, with an extra 'guard' field :: Ity_I1 that
2560   determines whether or not the store actually happens.  If not,
2561   memory is unmodified.
2562
2563   The semantics of this is that 'addr' and 'data' are fully evaluated
2564   even in the case where 'guard' evaluates to zero (false).
2565*/
2566typedef
2567   struct {
2568      IREndness end;    /* Endianness of the store */
2569      IRExpr*   addr;   /* store address */
2570      IRExpr*   data;   /* value to write */
2571      IRExpr*   guard;  /* Guarding value */
2572   }
2573   IRStoreG;
2574
2575/* Conditional loads are a little more complex.  'addr' is the
2576   address, 'guard' is the guarding condition.  If the load takes
2577   place, the loaded value is placed in 'dst'.  If it does not take
2578   place, 'alt' is copied to 'dst'.  However, the loaded value is not
2579   placed directly in 'dst' -- it is first subjected to the conversion
2580   specified by 'cvt'.
2581
2582   For example, imagine doing a conditional 8-bit load, in which the
2583   loaded value is zero extended to 32 bits.  Hence:
2584   * 'dst' and 'alt' must have type I32
2585   * 'cvt' must be a unary op which converts I8 to I32.  In this
2586     example, it would be ILGop_8Uto32.
2587
2588   There is no explicit indication of the type at which the load is
2589   done, since that is inferrable from the arg type of 'cvt'.  Note
2590   that the types of 'alt' and 'dst' and the result type of 'cvt' must
2591   all be the same.
2592
2593   Semantically, 'addr' is evaluated even in the case where 'guard'
2594   evaluates to zero (false), and 'alt' is evaluated even when 'guard'
2595   evaluates to one (true).  That is, 'addr' and 'alt' are always
2596   evaluated.
2597*/
2598typedef
2599   enum {
2600      ILGop_INVALID=0x1D00,
2601      ILGop_IdentV128, /* 128 bit vector, no conversion */
2602      ILGop_Ident64,   /* 64 bit, no conversion */
2603      ILGop_Ident32,   /* 32 bit, no conversion */
2604      ILGop_16Uto32,   /* 16 bit load, Z-widen to 32 */
2605      ILGop_16Sto32,   /* 16 bit load, S-widen to 32 */
2606      ILGop_8Uto32,    /* 8 bit load, Z-widen to 32 */
2607      ILGop_8Sto32     /* 8 bit load, S-widen to 32 */
2608   }
2609   IRLoadGOp;
2610
2611typedef
2612   struct {
2613      IREndness end;    /* Endianness of the load */
2614      IRLoadGOp cvt;    /* Conversion to apply to the loaded value */
2615      IRTemp    dst;    /* Destination (LHS) of assignment */
2616      IRExpr*   addr;   /* Address being loaded from */
2617      IRExpr*   alt;    /* Value if load is not done. */
2618      IRExpr*   guard;  /* Guarding value */
2619   }
2620   IRLoadG;
2621
2622extern void ppIRStoreG ( const IRStoreG* sg );
2623
2624extern void ppIRLoadGOp ( IRLoadGOp cvt );
2625
2626extern void ppIRLoadG ( const IRLoadG* lg );
2627
2628extern IRStoreG* mkIRStoreG ( IREndness end,
2629                              IRExpr* addr, IRExpr* data,
2630                              IRExpr* guard );
2631
2632extern IRLoadG* mkIRLoadG ( IREndness end, IRLoadGOp cvt,
2633                            IRTemp dst, IRExpr* addr, IRExpr* alt,
2634                            IRExpr* guard );
2635
2636
2637/* ------------------ Statements ------------------ */
2638
2639/* The different kinds of statements.  Their meaning is explained
2640   below in the comments for IRStmt.
2641
2642   Those marked META do not represent code, but rather extra
2643   information about the code.  These statements can be removed
2644   without affecting the functional behaviour of the code, however
2645   they are required by some IR consumers such as tools that
2646   instrument the code.
2647*/
2648
2649typedef
2650   enum {
2651      Ist_NoOp=0x1E00,
2652      Ist_IMark,     /* META */
2653      Ist_AbiHint,   /* META */
2654      Ist_Put,
2655      Ist_PutI,
2656      Ist_WrTmp,
2657      Ist_Store,
2658      Ist_LoadG,
2659      Ist_StoreG,
2660      Ist_CAS,
2661      Ist_LLSC,
2662      Ist_Dirty,
2663      Ist_MBE,
2664      Ist_Exit
2665   }
2666   IRStmtTag;
2667
2668/* A statement.  Stored as a tagged union.  'tag' indicates what kind
2669   of expression this is.  'Ist' is the union that holds the fields.
2670   If an IRStmt 'st' has st.tag equal to Iex_Store, then it's a store
2671   statement, and the fields can be accessed with
2672   'st.Ist.Store.<fieldname>'.
2673
2674   For each kind of statement, we show what it looks like when
2675   pretty-printed with ppIRStmt().
2676*/
2677typedef
2678   struct _IRStmt {
2679      IRStmtTag tag;
2680      union {
2681         /* A no-op (usually resulting from IR optimisation).  Can be
2682            omitted without any effect.
2683
2684            ppIRStmt output: IR-NoOp
2685         */
2686         struct {
2687	 } NoOp;
2688
2689         /* META: instruction mark.  Marks the start of the statements
2690            that represent a single machine instruction (the end of
2691            those statements is marked by the next IMark or the end of
2692            the IRSB).  Contains the address and length of the
2693            instruction.
2694
2695            It also contains a delta value.  The delta must be
2696            subtracted from a guest program counter value before
2697            attempting to establish, by comparison with the address
2698            and length values, whether or not that program counter
2699            value refers to this instruction.  For x86, amd64, ppc32,
2700            ppc64 and arm, the delta value is zero.  For Thumb
2701            instructions, the delta value is one.  This is because, on
2702            Thumb, guest PC values (guest_R15T) are encoded using the
2703            top 31 bits of the instruction address and a 1 in the lsb;
2704            hence they appear to be (numerically) 1 past the start of
2705            the instruction they refer to.  IOW, guest_R15T on ARM
2706            holds a standard ARM interworking address.
2707
2708            ppIRStmt output: ------ IMark(<addr>, <len>, <delta>) ------,
2709                         eg. ------ IMark(0x4000792, 5, 0) ------,
2710         */
2711         struct {
2712            Addr   addr;   /* instruction address */
2713            UInt   len;    /* instruction length */
2714            UChar  delta;  /* addr = program counter as encoded in guest state
2715                                     - delta */
2716         } IMark;
2717
2718         /* META: An ABI hint, which says something about this
2719            platform's ABI.
2720
2721            At the moment, the only AbiHint is one which indicates
2722            that a given chunk of address space, [base .. base+len-1],
2723            has become undefined.  This is used on amd64-linux and
2724            some ppc variants to pass stack-redzoning hints to whoever
2725            wants to see them.  It also indicates the address of the
2726            next (dynamic) instruction that will be executed.  This is
2727            to help Memcheck to origin tracking.
2728
2729            ppIRStmt output: ====== AbiHint(<base>, <len>, <nia>) ======
2730                         eg. ====== AbiHint(t1, 16, t2) ======
2731         */
2732         struct {
2733            IRExpr* base;     /* Start  of undefined chunk */
2734            Int     len;      /* Length of undefined chunk */
2735            IRExpr* nia;      /* Address of next (guest) insn */
2736         } AbiHint;
2737
2738         /* Write a guest register, at a fixed offset in the guest state.
2739            ppIRStmt output: PUT(<offset>) = <data>, eg. PUT(60) = t1
2740         */
2741         struct {
2742            Int     offset;   /* Offset into the guest state */
2743            IRExpr* data;     /* The value to write */
2744         } Put;
2745
2746         /* Write a guest register, at a non-fixed offset in the guest
2747            state.  See the comment for GetI expressions for more
2748            information.
2749
2750            ppIRStmt output: PUTI<descr>[<ix>,<bias>] = <data>,
2751                         eg. PUTI(64:8xF64)[t5,0] = t1
2752         */
2753         struct {
2754            IRPutI* details;
2755         } PutI;
2756
2757         /* Assign a value to a temporary.  Note that SSA rules require
2758            each tmp is only assigned to once.  IR sanity checking will
2759            reject any block containing a temporary which is not assigned
2760            to exactly once.
2761
2762            ppIRStmt output: t<tmp> = <data>, eg. t1 = 3
2763         */
2764         struct {
2765            IRTemp  tmp;   /* Temporary  (LHS of assignment) */
2766            IRExpr* data;  /* Expression (RHS of assignment) */
2767         } WrTmp;
2768
2769         /* Write a value to memory.  This is a normal store, not a
2770            Store-Conditional.  To represent a Store-Conditional,
2771            instead use IRStmt.LLSC.
2772            ppIRStmt output: ST<end>(<addr>) = <data>, eg. STle(t1) = t2
2773         */
2774         struct {
2775            IREndness end;    /* Endianness of the store */
2776            IRExpr*   addr;   /* store address */
2777            IRExpr*   data;   /* value to write */
2778         } Store;
2779
2780         /* Guarded store.  Note that this is defined to evaluate all
2781            expression fields (addr, data) even if the guard evaluates
2782            to false.
2783            ppIRStmt output:
2784              if (<guard>) ST<end>(<addr>) = <data> */
2785         struct {
2786            IRStoreG* details;
2787         } StoreG;
2788
2789         /* Guarded load.  Note that this is defined to evaluate all
2790            expression fields (addr, alt) even if the guard evaluates
2791            to false.
2792            ppIRStmt output:
2793              t<tmp> = if (<guard>) <cvt>(LD<end>(<addr>)) else <alt> */
2794         struct {
2795            IRLoadG* details;
2796         } LoadG;
2797
2798         /* Do an atomic compare-and-swap operation.  Semantics are
2799            described above on a comment at the definition of IRCAS.
2800
2801            ppIRStmt output:
2802               t<tmp> = CAS<end>(<addr> :: <expected> -> <new>)
2803            eg
2804               t1 = CASle(t2 :: t3->Add32(t3,1))
2805               which denotes a 32-bit atomic increment
2806               of a value at address t2
2807
2808            A double-element CAS may also be denoted, in which case <tmp>,
2809            <expected> and <new> are all pairs of items, separated by
2810            commas.
2811         */
2812         struct {
2813            IRCAS* details;
2814         } CAS;
2815
2816         /* Either Load-Linked or Store-Conditional, depending on
2817            STOREDATA.
2818
2819            If STOREDATA is NULL then this is a Load-Linked, meaning
2820            that data is loaded from memory as normal, but a
2821            'reservation' for the address is also lodged in the
2822            hardware.
2823
2824               result = Load-Linked(addr, end)
2825
2826            The data transfer type is the type of RESULT (I32, I64,
2827            etc).  ppIRStmt output:
2828
2829               result = LD<end>-Linked(<addr>), eg. LDbe-Linked(t1)
2830
2831            If STOREDATA is not NULL then this is a Store-Conditional,
2832            hence:
2833
2834               result = Store-Conditional(addr, storedata, end)
2835
2836            The data transfer type is the type of STOREDATA and RESULT
2837            has type Ity_I1. The store may fail or succeed depending
2838            on the state of a previously lodged reservation on this
2839            address.  RESULT is written 1 if the store succeeds and 0
2840            if it fails.  eg ppIRStmt output:
2841
2842               result = ( ST<end>-Cond(<addr>) = <storedata> )
2843               eg t3 = ( STbe-Cond(t1, t2) )
2844
2845            In all cases, the address must be naturally aligned for
2846            the transfer type -- any misaligned addresses should be
2847            caught by a dominating IR check and side exit.  This
2848            alignment restriction exists because on at least some
2849            LL/SC platforms (ppc), stwcx. etc will trap w/ SIGBUS on
2850            misaligned addresses, and we have to actually generate
2851            stwcx. on the host, and we don't want it trapping on the
2852            host.
2853
2854            Summary of rules for transfer type:
2855              STOREDATA == NULL (LL):
2856                transfer type = type of RESULT
2857              STOREDATA != NULL (SC):
2858                transfer type = type of STOREDATA, and RESULT :: Ity_I1
2859         */
2860         struct {
2861            IREndness end;
2862            IRTemp    result;
2863            IRExpr*   addr;
2864            IRExpr*   storedata; /* NULL => LL, non-NULL => SC */
2865         } LLSC;
2866
2867         /* Call (possibly conditionally) a C function that has side
2868            effects (ie. is "dirty").  See the comments above the
2869            IRDirty type declaration for more information.
2870
2871            ppIRStmt output:
2872               t<tmp> = DIRTY <guard> <effects>
2873                  ::: <callee>(<args>)
2874            eg.
2875               t1 = DIRTY t27 RdFX-gst(16,4) RdFX-gst(60,4)
2876                     ::: foo{0x380035f4}(t2)
2877         */
2878         struct {
2879            IRDirty* details;
2880         } Dirty;
2881
2882         /* A memory bus event - a fence, or acquisition/release of the
2883            hardware bus lock.  IR optimisation treats all these as fences
2884            across which no memory references may be moved.
2885            ppIRStmt output: MBusEvent-Fence,
2886                             MBusEvent-BusLock, MBusEvent-BusUnlock.
2887         */
2888         struct {
2889            IRMBusEvent event;
2890         } MBE;
2891
2892         /* Conditional exit from the middle of an IRSB.
2893            ppIRStmt output: if (<guard>) goto {<jk>} <dst>
2894                         eg. if (t69) goto {Boring} 0x4000AAA:I32
2895            If <guard> is true, the guest state is also updated by
2896            PUT-ing <dst> at <offsIP>.  This is done because a
2897            taken exit must update the guest program counter.
2898         */
2899         struct {
2900            IRExpr*    guard;    /* Conditional expression */
2901            IRConst*   dst;      /* Jump target (constant only) */
2902            IRJumpKind jk;       /* Jump kind */
2903            Int        offsIP;   /* Guest state offset for IP */
2904         } Exit;
2905      } Ist;
2906   }
2907   IRStmt;
2908
2909/* Statement constructors. */
2910extern IRStmt* IRStmt_NoOp    ( void );
2911extern IRStmt* IRStmt_IMark   ( Addr addr, UInt len, UChar delta );
2912extern IRStmt* IRStmt_AbiHint ( IRExpr* base, Int len, IRExpr* nia );
2913extern IRStmt* IRStmt_Put     ( Int off, IRExpr* data );
2914extern IRStmt* IRStmt_PutI    ( IRPutI* details );
2915extern IRStmt* IRStmt_WrTmp   ( IRTemp tmp, IRExpr* data );
2916extern IRStmt* IRStmt_Store   ( IREndness end, IRExpr* addr, IRExpr* data );
2917extern IRStmt* IRStmt_StoreG  ( IREndness end, IRExpr* addr, IRExpr* data,
2918                                IRExpr* guard );
2919extern IRStmt* IRStmt_LoadG   ( IREndness end, IRLoadGOp cvt, IRTemp dst,
2920                                IRExpr* addr, IRExpr* alt, IRExpr* guard );
2921extern IRStmt* IRStmt_CAS     ( IRCAS* details );
2922extern IRStmt* IRStmt_LLSC    ( IREndness end, IRTemp result,
2923                                IRExpr* addr, IRExpr* storedata );
2924extern IRStmt* IRStmt_Dirty   ( IRDirty* details );
2925extern IRStmt* IRStmt_MBE     ( IRMBusEvent event );
2926extern IRStmt* IRStmt_Exit    ( IRExpr* guard, IRJumpKind jk, IRConst* dst,
2927                                Int offsIP );
2928
2929/* Deep-copy an IRStmt. */
2930extern IRStmt* deepCopyIRStmt ( const IRStmt* );
2931
2932/* Pretty-print an IRStmt. */
2933extern void ppIRStmt ( const IRStmt* );
2934
2935
2936/* ------------------ Basic Blocks ------------------ */
2937
2938/* Type environments: a bunch of statements, expressions, etc, are
2939   incomplete without an environment indicating the type of each
2940   IRTemp.  So this provides one.  IR temporaries are really just
2941   unsigned ints and so this provides an array, 0 .. n_types_used-1 of
2942   them.
2943*/
2944typedef
2945   struct {
2946      IRType* types;
2947      Int     types_size;
2948      Int     types_used;
2949   }
2950   IRTypeEnv;
2951
2952/* Obtain a new IRTemp */
2953extern IRTemp newIRTemp ( IRTypeEnv*, IRType );
2954
2955/* Deep-copy a type environment */
2956extern IRTypeEnv* deepCopyIRTypeEnv ( const IRTypeEnv* );
2957
2958/* Pretty-print a type environment */
2959extern void ppIRTypeEnv ( const IRTypeEnv* );
2960
2961
2962/* Code blocks, which in proper compiler terminology are superblocks
2963   (single entry, multiple exit code sequences) contain:
2964
2965   - A table giving a type for each temp (the "type environment")
2966   - An expandable array of statements
2967   - An expression of type 32 or 64 bits, depending on the
2968     guest's word size, indicating the next destination if the block
2969     executes all the way to the end, without a side exit
2970   - An indication of any special actions (JumpKind) needed
2971     for this final jump.
2972   - Offset of the IP field in the guest state.  This will be
2973     updated before the final jump is done.
2974
2975   "IRSB" stands for "IR Super Block".
2976*/
2977typedef
2978   struct {
2979      IRTypeEnv* tyenv;
2980      IRStmt**   stmts;
2981      Int        stmts_size;
2982      Int        stmts_used;
2983      IRExpr*    next;
2984      IRJumpKind jumpkind;
2985      Int        offsIP;
2986   }
2987   IRSB;
2988
2989/* Allocate a new, uninitialised IRSB */
2990extern IRSB* emptyIRSB ( void );
2991
2992/* Deep-copy an IRSB */
2993extern IRSB* deepCopyIRSB ( const IRSB* );
2994
2995/* Deep-copy an IRSB, except for the statements list, which set to be
2996   a new, empty, list of statements. */
2997extern IRSB* deepCopyIRSBExceptStmts ( const IRSB* );
2998
2999/* Pretty-print an IRSB */
3000extern void ppIRSB ( const IRSB* );
3001
3002/* Append an IRStmt to an IRSB */
3003extern void addStmtToIRSB ( IRSB*, IRStmt* );
3004
3005
3006/*---------------------------------------------------------------*/
3007/*--- Helper functions for the IR                             ---*/
3008/*---------------------------------------------------------------*/
3009
3010/* For messing with IR type environments */
3011extern IRTypeEnv* emptyIRTypeEnv  ( void );
3012
3013/* What is the type of this expression? */
3014extern IRType typeOfIRConst ( const IRConst* );
3015extern IRType typeOfIRTemp  ( const IRTypeEnv*, IRTemp );
3016extern IRType typeOfIRExpr  ( const IRTypeEnv*, const IRExpr* );
3017
3018/* What are the arg and result type for this IRLoadGOp? */
3019extern void typeOfIRLoadGOp ( IRLoadGOp cvt,
3020                              /*OUT*/IRType* t_res,
3021                              /*OUT*/IRType* t_arg );
3022
3023/* Sanity check a BB of IR */
3024extern void sanityCheckIRSB ( const  IRSB*  bb,
3025                              const  HChar* caller,
3026                              Bool   require_flatness,
3027                              IRType guest_word_size );
3028extern Bool isFlatIRStmt ( const IRStmt* );
3029
3030/* Is this any value actually in the enumeration 'IRType' ? */
3031extern Bool isPlausibleIRType ( IRType ty );
3032
3033
3034/*---------------------------------------------------------------*/
3035/*--- IR injection                                            ---*/
3036/*---------------------------------------------------------------*/
3037
3038void vex_inject_ir(IRSB *, IREndness);
3039
3040
3041#endif /* ndef __LIBVEX_IR_H */
3042
3043/*---------------------------------------------------------------*/
3044/*---                                             libvex_ir.h ---*/
3045/*---------------------------------------------------------------*/
3046