guest_s390_helpers.c revision 18bf154f1c34d8234fb25f63a3b906b46009930c
1b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn/* -*- mode: C; c-basic-offset: 3; -*- */
2b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn
3b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn/*---------------------------------------------------------------*/
4b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn/*--- begin                              guest_s390_helpers.c ---*/
5b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn/*---------------------------------------------------------------*/
6b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn
7b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn/*
8b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn   This file is part of Valgrind, a dynamic binary instrumentation
9b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn   framework.
10b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn
11b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn   Copyright IBM Corp. 2010-2013
12b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn
13b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn   This program is free software; you can redistribute it and/or
14b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn   modify it under the terms of the GNU General Public License as
15b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn   published by the Free Software Foundation; either version 2 of the
16b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn   License, or (at your option) any later version.
177cc70b4f0ad1064a4a0dce6056ad82b205887160Tyler Gunn
18b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn   This program is distributed in the hope that it will be useful, but
19b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn   WITHOUT ANY WARRANTY; without even the implied warranty of
20b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn   General Public License for more details.
22b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn
23b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn   You should have received a copy of the GNU General Public License
24b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn   along with this program; if not, write to the Free Software
25b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn   02110-1301, USA.
27b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn
28b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn   The GNU General Public License is contained in the file COPYING.
29b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn*/
30b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn
31b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn/* Contributed by Florian Krohm */
32b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn
33b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn#include "libvex_basictypes.h"
34b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn#include "libvex_emnote.h"
35b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn#include "libvex_guest_s390x.h"
36b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn#include "libvex_ir.h"
37b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn#include "libvex.h"
38b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn#include "libvex_s390x_common.h"
39b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn
40b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn#include "main_util.h"
41b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn#include "main_globals.h"
42b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn#include "guest_generic_bb_to_IR.h"
43b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn#include "guest_s390_defs.h"
44b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn#include "s390_defs.h"               /* S390_BFP_ROUND_xyzzy */
45b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn
46b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunnvoid
47b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler GunnLibVEX_GuestS390X_initialise(VexGuestS390XState *state)
48b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn{
49b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn/*------------------------------------------------------------*/
50b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn/*--- Initialise ar registers                              ---*/
51b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn/*------------------------------------------------------------*/
52b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn
53b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn   state->guest_a0 = 0;
54b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn   state->guest_a1 = 0;
55b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn   state->guest_a2 = 0;
56b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn   state->guest_a3 = 0;
57b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn   state->guest_a4 = 0;
58b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn   state->guest_a5 = 0;
59b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn   state->guest_a6 = 0;
60b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn   state->guest_a7 = 0;
61b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn   state->guest_a8 = 0;
62b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn   state->guest_a9 = 0;
63b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn   state->guest_a10 = 0;
64b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn   state->guest_a11 = 0;
65b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn   state->guest_a12 = 0;
66b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn   state->guest_a13 = 0;
67b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn   state->guest_a14 = 0;
68b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn   state->guest_a15 = 0;
69b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn
70b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn/*------------------------------------------------------------*/
71b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn/*--- Initialise fpr registers                             ---*/
72b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn/*------------------------------------------------------------*/
73b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn
74b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn   state->guest_f0 = 0;
75b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn   state->guest_f1 = 0;
76b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn   state->guest_f2 = 0;
77b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn   state->guest_f3 = 0;
78b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn   state->guest_f4 = 0;
79b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn   state->guest_f5 = 0;
80b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn   state->guest_f6 = 0;
81b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn   state->guest_f7 = 0;
82b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn   state->guest_f8 = 0;
83b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn   state->guest_f9 = 0;
84b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn   state->guest_f10 = 0;
85b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn   state->guest_f11 = 0;
86b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn   state->guest_f12 = 0;
87b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn   state->guest_f13 = 0;
88b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn   state->guest_f14 = 0;
89b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn   state->guest_f15 = 0;
90b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn
91b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn/*------------------------------------------------------------*/
92b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn/*--- Initialise gpr registers                             ---*/
93b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn/*------------------------------------------------------------*/
94b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn
95b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn   state->guest_r0 = 0;
96b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn   state->guest_r1 = 0;
97b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn   state->guest_r2 = 0;
98b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn   state->guest_r3 = 0;
99b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn   state->guest_r4 = 0;
100b1a95a77522b2a1c98b378a2e538d819918b7f2cTyler Gunn   state->guest_r5 = 0;
101   state->guest_r6 = 0;
102   state->guest_r7 = 0;
103   state->guest_r8 = 0;
104   state->guest_r9 = 0;
105   state->guest_r10 = 0;
106   state->guest_r11 = 0;
107   state->guest_r12 = 0;
108   state->guest_r13 = 0;
109   state->guest_r14 = 0;
110   state->guest_r15 = 0;
111
112/*------------------------------------------------------------*/
113/*--- Initialise S390 miscellaneous registers              ---*/
114/*------------------------------------------------------------*/
115
116   state->guest_counter = 0;
117   state->guest_fpc = 0;
118   state->guest_IA = 0;
119
120/*------------------------------------------------------------*/
121/*--- Initialise S390 pseudo registers                     ---*/
122/*------------------------------------------------------------*/
123
124   state->guest_SYSNO = 0;
125
126/*------------------------------------------------------------*/
127/*--- Initialise generic pseudo registers                  ---*/
128/*------------------------------------------------------------*/
129
130   state->guest_NRADDR = 0;
131   state->guest_CMSTART = 0;
132   state->guest_CMLEN = 0;
133   state->guest_IP_AT_SYSCALL = 0;
134   state->guest_EMNOTE = EmNote_NONE;
135   state->host_EvC_COUNTER = 0;
136   state->host_EvC_FAILADDR = 0;
137
138/*------------------------------------------------------------*/
139/*--- Initialise thunk                                     ---*/
140/*------------------------------------------------------------*/
141
142   state->guest_CC_OP = 0;
143   state->guest_CC_DEP1 = 0;
144   state->guest_CC_DEP2 = 0;
145   state->guest_CC_NDEP = 0;
146
147   __builtin_memset(state->padding, 0x0, sizeof(state->padding));
148}
149
150
151/* Figure out if any part of the guest state contained in minoff
152   .. maxoff requires precise memory exceptions.  If in doubt return
153   True (but this generates significantly slower code).  */
154Bool
155guest_s390x_state_requires_precise_mem_exns(Int minoff, Int maxoff)
156{
157   Int lr_min = S390X_GUEST_OFFSET(guest_LR);
158   Int lr_max = lr_min + 8 - 1;
159   Int sp_min = S390X_GUEST_OFFSET(guest_SP);
160   Int sp_max = sp_min + 8 - 1;
161   Int fp_min = S390X_GUEST_OFFSET(guest_FP);
162   Int fp_max = fp_min + 8 - 1;
163   Int ia_min = S390X_GUEST_OFFSET(guest_IA);
164   Int ia_max = ia_min + 8 - 1;
165
166   if (maxoff < sp_min || minoff > sp_max) {
167      /* No overlap with SP */
168      if (vex_control.iropt_register_updates == VexRegUpdSpAtMemAccess)
169         return False; // We only need to check stack pointer.
170   } else {
171      return True;
172   }
173
174   if (maxoff < lr_min || minoff > lr_max) {
175      /* No overlap with LR */
176   } else {
177      return True;
178   }
179
180   if (maxoff < fp_min || minoff > fp_max) {
181      /* No overlap with FP */
182   } else {
183      return True;
184   }
185
186   if (maxoff < ia_min || minoff > ia_max) {
187      /* No overlap with IA */
188   } else {
189      return True;
190   }
191
192   return False;
193}
194
195
196#define ALWAYSDEFD(field)                             \
197    { S390X_GUEST_OFFSET(field),            \
198      (sizeof ((VexGuestS390XState*)0)->field) }
199
200VexGuestLayout s390xGuest_layout = {
201
202   /* Total size of the guest state, in bytes. */
203   .total_sizeB = sizeof(VexGuestS390XState),
204
205   /* Describe the stack pointer. */
206   .offset_SP = S390X_GUEST_OFFSET(guest_SP),
207   .sizeof_SP = 8,
208
209   /* Describe the frame pointer. */
210   .offset_FP = S390X_GUEST_OFFSET(guest_FP),
211   .sizeof_FP = 8,
212
213   /* Describe the instruction pointer. */
214   .offset_IP = S390X_GUEST_OFFSET(guest_IA),
215   .sizeof_IP = 8,
216
217   /* Describe any sections to be regarded by Memcheck as
218      'always-defined'. */
219   .n_alwaysDefd = 9,
220
221   /* Flags thunk: OP and NDEP are always defined, whereas DEP1
222      and DEP2 have to be tracked.  See detailed comment in
223      gdefs.h on meaning of thunk fields. */
224   .alwaysDefd = {
225      /*  0 */ ALWAYSDEFD(guest_CC_OP),     /* generic */
226      /*  1 */ ALWAYSDEFD(guest_CC_NDEP),   /* generic */
227      /*  2 */ ALWAYSDEFD(guest_EMNOTE),    /* generic */
228      /*  3 */ ALWAYSDEFD(guest_CMSTART),   /* generic */
229      /*  4 */ ALWAYSDEFD(guest_CMLEN),     /* generic */
230      /*  5 */ ALWAYSDEFD(guest_IP_AT_SYSCALL), /* generic */
231      /*  6 */ ALWAYSDEFD(guest_IA),        /* control reg */
232      /*  7 */ ALWAYSDEFD(guest_fpc),       /* control reg */
233      /*  8 */ ALWAYSDEFD(guest_counter),   /* internal usage register */
234   }
235};
236
237/*------------------------------------------------------------*/
238/*--- Dirty helper for EXecute                             ---*/
239/*------------------------------------------------------------*/
240void
241s390x_dirtyhelper_EX(ULong torun)
242{
243   last_execute_target = torun;
244}
245
246
247/*------------------------------------------------------------*/
248/*--- Dirty helper for Clock instructions                  ---*/
249/*------------------------------------------------------------*/
250#if defined(VGA_s390x)
251ULong
252s390x_dirtyhelper_STCK(ULong *addr)
253{
254   UInt cc;
255
256   asm volatile("stck %0\n"
257                "ipm %1\n"
258                "srl %1,28\n"
259                : "+Q" (*addr), "=d" (cc) : : "cc");
260   return cc;
261}
262
263ULong
264s390x_dirtyhelper_STCKE(ULong *addr)
265{
266   UInt cc;
267
268   asm volatile("stcke %0\n"
269                "ipm %1\n"
270                "srl %1,28\n"
271                : "+Q" (*addr), "=d" (cc) : : "cc");
272   return cc;
273}
274
275ULong s390x_dirtyhelper_STCKF(ULong *addr)
276{
277   UInt cc;
278
279   asm volatile(".insn s,0xb27c0000,%0\n"
280                "ipm %1\n"
281                "srl %1,28\n"
282                : "+Q" (*addr), "=d" (cc) : : "cc");
283   return cc;
284}
285#else
286ULong s390x_dirtyhelper_STCK(ULong *addr)  {return 3;}
287ULong s390x_dirtyhelper_STCKF(ULong *addr) {return 3;}
288ULong s390x_dirtyhelper_STCKE(ULong *addr) {return 3;}
289#endif /* VGA_s390x */
290
291/*------------------------------------------------------------*/
292/*--- Dirty helper for Store Facility instruction          ---*/
293/*------------------------------------------------------------*/
294#if defined(VGA_s390x)
295static void
296s390_set_facility_bit(ULong *addr, UInt bitno, UInt value)
297{
298   addr  += bitno / 64;
299   bitno  = bitno % 64;
300
301   ULong mask = 1;
302   mask <<= (63 - bitno);
303
304   if (value == 1) {
305      *addr |= mask;   // set
306   } else {
307      *addr &= ~mask;  // clear
308   }
309}
310
311ULong
312s390x_dirtyhelper_STFLE(VexGuestS390XState *guest_state, ULong *addr)
313{
314   ULong hoststfle[S390_NUM_FACILITY_DW], cc, num_dw, i;
315   register ULong reg0 asm("0") = guest_state->guest_r0 & 0xF;  /* r0[56:63] */
316
317   /* We cannot store more than S390_NUM_FACILITY_DW
318      (and it makes not much sense to do so anyhow) */
319   if (reg0 > S390_NUM_FACILITY_DW - 1)
320      reg0 = S390_NUM_FACILITY_DW - 1;
321
322   num_dw = reg0 + 1;  /* number of double words written */
323
324   asm volatile(" .insn s,0xb2b00000,%0\n"   /* stfle */
325                "ipm    %2\n"
326                "srl    %2,28\n"
327                : "=m" (hoststfle), "+d"(reg0), "=d"(cc) : : "cc", "memory");
328
329   /* Update guest register 0  with what STFLE set r0 to */
330   guest_state->guest_r0 = reg0;
331
332   /* Set default: VM facilities = host facilities */
333   for (i = 0; i < num_dw; ++i)
334      addr[i] = hoststfle[i];
335
336   /* Now adjust the VM facilities according to what the VM supports */
337   s390_set_facility_bit(addr, S390_FAC_LDISP,  1);
338   s390_set_facility_bit(addr, S390_FAC_EIMM,   1);
339   s390_set_facility_bit(addr, S390_FAC_ETF2,   1);
340   s390_set_facility_bit(addr, S390_FAC_ETF3,   1);
341   s390_set_facility_bit(addr, S390_FAC_GIE,    1);
342   s390_set_facility_bit(addr, S390_FAC_EXEXT,  1);
343   s390_set_facility_bit(addr, S390_FAC_HIGHW,  1);
344
345   s390_set_facility_bit(addr, S390_FAC_HFPMAS, 0);
346   s390_set_facility_bit(addr, S390_FAC_HFPUNX, 0);
347   s390_set_facility_bit(addr, S390_FAC_XCPUT,  0);
348   s390_set_facility_bit(addr, S390_FAC_MSA,    0);
349   s390_set_facility_bit(addr, S390_FAC_PENH,   0);
350   s390_set_facility_bit(addr, S390_FAC_DFP,    0);
351   s390_set_facility_bit(addr, S390_FAC_PFPO,   0);
352   s390_set_facility_bit(addr, S390_FAC_DFPZC,  0);
353   s390_set_facility_bit(addr, S390_FAC_MISC,   0);
354   s390_set_facility_bit(addr, S390_FAC_CTREXE, 0);
355   s390_set_facility_bit(addr, S390_FAC_TREXE,  0);
356   s390_set_facility_bit(addr, S390_FAC_MSA4,   0);
357
358   return cc;
359}
360
361#else
362
363ULong
364s390x_dirtyhelper_STFLE(VexGuestS390XState *guest_state, ULong *addr)
365{
366   return 3;
367}
368#endif /* VGA_s390x */
369
370/*------------------------------------------------------------*/
371/*--- Dirty helper for the "convert unicode" insn family.  ---*/
372/*------------------------------------------------------------*/
373void
374s390x_dirtyhelper_CUxy(UChar *address, ULong data, ULong num_bytes)
375{
376   UInt i;
377
378   vassert(num_bytes >= 1 && num_bytes <= 4);
379
380   /* Store the least significant NUM_BYTES bytes in DATA left to right
381      at ADDRESS. */
382   for (i = 1; i <= num_bytes; ++i) {
383      address[num_bytes - i] = data & 0xff;
384      data >>= 8;
385   }
386}
387
388
389/*------------------------------------------------------------*/
390/*--- Clean helper for CU21.                               ---*/
391/*------------------------------------------------------------*/
392
393/* The function performs a CU21 operation. It returns three things
394   encoded in an ULong value:
395   - the converted bytes (at most 4)
396   - the number of converted bytes
397   - an indication whether LOW_SURROGATE, if any, is invalid
398
399   64      48                16           8                       0
400    +-------+-----------------+-----------+-----------------------+
401    |  0x0  | converted bytes | num_bytes | invalid_low_surrogate |
402    +-------+-----------------+-----------+-----------------------+
403*/
404ULong
405s390_do_cu21(UInt srcval, UInt low_surrogate)
406{
407   ULong retval = 0;   // shut up gcc
408   UInt b1, b2, b3, b4, num_bytes, invalid_low_surrogate = 0;
409
410   srcval &= 0xffff;
411
412   /* Determine the number of bytes in the converted value */
413   if (srcval <= 0x007f)
414      num_bytes = 1;
415   else if (srcval >= 0x0080 && srcval <= 0x07ff)
416      num_bytes = 2;
417   else if ((srcval >= 0x0800 && srcval <= 0xd7ff) ||
418            (srcval >= 0xdc00 && srcval <= 0xffff))
419      num_bytes = 3;
420   else
421      num_bytes = 4;
422
423   /* Determine UTF-8 bytes according to calculated num_bytes */
424   switch (num_bytes){
425   case 1:
426      retval = srcval;
427      break;
428
429   case 2:
430      /* order of bytes left to right: b1, b2 */
431      b1  = 0xc0;
432      b1 |= srcval >> 6;
433
434      b2  = 0x80;
435      b2 |= srcval & 0x3f;
436
437      retval = (b1 << 8) | b2;
438      break;
439
440   case 3:
441      /* order of bytes left to right: b1, b2, b3 */
442      b1  = 0xe0;
443      b1 |= srcval >> 12;
444
445      b2  = 0x80;
446      b2 |= (srcval >> 6) & 0x3f;
447
448      b3  = 0x80;
449      b3 |= srcval & 0x3f;
450
451      retval = (b1 << 16) | (b2 << 8) | b3;
452      break;
453
454   case 4: {
455      /* order of bytes left to right: b1, b2, b3, b4 */
456      UInt high_surrogate = srcval;
457      UInt uvwxy = ((high_surrogate >> 6) & 0xf) + 1;   // abcd + 1
458
459      b1  = 0xf0;
460      b1 |= uvwxy >> 2;     // uvw
461
462      b2  = 0x80;
463      b2 |= (uvwxy & 0x3) << 4;           // xy
464      b2 |= (high_surrogate >> 2) & 0xf;  // efgh
465
466      b3  = 0x80;
467      b3 |= (high_surrogate & 0x3) << 4;   // ij
468      b3 |= (low_surrogate >> 6) & 0xf;    // klmn
469
470      b4  = 0x80;
471      b4 |= low_surrogate & 0x3f;
472
473      retval = (b1 << 24) | (b2 << 16) | (b3 << 8) | b4;
474
475      invalid_low_surrogate = (low_surrogate & 0xfc00) != 0xdc00;
476      break;
477   }
478   }
479
480   /* At this point RETVAL contains the converted bytes.
481      Build up the final return value. */
482   return (retval << 16) | (num_bytes << 8) | invalid_low_surrogate;
483}
484
485
486/*------------------------------------------------------------*/
487/*--- Clean helper for CU24.                               ---*/
488/*------------------------------------------------------------*/
489
490/* The function performs a CU24 operation. It returns two things
491   encoded in an ULong value:
492   - the 4 converted bytes
493   - an indication whether LOW_SURROGATE, if any, is invalid
494
495   64     40                 8                       0
496    +------------------------+-----------------------+
497    |  0x0 | converted bytes | invalid_low_surrogate |
498    +------------------------+-----------------------+
499*/
500ULong
501s390_do_cu24(UInt srcval, UInt low_surrogate)
502{
503   ULong retval;
504   UInt invalid_low_surrogate = 0;
505
506   srcval &= 0xffff;
507
508   if ((srcval >= 0x0000 && srcval <= 0xd7ff) ||
509       (srcval >= 0xdc00 && srcval <= 0xffff)) {
510      retval = srcval;
511   } else {
512      /* D800 - DBFF */
513      UInt high_surrogate = srcval;
514      UInt uvwxy  = ((high_surrogate >> 6) & 0xf) + 1;   // abcd + 1
515      UInt efghij = high_surrogate & 0x3f;
516      UInt klmnoprst = low_surrogate & 0x3ff;
517
518      retval = (uvwxy << 16) | (efghij << 10) | klmnoprst;
519
520      invalid_low_surrogate = (low_surrogate & 0xfc00) != 0xdc00;
521   }
522
523   /* At this point RETVAL contains the converted bytes.
524      Build up the final return value. */
525   return (retval << 8) | invalid_low_surrogate;
526}
527
528
529/*------------------------------------------------------------*/
530/*--- Clean helper for CU42.                               ---*/
531/*------------------------------------------------------------*/
532
533/* The function performs a CU42 operation. It returns three things
534   encoded in an ULong value:
535   - the converted bytes (at most 4)
536   - the number of converted bytes (2 or 4; 0 if invalid character)
537   - an indication whether the UTF-32 character is invalid
538
539   64      48                16           8                   0
540    +-------+-----------------+-----------+-------------------+
541    |  0x0  | converted bytes | num_bytes | invalid_character |
542    +-------+-----------------+-----------+-------------------+
543*/
544ULong
545s390_do_cu42(UInt srcval)
546{
547   ULong retval;
548   UInt num_bytes, invalid_character = 0;
549
550   if ((srcval >= 0x0000 && srcval <= 0xd7ff) ||
551       (srcval >= 0xdc00 && srcval <= 0xffff)) {
552      retval = srcval;
553      num_bytes = 2;
554   } else if (srcval >= 0x00010000 && srcval <= 0x0010FFFF) {
555      UInt uvwxy  = srcval >> 16;
556      UInt abcd   = (uvwxy - 1) & 0xf;
557      UInt efghij = (srcval >> 10) & 0x3f;
558
559      UInt high_surrogate = (0xd8 << 8) | (abcd << 6) | efghij;
560      UInt low_surrogate  = (0xdc << 8) | (srcval & 0x3ff);
561
562      retval = (high_surrogate << 16) | low_surrogate;
563      num_bytes = 4;
564   } else {
565      /* D800 - DBFF or 00110000 - FFFFFFFF */
566      invalid_character = 1;
567      retval = num_bytes = 0;   /* does not matter; not used */
568   }
569
570   /* At this point RETVAL contains the converted bytes.
571      Build up the final return value. */
572   return (retval << 16) | (num_bytes << 8) | invalid_character;
573}
574
575
576/*------------------------------------------------------------*/
577/*--- Clean helper for CU41.                               ---*/
578/*------------------------------------------------------------*/
579
580/* The function performs a CU41 operation. It returns three things
581   encoded in an ULong value:
582   - the converted bytes (at most 4)
583   - the number of converted bytes (1, 2, 3, or 4; 0 if invalid character)
584   - an indication whether the UTF-32 character is invalid
585
586   64      48                16           8                   0
587    +-------+-----------------+-----------+-------------------+
588    |  0x0  | converted bytes | num_bytes | invalid_character |
589    +-------+-----------------+-----------+-------------------+
590*/
591ULong
592s390_do_cu41(UInt srcval)
593{
594   ULong retval;
595   UInt num_bytes, invalid_character = 0;
596
597   if (srcval <= 0x7f) {
598      retval = srcval;
599      num_bytes = 1;
600   } else if (srcval >= 0x80 && srcval <= 0x7ff) {
601      UInt fghij  = srcval >> 6;
602      UInt klmnop = srcval & 0x3f;
603      UInt byte1  = (0xc0 | fghij);
604      UInt byte2  = (0x80 | klmnop);
605
606      retval = (byte1 << 8) | byte2;
607      num_bytes = 2;
608   } else if ((srcval >= 0x800  && srcval <= 0xd7ff) ||
609              (srcval >= 0xdc00 && srcval <= 0xffff)) {
610      UInt abcd   = srcval >> 12;
611      UInt efghij = (srcval >> 6) & 0x3f;
612      UInt klmnop = srcval & 0x3f;
613      UInt byte1  = 0xe0 | abcd;
614      UInt byte2  = 0x80 | efghij;
615      UInt byte3  = 0x80 | klmnop;
616
617      retval = (byte1 << 16) | (byte2 << 8) | byte3;
618      num_bytes = 3;
619   } else if (srcval >= 0x10000 && srcval <= 0x10ffff) {
620      UInt uvw    = (srcval >> 18) & 0x7;
621      UInt xy     = (srcval >> 16) & 0x3;
622      UInt efgh   = (srcval >> 12) & 0xf;
623      UInt ijklmn = (srcval >>  6) & 0x3f;
624      UInt opqrst = srcval & 0x3f;
625      UInt byte1  = 0xf0 | uvw;
626      UInt byte2  = 0x80 | (xy << 4) | efgh;
627      UInt byte3  = 0x80 | ijklmn;
628      UInt byte4  = 0x80 | opqrst;
629
630      retval = (byte1 << 24) | (byte2 << 16) | (byte3 << 8) | byte4;
631      num_bytes = 4;
632   } else {
633      /* d800 ... dbff or 00110000 ... ffffffff */
634      invalid_character = 1;
635
636      retval = 0;
637      num_bytes = 0;
638   }
639
640   /* At this point RETVAL contains the converted bytes.
641      Build up the final return value. */
642   return (retval << 16) | (num_bytes << 8) | invalid_character;
643}
644
645
646/*------------------------------------------------------------*/
647/*--- Clean helpers for CU12.                              ---*/
648/*------------------------------------------------------------*/
649
650/* The function looks at the first byte of an UTF-8 character and returns
651   two things encoded in an ULong value:
652
653   - the number of bytes that need to be read
654   - an indication whether the UTF-8 character is invalid
655
656   64      16           8                   0
657    +-------------------+-------------------+
658    |  0x0  | num_bytes | invalid_character |
659    +-------+-----------+-------------------+
660*/
661ULong
662s390_do_cu12_cu14_helper1(UInt byte, UInt etf3_and_m3_is_1)
663{
664   vassert(byte <= 0xff);
665
666   /* Check whether the character is invalid */
667   if (byte >= 0x80 && byte <= 0xbf) return 1;
668   if (byte >= 0xf8) return 1;
669
670   if (etf3_and_m3_is_1) {
671      if (byte == 0xc0 || byte == 0xc1) return 1;
672      if (byte >= 0xf5 && byte <= 0xf7) return 1;
673   }
674
675   /* Character is valid */
676   if (byte <= 0x7f) return 1 << 8;   // 1 byte
677   if (byte <= 0xdf) return 2 << 8;   // 2 bytes
678   if (byte <= 0xef) return 3 << 8;   // 3 bytes
679
680   return 4 << 8;  // 4 bytes
681}
682
683/* The function performs a CU12 or CU14 operation. BYTE1, BYTE2, etc are the
684   bytes as read from the input stream, left to right. BYTE1 is a valid
685   byte. The function returns three things encoded in an ULong value:
686
687   - the converted bytes
688   - the number of converted bytes (2 or 4; 0 if invalid character)
689   - an indication whether the UTF-16 character is invalid
690
691   64      48                16           8                   0
692    +-------+-----------------+-----------+-------------------+
693    |  0x0  | converted bytes | num_bytes | invalid_character |
694    +-------+-----------------+-----------+-------------------+
695*/
696static ULong
697s390_do_cu12_cu14_helper2(UInt byte1, UInt byte2, UInt byte3, UInt byte4,
698                          ULong stuff, Bool is_cu12)
699{
700   UInt num_src_bytes = stuff >> 1, etf3_and_m3_is_1 = stuff & 0x1;
701   UInt num_bytes = 0, invalid_character = 0;
702   ULong retval = 0;
703
704   vassert(num_src_bytes <= 4);
705
706   switch (num_src_bytes) {
707   case 1:
708      num_bytes = 2;
709      retval = byte1;
710      break;
711
712   case 2: {
713      /* Test validity */
714      if (etf3_and_m3_is_1) {
715         if (byte2 < 0x80 || byte2 > 0xbf) {
716            invalid_character = 1;
717            break;
718         }
719      }
720
721      /* OK */
722      UInt fghij  = byte1 & 0x1f;
723      UInt klmnop = byte2 & 0x3f;
724
725      num_bytes = 2;
726      retval = (fghij << 6) | klmnop;
727      break;
728   }
729
730   case 3: {
731      /* Test validity */
732      if (etf3_and_m3_is_1) {
733         if (byte1 == 0xe0) {
734            if ((byte2 < 0xa0 || byte2 > 0xbf) ||
735                (byte3 < 0x80 || byte3 > 0xbf)) {
736               invalid_character = 1;
737               break;
738            }
739         }
740         if ((byte1 >= 0xe1 && byte1 <= 0xec) ||
741             byte1 == 0xee || byte1 == 0xef) {
742            if ((byte2 < 0x80 || byte2 > 0xbf) ||
743                (byte3 < 0x80 || byte3 > 0xbf)) {
744               invalid_character = 1;
745               break;
746            }
747         }
748         if (byte1 == 0xed) {
749            if ((byte2 < 0x80 || byte2 > 0x9f) ||
750                (byte3 < 0x80 || byte3 > 0xbf)) {
751               invalid_character = 1;
752               break;
753            }
754         }
755      }
756
757      /* OK */
758      UInt abcd   = byte1 & 0xf;
759      UInt efghij = byte2 & 0x3f;
760      UInt klmnop = byte3 & 0x3f;
761
762      num_bytes = 2;
763      retval = (abcd << 12) | (efghij << 6) | klmnop;
764      break;
765   }
766
767   case 4: {
768      /* Test validity */
769      if (etf3_and_m3_is_1) {
770         if (byte1 == 0xf0) {
771            if ((byte2 < 0x90 || byte2 > 0xbf) ||
772                (byte3 < 0x80 || byte3 > 0xbf) ||
773                (byte4 < 0x80 || byte4 > 0xbf)) {
774               invalid_character = 1;
775               break;
776            }
777         }
778         if (byte1 == 0xf1 || byte1 == 0xf2 || byte1 == 0xf3) {
779            if ((byte2 < 0x80 || byte2 > 0xbf) ||
780                (byte3 < 0x80 || byte3 > 0xbf) ||
781                (byte4 < 0x80 || byte4 > 0xbf)) {
782               invalid_character = 1;
783               break;
784            }
785         }
786         if (byte1 == 0xf4) {
787            if ((byte2 < 0x80 || byte2 > 0x8f) ||
788                (byte3 < 0x80 || byte3 > 0xbf) ||
789                (byte4 < 0x80 || byte4 > 0xbf)) {
790               invalid_character = 1;
791               break;
792            }
793         }
794      }
795
796      /* OK */
797      UInt uvw    = byte1 & 0x7;
798      UInt xy     = (byte2 >> 4) & 0x3;
799      UInt uvwxy  = (uvw << 2) | xy;
800      UInt efgh   = byte2 & 0xf;
801      UInt ij     = (byte3 >> 4) & 0x3;
802      UInt klmn   = byte3 & 0xf;
803      UInt opqrst = byte4 & 0x3f;
804
805      if (is_cu12) {
806         UInt abcd = (uvwxy - 1) & 0xf;
807         UInt high_surrogate = (0xd8 << 8) | (abcd << 6) | (efgh << 2) | ij;
808         UInt low_surrogate  = (0xdc << 8) | (klmn << 6) | opqrst;
809
810         num_bytes = 4;
811         retval = (high_surrogate << 16) | low_surrogate;
812      } else {
813         num_bytes = 4;
814         retval =
815            (uvwxy << 16) | (efgh << 12) | (ij << 10) | (klmn << 6) | opqrst;
816      }
817      break;
818   }
819   }
820
821   if (! is_cu12) num_bytes = 4;   // for CU14, by definition
822
823   /* At this point RETVAL contains the converted bytes.
824      Build up the final return value. */
825   return (retval << 16) | (num_bytes << 8) | invalid_character;
826}
827
828ULong
829s390_do_cu12_helper2(UInt byte1, UInt byte2, UInt byte3, UInt byte4,
830                     ULong stuff)
831{
832   return s390_do_cu12_cu14_helper2(byte1, byte2, byte3, byte4, stuff,
833                                    /* is_cu12 = */ 1);
834}
835
836ULong
837s390_do_cu14_helper2(UInt byte1, UInt byte2, UInt byte3, UInt byte4,
838                     ULong stuff)
839{
840   return s390_do_cu12_cu14_helper2(byte1, byte2, byte3, byte4, stuff,
841                                    /* is_cu12 = */ 0);
842}
843
844
845/*------------------------------------------------------------*/
846/*--- Clean helper for "convert to binary".                ---*/
847/*------------------------------------------------------------*/
848#if defined(VGA_s390x)
849UInt
850s390_do_cvb(ULong decimal)
851{
852   UInt binary;
853
854   __asm__ volatile (
855        "cvb %[result],%[input]\n\t"
856          : [result] "=d"(binary)
857          : [input] "m"(decimal)
858   );
859
860   return binary;
861}
862
863#else
864UInt s390_do_cvb(ULong decimal) { return 0; }
865#endif
866
867
868/*------------------------------------------------------------*/
/*--- Clean helper for "convert to decimal".               ---*/
870/*------------------------------------------------------------*/
871#if defined(VGA_s390x)
872ULong
873s390_do_cvd(ULong binary_in)
874{
875   UInt binary = binary_in & 0xffffffffULL;
876   ULong decimal;
877
878   __asm__ volatile (
879        "cvd %[input],%[result]\n\t"
880          : [result] "=m"(decimal)
881          : [input] "d"(binary)
882   );
883
884   return decimal;
885}
886
887#else
888ULong s390_do_cvd(ULong binary) { return 0; }
889#endif
890
891/*------------------------------------------------------------*/
892/*--- Clean helper for "Extract cache attribute".          ---*/
893/*------------------------------------------------------------*/
894#if defined(VGA_s390x)
895ULong
896s390_do_ecag(ULong op2addr)
897{
898   ULong result;
899
900   __asm__ volatile(".insn rsy,0xEB000000004C,%[out],0,0(%[in])\n\t"
901                    : [out] "=d"(result)
902                    : [in] "d"(op2addr));
903   return result;
904}
905
906#else
907ULong s390_do_ecag(ULong op2addr) { return 0; }
908#endif
909
910/*------------------------------------------------------------*/
911/*--- Clean helper for "Perform Floating Point Operation". ---*/
912/*------------------------------------------------------------*/
913#if defined(VGA_s390x)
914UInt
915s390_do_pfpo(UInt gpr0)
916{
917   UChar rm;
918   UChar op1_ty, op2_ty;
919
920   rm  = gpr0 & 0xf;
921   if (rm > 1 && rm < 8)
922      return EmFail_S390X_invalid_PFPO_rounding_mode;
923
924   op1_ty = (gpr0 >> 16) & 0xff; // gpr0[40:47]
925   op2_ty = (gpr0 >> 8)  & 0xff; // gpr0[48:55]
926   /* Operand type must be BFP 32, 64, 128 or DFP 32, 64, 128
927      which correspond to 0x5, 0x6, 0x7, 0x8, 0x9, 0xa respectively.
928      Any other operand type value is unsupported */
929   if ((op1_ty == op2_ty) ||
930       (op1_ty < 0x5 || op1_ty > 0xa) ||
931       (op2_ty < 0x5 || op2_ty > 0xa))
932      return EmFail_S390X_invalid_PFPO_function;
933
934   return EmNote_NONE;
935}
936#else
937UInt s390_do_pfpo(UInt gpr0) { return 0; }
938#endif
939
940/*------------------------------------------------------------*/
941/*--- Helper for condition code.                           ---*/
942/*------------------------------------------------------------*/
943
/* Map an IRRoundingMode value onto the matching s390_bfp_round_t.
   Only the four modes tested below are accepted; any other value
   ends in vpanic. */
#if defined(VGA_s390x)
static s390_bfp_round_t
decode_bfp_rounding_mode(UInt irrm)
{
   if (irrm == Irrm_NEAREST)
      return S390_BFP_ROUND_NEAREST_EVEN;
   if (irrm == Irrm_NegINF)
      return S390_BFP_ROUND_NEGINF;
   if (irrm == Irrm_PosINF)
      return S390_BFP_ROUND_POSINF;
   if (irrm == Irrm_ZERO)
      return S390_BFP_ROUND_ZERO;

   vpanic("decode_bfp_rounding_mode");
}
#endif
958
959
/* Re-execute the two-operand instruction OPCODE on CC_DEP1 and CC_DEP2,
   then use IPM to copy the resulting condition code into `psw', a
   variable the expansion site must provide.  The macro's value is
   psw >> 28.  CC_DEP1 is a read-write asm operand, so it must be an
   lvalue and may be modified. */
#define S390_CC_FOR_BINARY(opcode,cc_dep1,cc_dep2)                  \
({                                                                  \
   __asm__ volatile (                                               \
      opcode " %[op1],%[op2]\n\t"                                   \
      "ipm %[psw]\n\t"                                              \
      : [psw] "=d"(psw), [op1] "+d"(cc_dep1)                        \
      : [op2] "d"(cc_dep2)                                          \
      : "cc"                                                        \
   );                                                               \
   psw >> 28;   /* cc */                                            \
})
969
/* Condition code of OPCODE applied to CC_DEP1 and CC_DEP2, with the
   incoming condition code primed from CC_NDEP: r0 is set to 1, CC_NDEP
   is subtracted from it, and OPCODE then runs on top of the condition
   code that subtraction leaves.  CC_DEP2 is first un-XORed with CC_NDEP
   (the macro assigns to it).  `psw' must exist at the expansion site;
   the macro's value is psw >> 28. */
#define S390_CC_FOR_TERNARY_SUBB(opcode,cc_dep1,cc_dep2,cc_ndep)    \
({                                                                  \
   /* Recover the original DEP2 value. See comment near             \
      s390_cc_thunk_put3 for rationale. */                          \
   cc_dep2 = cc_dep2 ^ cc_ndep;                                     \
   __asm__ volatile (                                               \
      "lghi 0,1\n\t"                                                \
      "sr 0,%[op3]\n\t"             /* borrow to cc */              \
      opcode " %[op1],%[op2]\n\t"   /* then redo the op */          \
      "ipm %[psw]\n\t"                                              \
      : [psw] "=d"(psw), [op1] "+&d"(cc_dep1)                       \
      : [op2] "d"(cc_dep2), [op3] "d"(cc_ndep)                      \
      : "0", "cc"                                                   \
   );                                                               \
   psw >> 28;   /* cc */                                            \
})
984
/* Condition code of OPCODE applied to CC_DEP1 and CC_DEP2, with the
   incoming condition code primed from CC_NDEP: CC_NDEP is loaded into r0
   and an add of zero turns it into a condition code before OPCODE runs.
   CC_DEP2 is first un-XORed with CC_NDEP (the macro assigns to it).
   `psw' must exist at the expansion site; the macro's value is
   psw >> 28. */
#define S390_CC_FOR_TERNARY_ADDC(opcode,cc_dep1,cc_dep2,cc_ndep)    \
({                                                                  \
   /* Recover the original DEP2 value. See comment near             \
      s390_cc_thunk_put3 for rationale. */                          \
   cc_dep2 = cc_dep2 ^ cc_ndep;                                     \
   __asm__ volatile (                                               \
      "lgfr 0,%[op3]\n\t"           /* first load cc_ndep */        \
      "aghi 0,0\n\t"                /* and convert it into a cc */  \
      opcode " %[op1],%[op2]\n\t"   /* then redo the op */          \
      "ipm %[psw]\n\t"                                              \
      : [psw] "=d"(psw), [op1] "+&d"(cc_dep1)                       \
      : [op2] "d"(cc_dep2), [op3] "d"(cc_ndep)                      \
      : "0", "cc"                                                   \
   );                                                               \
   psw >> 28;   /* cc */                                            \
})
999
1000
/* Condition code set by OPCODE when it is run with CC_DEP1 (held in a
   floating point register) as source and f0 as target.  `psw' must exist
   at the expansion site; the macro's value is psw >> 28. */
#define S390_CC_FOR_BFP_RESULT(opcode,cc_dep1)                      \
({                                                                  \
   __asm__ volatile (                                               \
      opcode " 0,%[op]\n\t"                                         \
      "ipm %[psw]\n\t"                                              \
      : [psw] "=d"(psw)                                             \
      : [op]  "f"(cc_dep1)                                          \
      : "cc", "f0"                                                  \
   );                                                               \
   psw >> 28;   /* cc */                                            \
})
1010
/* Condition code set by LTXBR for the 128-bit value given as the pair
   HI / LO.  HI is copied to f4 and LO to f6, and LTXBR is run with
   operands 0,4; f0, f2, f4 and f6 are declared clobbered.  `psw' must
   exist at the expansion site; the macro's value is psw >> 28. */
#define S390_CC_FOR_BFP128_RESULT(hi,lo)                            \
({                                                                  \
   __asm__ volatile (                                               \
      "ldr   4,%[high]\n\t"                                         \
      "ldr   6,%[low]\n\t"                                          \
      "ltxbr 0,4\n\t"                                               \
      "ipm %[psw]\n\t"                                              \
      : [psw] "=d"(psw)                                             \
      : [high] "f"(hi), [low] "f"(lo)                               \
      : "cc", "f0", "f2", "f4", "f6"                                \
   );                                                               \
   psw >> 28;   /* cc */                                            \
})
1022
/* Condition code set by the conversion OPCODE applied to CC_DEP1 with a
   fixed rounding mode.  ROUNDING_MODE is pasted into the instruction
   text, so it has to be a literal token; the result goes to r0, which is
   declared clobbered.  `psw' must exist at the expansion site; the
   macro's value is psw >> 28. */
#define S390_CC_FOR_BFP_CONVERT_AUX(opcode,cc_dep1,rounding_mode)   \
({                                                                  \
   __asm__ volatile (                                               \
      opcode " 0," #rounding_mode ",%[op]\n\t"                      \
      "ipm %[psw]\n\t"                                              \
      : [psw] "=d"(psw)                                             \
      : [op]  "f"(cc_dep1)                                          \
      : "cc", "r0"                                                  \
   );                                                               \
   psw >> 28;   /* cc */                                            \
})
1032
/* Condition code of the BFP conversion OPCODE on CC_DEP1, where CC_DEP2
   holds the IR rounding mode.  The mode is decoded and dispatched to
   S390_CC_FOR_BFP_CONVERT_AUX with the literal rounding-mode field:
   4 nearest-even, 5 zero, 6 +infinity, 7 -infinity.  Panics on any
   other decoded mode. */
#define S390_CC_FOR_BFP_CONVERT(opcode,cc_dep1,cc_dep2) \
({ \
   UInt cc_result; \
   switch (decode_bfp_rounding_mode(cc_dep2)) { \
   case S390_BFP_ROUND_NEAREST_EVEN: \
      cc_result = S390_CC_FOR_BFP_CONVERT_AUX(opcode,cc_dep1,4); break; \
   case S390_BFP_ROUND_ZERO: \
      cc_result = S390_CC_FOR_BFP_CONVERT_AUX(opcode,cc_dep1,5); break; \
   case S390_BFP_ROUND_POSINF: \
      cc_result = S390_CC_FOR_BFP_CONVERT_AUX(opcode,cc_dep1,6); break; \
   case S390_BFP_ROUND_NEGINF: \
      cc_result = S390_CC_FOR_BFP_CONVERT_AUX(opcode,cc_dep1,7); break; \
   default: \
      vpanic("unexpected bfp rounding mode"); \
   } \
   cc_result; \
})
1054
/* Condition code set by the conversion OPCODE applied to CC_DEP1 with a
   fixed rounding mode.  The operand list ",0,<op>,<mode>,0" is appended
   directly to OPCODE (presumably a ".insn" prefix supplied by the
   caller -- confirm at the use sites).  ROUNDING_MODE is pasted into the
   instruction text, so it has to be a literal token.  r0 is declared
   clobbered.  `psw' must exist at the expansion site; the macro's value
   is psw >> 28. */
#define S390_CC_FOR_BFP_UCONVERT_AUX(opcode,cc_dep1,rounding_mode)  \
({                                                                  \
   __asm__ volatile (                                               \
      opcode ",0,%[op]," #rounding_mode ",0\n\t"                    \
      "ipm %[psw]\n\t"                                              \
      : [psw] "=d"(psw)                                             \
      : [op]  "f"(cc_dep1)                                          \
      : "cc", "r0"                                                  \
   );                                                               \
   psw >> 28;   /* cc */                                            \
})
1064
/* Condition code of the BFP conversion OPCODE on CC_DEP1, where CC_DEP2
   holds the IR rounding mode.  The mode is decoded and dispatched to
   S390_CC_FOR_BFP_UCONVERT_AUX with the literal rounding-mode field:
   4 nearest-even, 5 zero, 6 +infinity, 7 -infinity.  Panics on any
   other decoded mode. */
#define S390_CC_FOR_BFP_UCONVERT(opcode,cc_dep1,cc_dep2) \
({ \
   UInt cc_result; \
   switch (decode_bfp_rounding_mode(cc_dep2)) { \
   case S390_BFP_ROUND_NEAREST_EVEN: \
      cc_result = S390_CC_FOR_BFP_UCONVERT_AUX(opcode,cc_dep1,4); break; \
   case S390_BFP_ROUND_ZERO: \
      cc_result = S390_CC_FOR_BFP_UCONVERT_AUX(opcode,cc_dep1,5); break; \
   case S390_BFP_ROUND_POSINF: \
      cc_result = S390_CC_FOR_BFP_UCONVERT_AUX(opcode,cc_dep1,6); break; \
   case S390_BFP_ROUND_NEGINF: \
      cc_result = S390_CC_FOR_BFP_UCONVERT_AUX(opcode,cc_dep1,7); break; \
   default: \
      vpanic("unexpected bfp rounding mode"); \
   } \
   cc_result; \
})
1086
/* 128-bit variant of the BFP conversion helper: HI is copied to f4 and
   LO to f6, then OPCODE is run with target r0, the literal
   ROUNDING_MODE and source f4.  r0, f4 and f6 are declared clobbered.
   `psw' must exist at the expansion site; the macro's value is
   psw >> 28. */
#define S390_CC_FOR_BFP128_CONVERT_AUX(opcode,hi,lo,rounding_mode)  \
({                                                                  \
   __asm__ volatile (                                               \
      "ldr   4,%[high]\n\t"                                         \
      "ldr   6,%[low]\n\t"                                          \
      opcode " 0," #rounding_mode ",4\n\t"                          \
      "ipm %[psw]\n\t"                                              \
      : [psw] "=d"(psw)                                             \
      : [high] "f"(hi), [low] "f"(lo)                               \
      : "cc", "r0", "f4", "f6"                                      \
   );                                                               \
   psw >> 28;   /* cc */                                            \
})
1098
/* Condition code of the 128-bit BFP conversion OPCODE.  CC_DEP1 and
   CC_DEP2 are the two halves of the operand and CC_NDEP holds the IR
   rounding mode.  CC_DEP2 is first un-XORed with CC_NDEP (the macro
   assigns to it).  The decoded mode selects the literal rounding-mode
   field passed to S390_CC_FOR_BFP128_CONVERT_AUX: 4 nearest-even,
   5 zero, 6 +infinity, 7 -infinity.  Panics on any other decoded mode. */
#define S390_CC_FOR_BFP128_CONVERT(opcode,cc_dep1,cc_dep2,cc_ndep) \
({ \
   UInt cc_result; \
   /* Recover the original DEP2 value. See comment near \
      s390_cc_thunk_put3 for rationale. */ \
   cc_dep2 = cc_dep2 ^ cc_ndep; \
   switch (decode_bfp_rounding_mode(cc_ndep)) { \
   case S390_BFP_ROUND_NEAREST_EVEN: \
      cc_result = S390_CC_FOR_BFP128_CONVERT_AUX(opcode,cc_dep1,cc_dep2,4); \
      break; \
   case S390_BFP_ROUND_ZERO: \
      cc_result = S390_CC_FOR_BFP128_CONVERT_AUX(opcode,cc_dep1,cc_dep2,5); \
      break; \
   case S390_BFP_ROUND_POSINF: \
      cc_result = S390_CC_FOR_BFP128_CONVERT_AUX(opcode,cc_dep1,cc_dep2,6); \
      break; \
   case S390_BFP_ROUND_NEGINF: \
      cc_result = S390_CC_FOR_BFP128_CONVERT_AUX(opcode,cc_dep1,cc_dep2,7); \
      break; \
   default: \
      vpanic("unexpected bfp rounding mode"); \
   } \
   cc_result; \
})
1123
/* 128-bit variant of the BFP conversion helper: HI is copied to f4 and
   LO to f6, then the operand list ",0,4,<mode>,0" is appended directly
   to OPCODE (presumably a ".insn" prefix supplied by the caller --
   confirm at the use sites).  ROUNDING_MODE must be a literal token.
   r0, f4 and f6 are declared clobbered.  `psw' must exist at the
   expansion site; the macro's value is psw >> 28. */
#define S390_CC_FOR_BFP128_UCONVERT_AUX(opcode,hi,lo,rounding_mode) \
({                                                                  \
   __asm__ volatile (                                               \
      "ldr   4,%[high]\n\t"                                         \
      "ldr   6,%[low]\n\t"                                          \
      opcode ",0,4," #rounding_mode ",0\n\t"                        \
      "ipm %[psw]\n\t"                                              \
      : [psw] "=d"(psw)                                             \
      : [high] "f"(hi), [low] "f"(lo)                               \
      : "cc", "r0", "f4", "f6"                                      \
   );                                                               \
   psw >> 28;   /* cc */                                            \
})
1135
/* Condition code of the 128-bit BFP conversion OPCODE.  CC_DEP1 and
   CC_DEP2 are the two halves of the operand and CC_NDEP holds the IR
   rounding mode.  CC_DEP2 is first un-XORed with CC_NDEP (the macro
   assigns to it).  The decoded mode selects the literal rounding-mode
   field passed to S390_CC_FOR_BFP128_UCONVERT_AUX: 4 nearest-even,
   5 zero, 6 +infinity, 7 -infinity.  Panics on any other decoded mode. */
#define S390_CC_FOR_BFP128_UCONVERT(opcode,cc_dep1,cc_dep2,cc_ndep) \
({ \
   UInt cc_result; \
   /* Recover the original DEP2 value. See comment near \
      s390_cc_thunk_put3 for rationale. */ \
   cc_dep2 = cc_dep2 ^ cc_ndep; \
   switch (decode_bfp_rounding_mode(cc_ndep)) { \
   case S390_BFP_ROUND_NEAREST_EVEN: \
      cc_result = S390_CC_FOR_BFP128_UCONVERT_AUX(opcode,cc_dep1,cc_dep2,4); \
      break; \
   case S390_BFP_ROUND_ZERO: \
      cc_result = S390_CC_FOR_BFP128_UCONVERT_AUX(opcode,cc_dep1,cc_dep2,5); \
      break; \
   case S390_BFP_ROUND_POSINF: \
      cc_result = S390_CC_FOR_BFP128_UCONVERT_AUX(opcode,cc_dep1,cc_dep2,6); \
      break; \
   case S390_BFP_ROUND_NEGINF: \
      cc_result = S390_CC_FOR_BFP128_UCONVERT_AUX(opcode,cc_dep1,cc_dep2,7); \
      break; \
   default: \
      vpanic("unexpected bfp rounding mode"); \
   } \
   cc_result; \
})
1160
/* Condition code set by the instruction OPCODE with CC_DEP1 (in a
   floating point register) as first operand and CC_DEP2 as the base
   register of the address operand 0(base).  CC_DEP2 uses the "a"
   constraint.  `psw' must exist at the expansion site; the macro's
   value is psw >> 28. */
#define S390_CC_FOR_BFP_TDC(opcode,cc_dep1,cc_dep2)                 \
({                                                                  \
   __asm__ volatile (                                               \
      opcode " %[value],0(%[class])\n\t"                            \
      "ipm %[psw]\n\t"                                              \
      : [psw] "=d"(psw)                                             \
      : [value] "f"(cc_dep1), [class] "a"(cc_dep2)                  \
      : "cc"                                                        \
   );                                                               \
   psw >> 28;   /* cc */                                            \
})
1171
/* Condition code set by TCXB for the 128-bit value whose halves are
   CC_DEP1 (copied to f4) and CC_DEP2 (copied to f6); CC_NDEP is the base
   register of the address operand 0(base).  CC_DEP2 is first un-XORed
   with CC_NDEP (the macro assigns to it).  `psw' must exist at the
   expansion site; the macro's value is psw >> 28. */
#define S390_CC_FOR_BFP128_TDC(cc_dep1,cc_dep2,cc_ndep)             \
({                                                                  \
   /* Recover the original DEP2 value. See comment near             \
      s390_cc_thunk_put1f128Z for rationale. */                     \
   cc_dep2 = cc_dep2 ^ cc_ndep;                                     \
   __asm__ volatile (                                               \
      "ldr  4,%[high]\n\t"                                          \
      "ldr  6,%[low]\n\t"                                           \
      "tcxb 4,0(%[class])\n\t"                                      \
      "ipm  %[psw]\n\t"                                             \
      : [psw] "=d"(psw)                                             \
      : [high] "f"(cc_dep1), [low] "f"(cc_dep2),                    \
        [class] "a"(cc_ndep)                                        \
      : "cc", "f4", "f6"                                            \
   );                                                               \
   psw >> 28;   /* cc */                                            \
})
1187
/* Map an IRRoundingMode value onto the matching s390_dfp_round_t.
   Eight IR modes are recognised; any other value ends in vpanic. */
#if defined(VGA_s390x)
static s390_dfp_round_t
decode_dfp_rounding_mode(UInt irrm)
{
   switch (irrm) {
   case Irrm_NEAREST:              return S390_DFP_ROUND_NEAREST_EVEN_4;
   case Irrm_NegINF:               return S390_DFP_ROUND_NEGINF_7;
   case Irrm_PosINF:               return S390_DFP_ROUND_POSINF_6;
   case Irrm_ZERO:                 return S390_DFP_ROUND_ZERO_5;
   case Irrm_NEAREST_TIE_AWAY_0:   return S390_DFP_ROUND_NEAREST_TIE_AWAY_0_1;
   case Irrm_PREPARE_SHORTER:      return S390_DFP_ROUND_PREPARE_SHORT_3;
   case Irrm_AWAY_FROM_ZERO:       return S390_DFP_ROUND_AWAY_0;
   case Irrm_NEAREST_TIE_TOWARD_0: return S390_DFP_ROUND_NEAREST_TIE_TOWARD_0;
   default:
      vpanic("decode_dfp_rounding_mode");
   }
}
#endif
1214
/* Condition code set by LTDTR (emitted via .insn) when run with CC_DEP1
   as source and f0 as target; f0 is declared clobbered.  `psw' must
   exist at the expansion site; the macro's value is psw >> 28. */
#define S390_CC_FOR_DFP_RESULT(cc_dep1) \
({ \
   __asm__ volatile ( \
      ".insn rre, 0xb3d60000,0,%[op]\n\t"              /* LTDTR */ \
      "ipm %[psw]\n\t" \
      : [psw] "=d"(psw) \
      : [op]  "f"(cc_dep1) \
      : "cc", "f0" \
   ); \
   psw >> 28;   /* cc */ \
})
1224
/* Condition code set by LTXTR (emitted via .insn) for the 128-bit value
   given as the pair HI / LO.  HI is copied to f4 and LO to f6, and the
   instruction is run with operands 0,4; f0, f2, f4 and f6 are declared
   clobbered.  `psw' must exist at the expansion site; the macro's value
   is psw >> 28. */
#define S390_CC_FOR_DFP128_RESULT(hi,lo) \
({ \
   __asm__ volatile ( \
      "ldr   4,%[high]\n\t" \
      "ldr   6,%[low]\n\t" \
      ".insn rre, 0xb3de0000,0,4\n\t"    /* LTXTR */ \
      "ipm %[psw]\n\t" \
      : [psw] "=d"(psw) \
      : [high] "f"(hi), [low] "f"(lo) \
      : "cc", "f0", "f2", "f4", "f6" \
   ); \
   psw >> 28;   /* cc */ \
})
1236
/* Condition code set by OPCODE with CC_DEP1 (in a floating point
   register) as first operand and CC_DEP2 as the base register of the
   address operand 0(base).  The operand list is appended to OPCODE after
   a comma (presumably a ".insn" prefix supplied by the caller -- confirm
   at the use sites).  `psw' must exist at the expansion site; the
   macro's value is psw >> 28. */
#define S390_CC_FOR_DFP_TD(opcode,cc_dep1,cc_dep2) \
({ \
   __asm__ volatile ( \
      opcode ",%[value],0(%[class])\n\t" \
      "ipm %[psw]\n\t" \
      : [psw] "=d"(psw) \
      : [value] "f"(cc_dep1), [class] "a"(cc_dep2) \
      : "cc" \
   ); \
   psw >> 28;   /* cc */ \
})
1247
/* 128-bit variant of S390_CC_FOR_DFP_TD: CC_DEP1 is copied to f4 and
   CC_DEP2 to f6, and OPCODE is run on f4 with CC_NDEP as the base
   register of the address operand 0(base).  CC_DEP2 is first un-XORed
   with CC_NDEP (the macro assigns to it).  `psw' must exist at the
   expansion site; the macro's value is psw >> 28. */
#define S390_CC_FOR_DFP128_TD(opcode,cc_dep1,cc_dep2,cc_ndep) \
({ \
   /* Recover the original DEP2 value. See comment near \
      s390_cc_thunk_put1d128Z for rationale. */ \
   cc_dep2 = cc_dep2 ^ cc_ndep; \
   __asm__ volatile ( \
      "ldr  4,%[high]\n\t" \
      "ldr  6,%[low]\n\t" \
      opcode ",4,0(%[class])\n\t" \
      "ipm  %[psw]\n\t" \
      : [psw] "=d"(psw) \
      : [high] "f"(cc_dep1), [low] "f"(cc_dep2), \
        [class] "a"(cc_ndep) \
      : "cc", "f4", "f6" \
   ); \
   psw >> 28;   /* cc */ \
})
1263
/* Condition code set by the DFP conversion OPCODE applied to CC_DEP1
   with a fixed rounding mode.  The operand list ",0,<op>,<mode>,0" is
   appended directly to OPCODE; ROUNDING_MODE is pasted into the
   instruction text, so it has to be a literal token.  r0 is declared
   clobbered.  `psw' must exist at the expansion site; the macro's value
   is psw >> 28. */
#define S390_CC_FOR_DFP_CONVERT_AUX(opcode,cc_dep1,rounding_mode) \
({ \
   __asm__ volatile ( \
      opcode ",0,%[op]," #rounding_mode ",0\n\t" \
      "ipm %[psw]\n\t" \
      : [psw] "=d"(psw) \
      : [op] "f"(cc_dep1) \
      : "cc", "r0" \
   ); \
   psw >> 28;   /* cc */ \
})
1273
/* Condition code of the DFP conversion OPCODE on CC_DEP1, where CC_DEP2
   holds the IR rounding mode.  The decoded s390 DFP mode selects the
   literal rounding-mode field handed to S390_CC_FOR_DFP_CONVERT_AUX;
   modes that appear under two encodings share one field value.  Panics
   on any other decoded mode. */
#define S390_CC_FOR_DFP_CONVERT(opcode,cc_dep1,cc_dep2) \
({ \
   UInt cc_result; \
   switch (decode_dfp_rounding_mode(cc_dep2)) { \
   case S390_DFP_ROUND_NEAREST_TIE_AWAY_0_1: \
   case S390_DFP_ROUND_NEAREST_TIE_AWAY_0_12: \
      cc_result = S390_CC_FOR_DFP_CONVERT_AUX(opcode,cc_dep1,1); break; \
   case S390_DFP_ROUND_PREPARE_SHORT_3: \
   case S390_DFP_ROUND_PREPARE_SHORT_15: \
      cc_result = S390_CC_FOR_DFP_CONVERT_AUX(opcode,cc_dep1,3); break; \
   case S390_DFP_ROUND_NEAREST_EVEN_4: \
   case S390_DFP_ROUND_NEAREST_EVEN_8: \
      cc_result = S390_CC_FOR_DFP_CONVERT_AUX(opcode,cc_dep1,4); break; \
   case S390_DFP_ROUND_ZERO_5: \
   case S390_DFP_ROUND_ZERO_9: \
      cc_result = S390_CC_FOR_DFP_CONVERT_AUX(opcode,cc_dep1,5); break; \
   case S390_DFP_ROUND_POSINF_6: \
   case S390_DFP_ROUND_POSINF_10: \
      cc_result = S390_CC_FOR_DFP_CONVERT_AUX(opcode,cc_dep1,6); break; \
   case S390_DFP_ROUND_NEGINF_7: \
   case S390_DFP_ROUND_NEGINF_11: \
      cc_result = S390_CC_FOR_DFP_CONVERT_AUX(opcode,cc_dep1,7); break; \
   case S390_DFP_ROUND_NEAREST_TIE_TOWARD_0: \
      cc_result = S390_CC_FOR_DFP_CONVERT_AUX(opcode,cc_dep1,13); break; \
   case S390_DFP_ROUND_AWAY_0: \
      cc_result = S390_CC_FOR_DFP_CONVERT_AUX(opcode,cc_dep1,14); break; \
   default: \
      vpanic("unexpected dfp rounding mode"); \
   } \
   cc_result; \
})
1313
/* Condition code set by the DFP conversion OPCODE applied to CC_DEP1
   with a fixed rounding mode.  The operand list ",0,<op>,<mode>,0" is
   appended directly to OPCODE; ROUNDING_MODE is pasted into the
   instruction text, so it has to be a literal token.  r0 is declared
   clobbered.  `psw' must exist at the expansion site; the macro's value
   is psw >> 28. */
#define S390_CC_FOR_DFP_UCONVERT_AUX(opcode,cc_dep1,rounding_mode) \
({ \
   __asm__ volatile ( \
      opcode ",0,%[op]," #rounding_mode ",0\n\t" \
      "ipm %[psw]\n\t" \
      : [psw] "=d"(psw) \
      : [op] "f"(cc_dep1) \
      : "cc", "r0" \
   ); \
   psw >> 28;   /* cc */ \
})
1323
/* Condition code of the DFP conversion OPCODE on CC_DEP1, where CC_DEP2
   holds the IR rounding mode.  The decoded s390 DFP mode selects the
   literal rounding-mode field handed to S390_CC_FOR_DFP_UCONVERT_AUX;
   modes that appear under two encodings share one field value.  Panics
   on any other decoded mode. */
#define S390_CC_FOR_DFP_UCONVERT(opcode,cc_dep1,cc_dep2) \
({ \
   UInt cc_result; \
   switch (decode_dfp_rounding_mode(cc_dep2)) { \
   case S390_DFP_ROUND_NEAREST_TIE_AWAY_0_1: \
   case S390_DFP_ROUND_NEAREST_TIE_AWAY_0_12: \
      cc_result = S390_CC_FOR_DFP_UCONVERT_AUX(opcode,cc_dep1,1); break; \
   case S390_DFP_ROUND_PREPARE_SHORT_3: \
   case S390_DFP_ROUND_PREPARE_SHORT_15: \
      cc_result = S390_CC_FOR_DFP_UCONVERT_AUX(opcode,cc_dep1,3); break; \
   case S390_DFP_ROUND_NEAREST_EVEN_4: \
   case S390_DFP_ROUND_NEAREST_EVEN_8: \
      cc_result = S390_CC_FOR_DFP_UCONVERT_AUX(opcode,cc_dep1,4); break; \
   case S390_DFP_ROUND_ZERO_5: \
   case S390_DFP_ROUND_ZERO_9: \
      cc_result = S390_CC_FOR_DFP_UCONVERT_AUX(opcode,cc_dep1,5); break; \
   case S390_DFP_ROUND_POSINF_6: \
   case S390_DFP_ROUND_POSINF_10: \
      cc_result = S390_CC_FOR_DFP_UCONVERT_AUX(opcode,cc_dep1,6); break; \
   case S390_DFP_ROUND_NEGINF_7: \
   case S390_DFP_ROUND_NEGINF_11: \
      cc_result = S390_CC_FOR_DFP_UCONVERT_AUX(opcode,cc_dep1,7); break; \
   case S390_DFP_ROUND_NEAREST_TIE_TOWARD_0: \
      cc_result = S390_CC_FOR_DFP_UCONVERT_AUX(opcode,cc_dep1,13); break; \
   case S390_DFP_ROUND_AWAY_0: \
      cc_result = S390_CC_FOR_DFP_UCONVERT_AUX(opcode,cc_dep1,14); break; \
   default: \
      vpanic("unexpected dfp rounding mode"); \
   } \
   cc_result; \
})
1363
/* 128-bit variant of the DFP conversion helper: HI is copied to f4 and
   LO to f6, then the operand list ",0,4,<mode>,0" is appended directly
   to OPCODE.  ROUNDING_MODE must be a literal token.  r0, f4 and f6 are
   declared clobbered.  `psw' must exist at the expansion site; the
   macro's value is psw >> 28. */
#define S390_CC_FOR_DFP128_CONVERT_AUX(opcode,hi,lo,rounding_mode) \
({ \
   __asm__ volatile ( \
      "ldr   4,%[high]\n\t" \
      "ldr   6,%[low]\n\t" \
      opcode ",0,4," #rounding_mode ",0\n\t" \
      "ipm %[psw]\n\t" \
      : [psw] "=d"(psw) \
      : [high] "f"(hi), [low] "f"(lo) \
      : "cc", "r0", "f4", "f6" \
   ); \
   psw >> 28;   /* cc */ \
})
1375
/* Condition code of the 128-bit DFP conversion OPCODE.  CC_DEP1 and
   CC_DEP2 are the two halves of the operand and CC_NDEP holds the IR
   rounding mode.  CC_DEP2 is first un-XORed with CC_NDEP (the macro
   assigns to it).  The decoded s390 DFP mode selects the literal
   rounding-mode field handed to S390_CC_FOR_DFP128_CONVERT_AUX; modes
   that appear under two encodings share one field value.  Panics on any
   other decoded mode. */
#define S390_CC_FOR_DFP128_CONVERT(opcode,cc_dep1,cc_dep2,cc_ndep) \
({ \
   UInt cc_result; \
   /* Recover the original DEP2 value. See comment near \
      s390_cc_thunk_put3 for rationale. */ \
   cc_dep2 = cc_dep2 ^ cc_ndep; \
   switch (decode_dfp_rounding_mode(cc_ndep)) { \
   case S390_DFP_ROUND_NEAREST_TIE_AWAY_0_1: \
   case S390_DFP_ROUND_NEAREST_TIE_AWAY_0_12: \
      cc_result = S390_CC_FOR_DFP128_CONVERT_AUX(opcode,cc_dep1,cc_dep2,1); \
      break; \
   case S390_DFP_ROUND_PREPARE_SHORT_3: \
   case S390_DFP_ROUND_PREPARE_SHORT_15: \
      cc_result = S390_CC_FOR_DFP128_CONVERT_AUX(opcode,cc_dep1,cc_dep2,3); \
      break; \
   case S390_DFP_ROUND_NEAREST_EVEN_4: \
   case S390_DFP_ROUND_NEAREST_EVEN_8: \
      cc_result = S390_CC_FOR_DFP128_CONVERT_AUX(opcode,cc_dep1,cc_dep2,4); \
      break; \
   case S390_DFP_ROUND_ZERO_5: \
   case S390_DFP_ROUND_ZERO_9: \
      cc_result = S390_CC_FOR_DFP128_CONVERT_AUX(opcode,cc_dep1,cc_dep2,5); \
      break; \
   case S390_DFP_ROUND_POSINF_6: \
   case S390_DFP_ROUND_POSINF_10: \
      cc_result = S390_CC_FOR_DFP128_CONVERT_AUX(opcode,cc_dep1,cc_dep2,6); \
      break; \
   case S390_DFP_ROUND_NEGINF_7: \
   case S390_DFP_ROUND_NEGINF_11: \
      cc_result = S390_CC_FOR_DFP128_CONVERT_AUX(opcode,cc_dep1,cc_dep2,7); \
      break; \
   case S390_DFP_ROUND_NEAREST_TIE_TOWARD_0: \
      cc_result = S390_CC_FOR_DFP128_CONVERT_AUX(opcode,cc_dep1,cc_dep2,13); \
      break; \
   case S390_DFP_ROUND_AWAY_0: \
      cc_result = S390_CC_FOR_DFP128_CONVERT_AUX(opcode,cc_dep1,cc_dep2,14); \
      break; \
   default: \
      vpanic("unexpected dfp rounding mode"); \
   } \
   cc_result; \
})
1418
/* 128-bit variant of the DFP conversion helper: HI is copied to f4 and
   LO to f6, then the operand list ",0,4,<mode>,0" is appended directly
   to OPCODE.  ROUNDING_MODE must be a literal token.  r0, f4 and f6 are
   declared clobbered.  `psw' must exist at the expansion site; the
   macro's value is psw >> 28. */
#define S390_CC_FOR_DFP128_UCONVERT_AUX(opcode,hi,lo,rounding_mode) \
({ \
   __asm__ volatile ( \
      "ldr   4,%[high]\n\t" \
      "ldr   6,%[low]\n\t" \
      opcode ",0,4," #rounding_mode ",0\n\t" \
      "ipm %[psw]\n\t" \
      : [psw] "=d"(psw) \
      : [high] "f"(hi), [low] "f"(lo) \
      : "cc", "r0", "f4", "f6" \
   ); \
   psw >> 28;   /* cc */ \
})
1430
/* Yield the condition code of the DFP128 to unsigned integer conversion
   OPCODE for the operand passed in CC_DEP1 / CC_DEP2.

   CC_NDEP is fed to decode_dfp_rounding_mode to obtain the rounding mode.
   Before that it is XORed out of CC_DEP2, which overwrites the CC_DEP2
   lvalue handed to the macro. Because the rounding mode has to appear as
   a literal in the instruction text, each literal gets its own expansion
   of the AUX macro; case labels mapping to the same literal share one.
   An unexpected rounding mode causes a panic. */
#define S390_CC_FOR_DFP128_UCONVERT(opcode,cc_dep1,cc_dep2,cc_ndep)       \
   ({                                                                     \
      UInt dfp_cc;                                                        \
      /* Recover the original DEP2 value. See comment near                \
         s390_cc_thunk_put3 for rationale. */                             \
      cc_dep2 = cc_dep2 ^ cc_ndep;                                        \
      switch (decode_dfp_rounding_mode(cc_ndep)) {                        \
      /* literal 1 */                                                     \
      case S390_DFP_ROUND_NEAREST_TIE_AWAY_0_1:                           \
      case S390_DFP_ROUND_NEAREST_TIE_AWAY_0_12:                          \
         dfp_cc =                                                         \
            S390_CC_FOR_DFP128_UCONVERT_AUX(opcode,cc_dep1,cc_dep2,1);    \
         break;                                                           \
      /* literal 3 */                                                     \
      case S390_DFP_ROUND_PREPARE_SHORT_3:                                \
      case S390_DFP_ROUND_PREPARE_SHORT_15:                               \
         dfp_cc =                                                         \
            S390_CC_FOR_DFP128_UCONVERT_AUX(opcode,cc_dep1,cc_dep2,3);    \
         break;                                                           \
      /* literal 4 */                                                     \
      case S390_DFP_ROUND_NEAREST_EVEN_4:                                 \
      case S390_DFP_ROUND_NEAREST_EVEN_8:                                 \
         dfp_cc =                                                         \
            S390_CC_FOR_DFP128_UCONVERT_AUX(opcode,cc_dep1,cc_dep2,4);    \
         break;                                                           \
      /* literal 5 */                                                     \
      case S390_DFP_ROUND_ZERO_5:                                         \
      case S390_DFP_ROUND_ZERO_9:                                         \
         dfp_cc =                                                         \
            S390_CC_FOR_DFP128_UCONVERT_AUX(opcode,cc_dep1,cc_dep2,5);    \
         break;                                                           \
      /* literal 6 */                                                     \
      case S390_DFP_ROUND_POSINF_6:                                       \
      case S390_DFP_ROUND_POSINF_10:                                      \
         dfp_cc =                                                         \
            S390_CC_FOR_DFP128_UCONVERT_AUX(opcode,cc_dep1,cc_dep2,6);    \
         break;                                                           \
      /* literal 7 */                                                     \
      case S390_DFP_ROUND_NEGINF_7:                                       \
      case S390_DFP_ROUND_NEGINF_11:                                      \
         dfp_cc =                                                         \
            S390_CC_FOR_DFP128_UCONVERT_AUX(opcode,cc_dep1,cc_dep2,7);    \
         break;                                                           \
      /* literal 13 */                                                    \
      case S390_DFP_ROUND_NEAREST_TIE_TOWARD_0:                           \
         dfp_cc =                                                         \
            S390_CC_FOR_DFP128_UCONVERT_AUX(opcode,cc_dep1,cc_dep2,13);   \
         break;                                                           \
      /* literal 14 */                                                    \
      case S390_DFP_ROUND_AWAY_0:                                         \
         dfp_cc =                                                         \
            S390_CC_FOR_DFP128_UCONVERT_AUX(opcode,cc_dep1,cc_dep2,14);   \
         break;                                                           \
      default:                                                            \
         vpanic("unexpected dfp rounding mode");                          \
      }                                                                   \
      dfp_cc;                                                             \
   })
1473
1474
1475/* Return the value of the condition code from the supplied thunk parameters.
1476   This is not the value of the PSW. It is the value of the 2 CC bits within
1477   the PSW. The returned value is thusly in the interval [0:3]. */
1478UInt
1479s390_calculate_cc(ULong cc_op, ULong cc_dep1, ULong cc_dep2, ULong cc_ndep)
1480{
1481#if defined(VGA_s390x)
1482   UInt psw;
1483
1484   switch (cc_op) {
1485
1486   case S390_CC_OP_BITWISE:
1487      return S390_CC_FOR_BINARY("ogr", cc_dep1, (ULong)0);
1488
1489   case S390_CC_OP_SIGNED_COMPARE:
1490      return S390_CC_FOR_BINARY("cgr", cc_dep1, cc_dep2);
1491
1492   case S390_CC_OP_UNSIGNED_COMPARE:
1493      return S390_CC_FOR_BINARY("clgr", cc_dep1, cc_dep2);
1494
1495   case S390_CC_OP_SIGNED_ADD_64:
1496      return S390_CC_FOR_BINARY("agr", cc_dep1, cc_dep2);
1497
1498   case S390_CC_OP_SIGNED_ADD_32:
1499      return S390_CC_FOR_BINARY("ar", cc_dep1, cc_dep2);
1500
1501   case S390_CC_OP_SIGNED_SUB_64:
1502      return S390_CC_FOR_BINARY("sgr", cc_dep1, cc_dep2);
1503
1504   case S390_CC_OP_SIGNED_SUB_32:
1505      return S390_CC_FOR_BINARY("sr", cc_dep1, cc_dep2);
1506
1507   case S390_CC_OP_UNSIGNED_ADD_64:
1508      return S390_CC_FOR_BINARY("algr", cc_dep1, cc_dep2);
1509
1510   case S390_CC_OP_UNSIGNED_ADD_32:
1511      return S390_CC_FOR_BINARY("alr", cc_dep1, cc_dep2);
1512
1513   case S390_CC_OP_UNSIGNED_ADDC_64:
1514      return S390_CC_FOR_TERNARY_ADDC("alcgr", cc_dep1, cc_dep2, cc_ndep);
1515
1516   case S390_CC_OP_UNSIGNED_ADDC_32:
1517      return S390_CC_FOR_TERNARY_ADDC("alcr", cc_dep1, cc_dep2, cc_ndep);
1518
1519   case S390_CC_OP_UNSIGNED_SUB_64:
1520      return S390_CC_FOR_BINARY("slgr", cc_dep1, cc_dep2);
1521
1522   case S390_CC_OP_UNSIGNED_SUB_32:
1523      return S390_CC_FOR_BINARY("slr", cc_dep1, cc_dep2);
1524
1525   case S390_CC_OP_UNSIGNED_SUBB_64:
1526      return S390_CC_FOR_TERNARY_SUBB("slbgr", cc_dep1, cc_dep2, cc_ndep);
1527
1528   case S390_CC_OP_UNSIGNED_SUBB_32:
1529      return S390_CC_FOR_TERNARY_SUBB("slbr", cc_dep1, cc_dep2, cc_ndep);
1530
1531   case S390_CC_OP_LOAD_AND_TEST:
1532      /* Like signed comparison with 0 */
1533      return S390_CC_FOR_BINARY("cgr", cc_dep1, (Long)0);
1534
1535   case S390_CC_OP_LOAD_POSITIVE_32:
1536      __asm__ volatile (
1537           "lpr  %[result],%[op]\n\t"
1538           "ipm  %[psw]\n\t"         : [psw] "=d"(psw), [result] "=d"(cc_dep1)
1539                                     : [op] "d"(cc_dep1)
1540                                     : "cc");
1541      return psw >> 28;   /* cc */
1542
1543   case S390_CC_OP_LOAD_POSITIVE_64:
1544      __asm__ volatile (
1545           "lpgr %[result],%[op]\n\t"
1546           "ipm  %[psw]\n\t"         : [psw] "=d"(psw), [result] "=d"(cc_dep1)
1547                                     : [op] "d"(cc_dep1)
1548                                     : "cc");
1549      return psw >> 28;   /* cc */
1550
1551   case S390_CC_OP_TEST_UNDER_MASK_8: {
1552      UChar value  = cc_dep1;
1553      UChar mask   = cc_dep2;
1554
1555      __asm__ volatile (
1556           "bras %%r2,1f\n\t"             /* %r2 = address of next insn */
1557           "tm %[value],0\n\t"            /* this is skipped, then EXecuted */
1558           "1: ex %[mask],0(%%r2)\n\t"    /* EXecute TM after modifying mask */
1559           "ipm %[psw]\n\t"             : [psw] "=d"(psw)
1560                                        : [value] "m"(value), [mask] "a"(mask)
1561                                        : "r2", "cc");
1562      return psw >> 28;   /* cc */
1563   }
1564
1565   case S390_CC_OP_TEST_UNDER_MASK_16: {
1566      /* Create a TMLL insn with the mask as given by cc_dep2 */
1567      UInt insn  = (0xA701 << 16) | cc_dep2;
1568      UInt value = cc_dep1;
1569
1570      __asm__ volatile (
1571           "lr   1,%[value]\n\t"
1572           "lhi  2,0x10\n\t"
1573           "ex   2,%[insn]\n\t"
1574           "ipm  %[psw]\n\t"       : [psw] "=d"(psw)
1575                                   : [value] "d"(value), [insn] "m"(insn)
1576                                   : "r1", "r2", "cc");
1577      return psw >> 28;   /* cc */
1578   }
1579
1580   case S390_CC_OP_SHIFT_LEFT_32:
1581      __asm__ volatile (
1582           "sla  %[op],0(%[amount])\n\t"
1583           "ipm  %[psw]\n\t"            : [psw] "=d"(psw), [op] "+d"(cc_dep1)
1584                                        : [amount] "a"(cc_dep2)
1585                                        : "cc");
1586      return psw >> 28;   /* cc */
1587
1588   case S390_CC_OP_SHIFT_LEFT_64: {
1589      Int high = (Int)(cc_dep1 >> 32);
1590      Int low  = (Int)(cc_dep1 & 0xFFFFFFFF);
1591
1592      __asm__ volatile (
1593           "lr   2,%[high]\n\t"
1594           "lr   3,%[low]\n\t"
1595           "slda 2,0(%[amount])\n\t"
1596           "ipm %[psw]\n\t"             : [psw] "=d"(psw), [high] "+d"(high),
1597                                          [low] "+d"(low)
1598                                        : [amount] "a"(cc_dep2)
1599                                        : "cc", "r2", "r3");
1600      return psw >> 28;   /* cc */
1601   }
1602
1603   case S390_CC_OP_INSERT_CHAR_MASK_32: {
1604      Int inserted = 0;
1605      Int msb = 0;
1606
1607      if (cc_dep2 & 1) {
1608         inserted |= cc_dep1 & 0xff;
1609         msb = 0x80;
1610      }
1611      if (cc_dep2 & 2) {
1612         inserted |= cc_dep1 & 0xff00;
1613         msb = 0x8000;
1614      }
1615      if (cc_dep2 & 4) {
1616         inserted |= cc_dep1 & 0xff0000;
1617         msb = 0x800000;
1618      }
1619      if (cc_dep2 & 8) {
1620         inserted |= cc_dep1 & 0xff000000;
1621         msb = 0x80000000;
1622      }
1623
1624      if (inserted & msb)  // MSB is 1
1625         return 1;
1626      if (inserted > 0)
1627         return 2;
1628      return 0;
1629   }
1630
1631   case S390_CC_OP_BFP_RESULT_32:
1632      return S390_CC_FOR_BFP_RESULT("ltebr", cc_dep1);
1633
1634   case S390_CC_OP_BFP_RESULT_64:
1635      return S390_CC_FOR_BFP_RESULT("ltdbr", cc_dep1);
1636
1637   case S390_CC_OP_BFP_RESULT_128:
1638      return S390_CC_FOR_BFP128_RESULT(cc_dep1, cc_dep2);
1639
1640   case S390_CC_OP_BFP_32_TO_INT_32:
1641      return S390_CC_FOR_BFP_CONVERT("cfebr", cc_dep1, cc_dep2);
1642
1643   case S390_CC_OP_BFP_64_TO_INT_32:
1644      return S390_CC_FOR_BFP_CONVERT("cfdbr", cc_dep1, cc_dep2);
1645
1646   case S390_CC_OP_BFP_128_TO_INT_32:
1647      return S390_CC_FOR_BFP128_CONVERT("cfxbr", cc_dep1, cc_dep2, cc_ndep);
1648
1649   case S390_CC_OP_BFP_32_TO_INT_64:
1650      return S390_CC_FOR_BFP_CONVERT("cgebr", cc_dep1, cc_dep2);
1651
1652   case S390_CC_OP_BFP_64_TO_INT_64:
1653      return S390_CC_FOR_BFP_CONVERT("cgdbr", cc_dep1, cc_dep2);
1654
1655   case S390_CC_OP_BFP_128_TO_INT_64:
1656      return S390_CC_FOR_BFP128_CONVERT("cgxbr", cc_dep1, cc_dep2, cc_ndep);
1657
1658   case S390_CC_OP_BFP_TDC_32:
1659      return S390_CC_FOR_BFP_TDC("tceb", cc_dep1, cc_dep2);
1660
1661   case S390_CC_OP_BFP_TDC_64:
1662      return S390_CC_FOR_BFP_TDC("tcdb", cc_dep1, cc_dep2);
1663
1664   case S390_CC_OP_BFP_TDC_128:
1665      return S390_CC_FOR_BFP128_TDC(cc_dep1, cc_dep2, cc_ndep);
1666
1667   case S390_CC_OP_SET:
1668      return cc_dep1;
1669
1670   case S390_CC_OP_BFP_32_TO_UINT_32:
1671      return S390_CC_FOR_BFP_UCONVERT(".insn rrf,0xb39c0000", cc_dep1, cc_dep2);
1672
1673   case S390_CC_OP_BFP_64_TO_UINT_32:
1674      return S390_CC_FOR_BFP_UCONVERT(".insn rrf,0xb39d0000", cc_dep1, cc_dep2);
1675
1676   case S390_CC_OP_BFP_128_TO_UINT_32:
1677      return S390_CC_FOR_BFP128_UCONVERT(".insn rrf,0xb39e0000", cc_dep1,
1678                                         cc_dep2, cc_ndep);
1679
1680   case S390_CC_OP_BFP_32_TO_UINT_64:
1681      return S390_CC_FOR_BFP_UCONVERT(".insn rrf,0xb3ac0000", cc_dep1, cc_dep2);
1682
1683   case S390_CC_OP_BFP_64_TO_UINT_64:
1684      return S390_CC_FOR_BFP_UCONVERT(".insn rrf,0xb3ad0000", cc_dep1, cc_dep2);
1685
1686   case S390_CC_OP_BFP_128_TO_UINT_64:
1687      return S390_CC_FOR_BFP128_UCONVERT(".insn rrf,0xb3ae0000", cc_dep1,
1688                                         cc_dep2, cc_ndep);
1689
1690   case S390_CC_OP_DFP_RESULT_64:
1691      return S390_CC_FOR_DFP_RESULT(cc_dep1);
1692
1693   case S390_CC_OP_DFP_RESULT_128:
1694      return S390_CC_FOR_DFP128_RESULT(cc_dep1, cc_dep2);
1695
1696   case S390_CC_OP_DFP_TDC_32:  /* TDCET */
1697      return S390_CC_FOR_DFP_TD(".insn rxe, 0xed0000000050", cc_dep1, cc_dep2);
1698
1699   case S390_CC_OP_DFP_TDC_64:  /* TDCDT */
1700      return S390_CC_FOR_DFP_TD(".insn rxe, 0xed0000000054", cc_dep1, cc_dep2);
1701
1702   case S390_CC_OP_DFP_TDC_128: /* TDCXT */
1703      return S390_CC_FOR_DFP128_TD(".insn rxe, 0xed0000000058", cc_dep1,
1704                                   cc_dep2, cc_ndep);
1705
1706   case S390_CC_OP_DFP_TDG_32:  /* TDGET */
1707      return S390_CC_FOR_DFP_TD(".insn rxe, 0xed0000000051", cc_dep1, cc_dep2);
1708
1709   case S390_CC_OP_DFP_TDG_64:  /* TDGDT */
1710      return S390_CC_FOR_DFP_TD(".insn rxe, 0xed0000000055", cc_dep1, cc_dep2);
1711
1712   case S390_CC_OP_DFP_TDG_128: /* TDGXT */
1713      return S390_CC_FOR_DFP128_TD(".insn rxe, 0xed0000000059", cc_dep1,
1714                                   cc_dep2, cc_ndep);
1715
1716   case S390_CC_OP_DFP_64_TO_INT_32: /* CFDTR */
1717      return S390_CC_FOR_DFP_CONVERT(".insn rrf,0xb9410000", cc_dep1, cc_dep2);
1718
1719   case S390_CC_OP_DFP_128_TO_INT_32: /* CFXTR */
1720      return S390_CC_FOR_DFP128_CONVERT(".insn rrf,0xb9490000", cc_dep1,
1721                                        cc_dep2, cc_ndep);
1722
1723   case S390_CC_OP_DFP_64_TO_INT_64: /* CGDTR */
1724      return S390_CC_FOR_DFP_CONVERT(".insn rrf,0xb3e10000", cc_dep1, cc_dep2);
1725
1726   case S390_CC_OP_DFP_128_TO_INT_64: /* CGXTR */
1727      return S390_CC_FOR_DFP128_CONVERT(".insn rrf,0xb3e90000", cc_dep1,
1728                                        cc_dep2, cc_ndep);
1729
1730   case S390_CC_OP_DFP_64_TO_UINT_32: /* CLFDTR */
1731      return S390_CC_FOR_DFP_UCONVERT(".insn rrf,0xb9430000", cc_dep1, cc_dep2);
1732
1733   case S390_CC_OP_DFP_128_TO_UINT_32: /* CLFXTR */
1734      return S390_CC_FOR_DFP128_UCONVERT(".insn rrf,0xb94b0000", cc_dep1,
1735                                         cc_dep2, cc_ndep);
1736
1737   case S390_CC_OP_DFP_64_TO_UINT_64: /* CLGDTR */
1738      return S390_CC_FOR_DFP_UCONVERT(".insn rrf,0xb9420000", cc_dep1, cc_dep2);
1739
1740   case S390_CC_OP_DFP_128_TO_UINT_64: /* CLGXTR */
1741      return S390_CC_FOR_DFP128_UCONVERT(".insn rrf,0xb94a0000", cc_dep1,
1742                                         cc_dep2, cc_ndep);
1743
1744   case S390_CC_OP_PFPO_32: {
1745      __asm__ volatile(
1746           "ler 4, %[cc_dep1]\n\t"      /* 32 bit FR move */
1747           "lr  0, %[cc_dep2]\n\t"      /* 32 bit GR move */
1748           ".short 0x010a\n\t"          /* PFPO */
1749           "ipm %[psw]\n\t"             : [psw] "=d"(psw)
1750                                        : [cc_dep1] "f"(cc_dep1),
1751                                          [cc_dep2] "d"(cc_dep2)
1752                                        : "r0", "r1", "f4");
1753      return psw >> 28;  /* cc */
1754   }
1755
1756   case S390_CC_OP_PFPO_64: {
1757      __asm__ volatile(
1758           "ldr 4, %[cc_dep1]\n\t"
1759           "lr  0, %[cc_dep2]\n\t"      /* 32 bit register move */
1760           ".short 0x010a\n\t"          /* PFPO */
1761           "ipm %[psw]\n\t"             : [psw] "=d"(psw)
1762                                        : [cc_dep1] "f"(cc_dep1),
1763                                          [cc_dep2] "d"(cc_dep2)
1764                                        : "r0", "r1", "f4");
1765      return psw >> 28;  /* cc */
1766   }
1767
1768   case S390_CC_OP_PFPO_128: {
1769      __asm__ volatile(
1770           "ldr 4,%[cc_dep1]\n\t"
1771           "ldr 6,%[cc_dep2]\n\t"
1772           "lr  0,%[cc_ndep]\n\t"       /* 32 bit register move */
1773           ".short 0x010a\n\t"          /* PFPO */
1774           "ipm %[psw]\n\t"             : [psw] "=d"(psw)
1775                                        : [cc_dep1] "f"(cc_dep1),
1776                                          [cc_dep2] "f"(cc_dep2),
1777                                          [cc_ndep] "d"(cc_ndep)
1778                                        : "r0", "r1", "f0", "f2", "f4", "f6");
1779      return psw >> 28;  /* cc */
1780   }
1781
1782   default:
1783      break;
1784   }
1785#endif
1786   vpanic("s390_calculate_cc");
1787}
1788
1789
1790/* Note that this does *not* return a Boolean value. The result needs to be
1791   explicitly tested against zero. */
1792UInt
1793s390_calculate_cond(ULong mask, ULong op, ULong dep1, ULong dep2, ULong ndep)
1794{
1795   UInt cc = s390_calculate_cc(op, dep1, dep2, ndep);
1796
1797   return ((mask << cc) & 0x8);
1798}
1799
1800/*------------------------------------------------------------*/
1801/*--- spechelper for performance                           ---*/
1802/*------------------------------------------------------------*/
1803
1804
/* Convenience macros: short spellings for building the IR expression
   nodes used by the specialisations below. */
#define unop(iop,arg)         IRExpr_Unop((iop),(arg))
#define binop(iop,lhs,rhs)    IRExpr_Binop((iop),(lhs),(rhs))
#define mkU64(val)            IRExpr_Const(IRConst_U64(val))
#define mkU32(val)            IRExpr_Const(IRConst_U32(val))
#define mkU8(val)             IRExpr_Const(IRConst_U8(val))
1811
1812
1813static inline Bool
1814isC64(const IRExpr *expr)
1815{
1816   return expr->tag == Iex_Const && expr->Iex.Const.con->tag == Ico_U64;
1817}
1818
1819
1820/* The returned expression is NULL if no specialization was found. In that
1821   case the helper function will be called. Otherwise, the expression has
1822   type Ity_I32 and a Boolean value. */
1823IRExpr *
1824guest_s390x_spechelper(const HChar *function_name, IRExpr **args,
1825                       IRStmt **precedingStmts, Int n_precedingStmts)
1826{
1827   UInt i, arity = 0;
1828
1829   for (i = 0; args[i]; i++)
1830      arity++;
1831
1832#  if 0
1833   vex_printf("spec request:\n");
1834   vex_printf("   %s  ", function_name);
1835   for (i = 0; i < arity; i++) {
1836      vex_printf("  ");
1837      ppIRExpr(args[i]);
1838   }
1839   vex_printf("\n");
1840#  endif
1841
1842   /* --------- Specialising "s390_calculate_cond" --------- */
1843
1844   if (vex_streq(function_name, "s390_calculate_cond")) {
1845      IRExpr *cond_expr, *cc_op_expr, *cc_dep1, *cc_dep2;
1846      ULong cond, cc_op;
1847
1848      vassert(arity == 5);
1849
1850      cond_expr  = args[0];
1851      cc_op_expr = args[1];
1852
1853      /* The necessary requirement for all optimizations here is that the
1854         condition and the cc_op are constant. So check that upfront. */
1855      if (! isC64(cond_expr))  return NULL;
1856      if (! isC64(cc_op_expr)) return NULL;
1857
1858      cond    = cond_expr->Iex.Const.con->Ico.U64;
1859      cc_op   = cc_op_expr->Iex.Const.con->Ico.U64;
1860
1861      vassert(cond <= 15);
1862
1863      /*
1864        +------+---+---+---+---+
1865        | cc   | 0 | 1 | 2 | 3 |
1866        | cond | 8 | 4 | 2 | 1 |
1867        +------+---+---+---+---+
1868      */
1869      cc_dep1 = args[2];
1870      cc_dep2 = args[3];
1871
1872      /* S390_CC_OP_SIGNED_COMPARE */
1873      if (cc_op == S390_CC_OP_SIGNED_COMPARE) {
1874         /*
1875            cc == 0  --> cc_dep1 == cc_dep2   (cond == 8)
1876            cc == 1  --> cc_dep1 <  cc_dep2   (cond == 4)
1877            cc == 2  --> cc_dep1 >  cc_dep2   (cond == 2)
1878
1879            Because cc == 3 cannot occur the rightmost bit of cond is
1880            a don't care.
1881         */
1882         if (cond == 8 || cond == 8 + 1) {
1883            return unop(Iop_1Uto32, binop(Iop_CmpEQ64, cc_dep1, cc_dep2));
1884         }
1885         if (cond == 4 + 2 || cond == 4 + 2 + 1) {
1886            return unop(Iop_1Uto32, binop(Iop_CmpNE64, cc_dep1, cc_dep2));
1887         }
1888         if (cond == 4 || cond == 4 + 1) {
1889            return unop(Iop_1Uto32, binop(Iop_CmpLT64S, cc_dep1, cc_dep2));
1890         }
1891         if (cond == 8 + 4 || cond == 8 + 4 + 1) {
1892            return unop(Iop_1Uto32, binop(Iop_CmpLE64S, cc_dep1, cc_dep2));
1893         }
1894         /* cc_dep1 > cc_dep2  ---->  cc_dep2 < cc_dep1 */
1895         if (cond == 2 || cond == 2 + 1) {
1896            return unop(Iop_1Uto32, binop(Iop_CmpLT64S, cc_dep2, cc_dep1));
1897         }
1898         if (cond == 8 + 2 || cond == 8 + 2 + 1) {
1899            return unop(Iop_1Uto32, binop(Iop_CmpLE64S, cc_dep2, cc_dep1));
1900         }
1901         if (cond == 8 + 4 + 2 || cond == 8 + 4 + 2 + 1) {
1902            return mkU32(1);
1903         }
1904         /* Remaining case */
1905         return mkU32(0);
1906      }
1907
1908      /* S390_CC_OP_UNSIGNED_COMPARE */
1909      if (cc_op == S390_CC_OP_UNSIGNED_COMPARE) {
1910         /*
1911            cc == 0  --> cc_dep1 == cc_dep2   (cond == 8)
1912            cc == 1  --> cc_dep1 <  cc_dep2   (cond == 4)
1913            cc == 2  --> cc_dep1 >  cc_dep2   (cond == 2)
1914
1915            Because cc == 3 cannot occur the rightmost bit of cond is
1916            a don't care.
1917         */
1918         if (cond == 8 || cond == 8 + 1) {
1919            return unop(Iop_1Uto32, binop(Iop_CmpEQ64, cc_dep1, cc_dep2));
1920         }
1921         if (cond == 4 + 2 || cond == 4 + 2 + 1) {
1922            return unop(Iop_1Uto32, binop(Iop_CmpNE64, cc_dep1, cc_dep2));
1923         }
1924         if (cond == 4 || cond == 4 + 1) {
1925            return unop(Iop_1Uto32, binop(Iop_CmpLT64U, cc_dep1, cc_dep2));
1926         }
1927         if (cond == 8 + 4 || cond == 8 + 4 + 1) {
1928            return unop(Iop_1Uto32, binop(Iop_CmpLE64U, cc_dep1, cc_dep2));
1929         }
1930         /* cc_dep1 > cc_dep2  ---->  cc_dep2 < cc_dep1 */
1931         if (cond == 2 || cond == 2 + 1) {
1932            return unop(Iop_1Uto32, binop(Iop_CmpLT64U, cc_dep2, cc_dep1));
1933         }
1934         if (cond == 8 + 2 || cond == 8 + 2 + 1) {
1935            return unop(Iop_1Uto32, binop(Iop_CmpLE64U, cc_dep2, cc_dep1));
1936         }
1937         if (cond == 8 + 4 + 2 || cond == 8 + 4 + 2 + 1) {
1938            return mkU32(1);
1939         }
1940         /* Remaining case */
1941         return mkU32(0);
1942      }
1943
1944      /* S390_CC_OP_LOAD_AND_TEST */
1945      if (cc_op == S390_CC_OP_LOAD_AND_TEST) {
1946         /*
1947            cc == 0  --> cc_dep1 == 0   (cond == 8)
1948            cc == 1  --> cc_dep1 <  0   (cond == 4)
1949            cc == 2  --> cc_dep1 >  0   (cond == 2)
1950
1951            Because cc == 3 cannot occur the rightmost bit of cond is
1952            a don't care.
1953         */
1954         if (cond == 8 || cond == 8 + 1) {
1955            return unop(Iop_1Uto32, binop(Iop_CmpEQ64, cc_dep1, mkU64(0)));
1956         }
1957         if (cond == 4 + 2 || cond == 4 + 2 + 1) {
1958            return unop(Iop_1Uto32, binop(Iop_CmpNE64, cc_dep1, mkU64(0)));
1959         }
1960         if (cond == 4 || cond == 4 + 1) {
1961             /* Special case cc_dep < 0. Only check the MSB to avoid bogus
1962               memcheck complaints due to gcc magic. Fixes 343802
1963             */
1964            return unop(Iop_64to32, binop(Iop_Shr64, cc_dep1, mkU8(63)));
1965         }
1966         if (cond == 8 + 4 || cond == 8 + 4 + 1) {
1967            return unop(Iop_1Uto32, binop(Iop_CmpLE64S, cc_dep1, mkU64(0)));
1968         }
1969         /* cc_dep1 > 0  ---->  0 < cc_dep1 */
1970         if (cond == 2 || cond == 2 + 1) {
1971            return unop(Iop_1Uto32, binop(Iop_CmpLT64S, mkU64(0), cc_dep1));
1972         }
1973         if (cond == 8 + 2 || cond == 8 + 2 + 1) {
1974            /* Special case cc_dep >= 0. Only check the MSB to avoid bogus
1975               memcheck complaints due to gcc magic. Fixes 308427
1976             */
1977            return unop(Iop_64to32, binop(Iop_Xor64,
1978                                          binop(Iop_Shr64, cc_dep1, mkU8(63)),
1979                                          mkU64(1)));
1980         }
1981         if (cond == 8 + 4 + 2 || cond == 8 + 4 + 2 + 1) {
1982            return mkU32(1);
1983         }
1984         /* Remaining case */
1985         return mkU32(0);
1986      }
1987
1988      /* S390_CC_OP_BITWISE */
1989      if (cc_op == S390_CC_OP_BITWISE) {
1990         /*
1991            cc_dep1 is the result of the boolean operation.
1992
1993            cc == 0  --> cc_dep1 == 0   (cond == 8)
1994            cc == 1  --> cc_dep1 != 0   (cond == 4)
1995
1996            Because cc == 2 and cc == 3 cannot occur the two rightmost bits of
1997            cond are don't cares. Therefore:
1998
1999            cond == 00xx  -> always false
2000            cond == 01xx  -> not equal
2001            cond == 10xx  -> equal
2002            cond == 11xx  -> always true
2003         */
2004         if ((cond & (8 + 4)) == 8 + 4) {
2005            return mkU32(1);
2006         }
2007         if (cond & 8) {
2008            return unop(Iop_1Uto32, binop(Iop_CmpEQ64, cc_dep1, mkU64(0)));
2009         }
2010         if (cond & 4) {
2011            return unop(Iop_1Uto32, binop(Iop_CmpNE64, cc_dep1, mkU64(0)));
2012         }
2013         /* Remaining case */
2014         return mkU32(0);
2015      }
2016
2017      /* S390_CC_OP_INSERT_CHAR_MASK_32
2018         Since the mask comes from an immediate field in the opcode, we
2019         expect the mask to be a constant here. That simplifies matters. */
2020      if (cc_op == S390_CC_OP_INSERT_CHAR_MASK_32) {
2021         ULong mask;
2022         UInt imask = 0, shift = 0;
2023         IRExpr *word;
2024
2025         if (! isC64(cc_dep2)) goto missed;
2026
2027         mask = cc_dep2->Iex.Const.con->Ico.U64;
2028
2029         /* Extract the 32-bit value from the thunk */
2030
2031         word = unop(Iop_64to32, cc_dep1);
2032
2033         switch (mask) {
2034         case 0:  shift =  0; imask = 0x00000000; break;
2035         case 1:  shift = 24; imask = 0x000000FF; break;
2036         case 2:  shift = 16; imask = 0x0000FF00; break;
2037         case 3:  shift = 16; imask = 0x0000FFFF; break;
2038         case 4:  shift =  8; imask = 0x00FF0000; break;
2039         case 5:  shift =  8; imask = 0x00FF00FF; break;
2040         case 6:  shift =  8; imask = 0x00FFFF00; break;
2041         case 7:  shift =  8; imask = 0x00FFFFFF; break;
2042         case 8:  shift =  0; imask = 0xFF000000; break;
2043         case 9:  shift =  0; imask = 0xFF0000FF; break;
2044         case 10: shift =  0; imask = 0xFF00FF00; break;
2045         case 11: shift =  0; imask = 0xFF00FFFF; break;
2046         case 12: shift =  0; imask = 0xFFFF0000; break;
2047         case 13: shift =  0; imask = 0xFFFF00FF; break;
2048         case 14: shift =  0; imask = 0xFFFFFF00; break;
2049         case 15: shift =  0; imask = 0xFFFFFFFF; break;
2050         }
2051
2052         /* Select the bits that were inserted */
2053         word = binop(Iop_And32, word, mkU32(imask));
2054
2055         /* cc == 0  --> all inserted bits zero or mask == 0   (cond == 8)
2056            cc == 1  --> leftmost inserted bit is one          (cond == 4)
2057            cc == 2  --> leftmost inserted bit is zero and not (cond == 2)
2058                         all inserted bits are zero
2059
            Because cc == 3 cannot occur the rightmost bit of cond is
            a don't care. */
2061         if (cond == 8 || cond == 8 + 1) {
2062            return unop(Iop_1Uto32, binop(Iop_CmpEQ32, word, mkU32(0)));
2063         }
2064         if (cond == 4 + 2 || cond == 4 + 2 + 1) {
2065            return unop(Iop_1Uto32, binop(Iop_CmpNE32, word, mkU32(0)));
2066         }
2067
2068         /* Sign extend */
2069         if (shift != 0) {
2070            word = binop(Iop_Sar32, binop(Iop_Shl32, word, mkU8(shift)),
2071                         mkU8(shift));
2072         }
2073
2074         if (cond == 4 || cond == 4 + 1) {  /* word < 0 */
2075            return unop(Iop_1Uto32, binop(Iop_CmpLT32S, word, mkU32(0)));
2076         }
2077         if (cond == 2 || cond == 2 + 1) {  /* word > 0 */
2078            return unop(Iop_1Uto32, binop(Iop_CmpLT32S, mkU32(0), word));
2079         }
2080         if (cond == 8 + 4 || cond == 8 + 4 + 1) {
2081            return unop(Iop_1Uto32, binop(Iop_CmpLE32S, word, mkU32(0)));
2082         }
2083         if (cond == 8 + 2 || cond == 8 + 2 + 1) {
2084            return unop(Iop_1Uto32, binop(Iop_CmpLE32S, mkU32(0), word));
2085         }
2086         if (cond == 8 + 4 + 2 || cond == 8 + 4 + 2 + 1) {
2087            return mkU32(1);
2088         }
2089         /* Remaining case */
2090         return mkU32(0);
2091      }
2092
2093      /* S390_CC_OP_TEST_UNDER_MASK_8
2094         Since the mask comes from an immediate field in the opcode, we
2095         expect the mask to be a constant here. That simplifies matters. */
2096      if (cc_op == S390_CC_OP_TEST_UNDER_MASK_8) {
2097         ULong mask16;
2098
2099         if (! isC64(cc_dep2)) goto missed;
2100
2101         mask16 = cc_dep2->Iex.Const.con->Ico.U64;
2102
         /* Get rid of the mask16 == 0 case first. Some of the simplifications
            below (e.g. for OVFL) only hold if mask16 != 0.  */
2105         if (mask16 == 0) {   /* cc == 0 */
2106            if (cond & 0x8) return mkU32(1);
2107            return mkU32(0);
2108         }
2109
2110         /* cc == 2 is a don't care */
2111         if (cond == 8 || cond == 8 + 2) {
2112            return unop(Iop_1Uto32, binop(Iop_CmpEQ64,
2113                                          binop(Iop_And64, cc_dep1, cc_dep2),
2114                                          mkU64(0)));
2115         }
2116         if (cond == 7 || cond == 7 - 2) {
2117            return unop(Iop_1Uto32, binop(Iop_CmpNE64,
2118                                          binop(Iop_And64, cc_dep1, cc_dep2),
2119                                          mkU64(0)));
2120         }
2121         if (cond == 1 || cond == 1 + 2) {
2122            return unop(Iop_1Uto32, binop(Iop_CmpEQ64,
2123                                          binop(Iop_And64, cc_dep1, cc_dep2),
2124                                          cc_dep2));
2125         }
2126         if (cond == 14 || cond == 14 - 2) {  /* ! OVFL */
2127            return unop(Iop_1Uto32, binop(Iop_CmpNE64,
2128                                          binop(Iop_And64, cc_dep1, cc_dep2),
2129                                          cc_dep2));
2130         }
2131         goto missed;
2132      }
2133
2134      /* S390_CC_OP_TEST_UNDER_MASK_16
2135         Since the mask comes from an immediate field in the opcode, we
2136         expect the mask to be a constant here. That simplifies matters. */
2137      if (cc_op == S390_CC_OP_TEST_UNDER_MASK_16) {
2138         ULong mask16;
2139         UInt msb;
2140
2141         if (! isC64(cc_dep2)) goto missed;
2142
2143         mask16 = cc_dep2->Iex.Const.con->Ico.U64;
2144
         /* Get rid of the mask16 == 0 case first. Some of the simplifications
            below (e.g. for OVFL) only hold if mask16 != 0.  */
2147         if (mask16 == 0) {   /* cc == 0 */
2148            if (cond & 0x8) return mkU32(1);
2149            return mkU32(0);
2150         }
2151
2152         if (cond == 8) {
2153            return unop(Iop_1Uto32, binop(Iop_CmpEQ64,
2154                                          binop(Iop_And64, cc_dep1, cc_dep2),
2155                                          mkU64(0)));
2156         }
2157         if (cond == 7) {
2158            return unop(Iop_1Uto32, binop(Iop_CmpNE64,
2159                                          binop(Iop_And64, cc_dep1, cc_dep2),
2160                                          mkU64(0)));
2161         }
2162         if (cond == 1) {
2163            return unop(Iop_1Uto32, binop(Iop_CmpEQ64,
2164                                          binop(Iop_And64, cc_dep1, cc_dep2),
2165                                          mkU64(mask16)));
2166         }
2167         if (cond == 14) {  /* ! OVFL */
2168            return unop(Iop_1Uto32, binop(Iop_CmpNE64,
2169                                          binop(Iop_And64, cc_dep1, cc_dep2),
2170                                          mkU64(mask16)));
2171         }
2172
2173         /* Find MSB in mask */
2174         msb = 0x8000;
2175         while (msb > mask16)
2176            msb >>= 1;
2177
2178         if (cond == 2) {  /* cc == 2 */
2179            IRExpr *c1, *c2;
2180
2181            /* (cc_dep & msb) != 0 && (cc_dep & mask16) != mask16 */
2182            c1 = binop(Iop_CmpNE64,
2183                       binop(Iop_And64, cc_dep1, mkU64(msb)), mkU64(0));
2184            c2 = binop(Iop_CmpNE64,
2185                       binop(Iop_And64, cc_dep1, cc_dep2),
2186                       mkU64(mask16));
2187            return binop(Iop_And32, unop(Iop_1Uto32, c1),
2188                         unop(Iop_1Uto32, c2));
2189         }
2190
2191         if (cond == 4) {  /* cc == 1 */
2192            IRExpr *c1, *c2;
2193
2194            /* (cc_dep & msb) == 0 && (cc_dep & mask16) != 0 */
2195            c1 = binop(Iop_CmpEQ64,
2196                       binop(Iop_And64, cc_dep1, mkU64(msb)), mkU64(0));
2197            c2 = binop(Iop_CmpNE64,
2198                       binop(Iop_And64, cc_dep1, cc_dep2),
2199                       mkU64(0));
2200            return binop(Iop_And32, unop(Iop_1Uto32, c1),
2201                         unop(Iop_1Uto32, c2));
2202         }
2203
2204         if (cond == 11) {  /* cc == 0,2,3 */
2205            IRExpr *c1, *c2;
2206
2207            c1 = binop(Iop_CmpNE64,
2208                       binop(Iop_And64, cc_dep1, mkU64(msb)), mkU64(0));
2209            c2 = binop(Iop_CmpEQ64,
2210                       binop(Iop_And64, cc_dep1, cc_dep2),
2211                       mkU64(0));
2212            return binop(Iop_Or32, unop(Iop_1Uto32, c1),
2213                         unop(Iop_1Uto32, c2));
2214         }
2215
2216         if (cond == 3) {  /* cc == 2 || cc == 3 */
2217            return unop(Iop_1Uto32,
2218                        binop(Iop_CmpNE64,
2219                              binop(Iop_And64, cc_dep1, mkU64(msb)),
2220                              mkU64(0)));
2221         }
2222         if (cond == 12) { /* cc == 0 || cc == 1 */
2223            return unop(Iop_1Uto32,
2224                        binop(Iop_CmpEQ64,
2225                              binop(Iop_And64, cc_dep1, mkU64(msb)),
2226                              mkU64(0)));
2227         }
2228         // vex_printf("TUM mask = 0x%llx\n", mask16);
2229         goto missed;
2230      }
2231
2232      /* S390_CC_OP_UNSIGNED_SUB_64/32 */
2233      if (cc_op == S390_CC_OP_UNSIGNED_SUB_64 ||
2234          cc_op == S390_CC_OP_UNSIGNED_SUB_32) {
2235         /*
2236            cc_dep1, cc_dep2 are the zero extended left and right operands
2237
2238            cc == 1  --> result != 0, borrow    (cond == 4)
2239            cc == 2  --> result == 0, no borrow (cond == 2)
2240            cc == 3  --> result != 0, no borrow (cond == 1)
2241
2242            cc = (cc_dep1 == cc_dep2) ? 2
2243                                      : (cc_dep1 > cc_dep2) ? 3 : 1;
2244
2245            Because cc == 0 cannot occur the leftmost bit of cond is
2246            a don't care.
2247         */
2248         if (cond == 1 || cond == 1 + 8) {  /* cc == 3   op2 < op1 */
2249            return unop(Iop_1Uto32, binop(Iop_CmpLT64U, cc_dep2, cc_dep1));
2250         }
2251         if (cond == 2 || cond == 2 + 8) {  /* cc == 2 */
2252            return unop(Iop_1Uto32, binop(Iop_CmpEQ64, cc_dep1, cc_dep2));
2253         }
2254         if (cond == 4 || cond == 4 + 8) {  /* cc == 1 */
2255            return unop(Iop_1Uto32, binop(Iop_CmpLT64U, cc_dep1, cc_dep2));
2256         }
2257         if (cond == 3 || cond == 3 + 8) {  /* cc == 2 || cc == 3 */
2258            return unop(Iop_1Uto32, binop(Iop_CmpLE64U, cc_dep2, cc_dep1));
2259         }
2260         if (cond == 6 || cond == 6 + 8) {  /* cc == 2 || cc == 1 */
2261            return unop(Iop_1Uto32, binop(Iop_CmpLE64U, cc_dep1, cc_dep2));
2262         }
2263
2264         if (cond == 5 || cond == 5 + 8) {  /* cc == 3 || cc == 1 */
2265            return unop(Iop_1Uto32, binop(Iop_CmpNE64, cc_dep1, cc_dep2));
2266         }
2267         if (cond == 7 || cond == 7 + 8) {
2268            return mkU32(1);
2269         }
2270         /* Remaining case */
2271         return mkU32(0);
2272      }
2273
2274      /* S390_CC_OP_UNSIGNED_ADD_64 */
2275      if (cc_op == S390_CC_OP_UNSIGNED_ADD_64) {
2276         /*
2277            cc_dep1, cc_dep2 are the zero extended left and right operands
2278
2279            cc == 0  --> result == 0, no carry  (cond == 8)
2280            cc == 1  --> result != 0, no carry  (cond == 4)
2281            cc == 2  --> result == 0, carry     (cond == 2)
2282            cc == 3  --> result != 0, carry     (cond == 1)
2283         */
2284         if (cond == 8) { /* cc == 0 */
2285            /* Both inputs are 0 */
2286            return unop(Iop_1Uto32, binop(Iop_CmpEQ64,
2287                                          binop(Iop_Or64, cc_dep1, cc_dep2),
2288                                          mkU64(0)));
2289         }
2290         if (cond == 7) { /* cc == 1,2,3 */
2291            /* Not both inputs are 0 */
2292            return unop(Iop_1Uto32, binop(Iop_CmpNE64,
2293                                          binop(Iop_Or64, cc_dep1, cc_dep2),
2294                                          mkU64(0)));
2295         }
2296         if (cond == 8 + 2) {  /* cc == 0,2  -> result is zero */
2297            return unop(Iop_1Uto32, binop(Iop_CmpEQ64,
2298                                          binop(Iop_Add64, cc_dep1, cc_dep2),
2299                                          mkU64(0)));
2300         }
2301         if (cond == 4 + 1) {  /* cc == 1,3  -> result is not zero */
2302            return unop(Iop_1Uto32, binop(Iop_CmpNE64,
2303                                          binop(Iop_Add64, cc_dep1, cc_dep2),
2304                                          mkU64(0)));
2305         }
2306         goto missed;
2307      }
2308
2309      /* S390_CC_OP_UNSIGNED_ADD_32 */
2310      if (cc_op == S390_CC_OP_UNSIGNED_ADD_32) {
2311         /*
2312            cc_dep1, cc_dep2 are the zero extended left and right operands
2313
2314            cc == 0  --> result == 0, no carry  (cond == 8)
2315            cc == 1  --> result != 0, no carry  (cond == 4)
2316            cc == 2  --> result == 0, carry     (cond == 2)
2317            cc == 3  --> result != 0, carry     (cond == 1)
2318         */
2319         if (cond == 8) { /* cc == 0 */
2320            /* Both inputs are 0 */
2321            return unop(Iop_1Uto32, binop(Iop_CmpEQ64,
2322                                          binop(Iop_Or64, cc_dep1, cc_dep2),
2323                                          mkU64(0)));
2324         }
2325         if (cond == 7) { /* cc == 1,2,3 */
2326            /* Not both inputs are 0 */
2327            return unop(Iop_1Uto32, binop(Iop_CmpNE64,
2328                                          binop(Iop_Or64, cc_dep1, cc_dep2),
2329                                          mkU64(0)));
2330         }
2331         if (cond == 8 + 2) {  /* cc == 0,2  -> result is zero */
2332            return unop(Iop_1Uto32, binop(Iop_CmpEQ32,
2333                                          binop(Iop_Add32,
2334                                                unop(Iop_64to32, cc_dep1),
2335                                                unop(Iop_64to32, cc_dep2)),
2336                                          mkU32(0)));
2337         }
2338         if (cond == 4 + 1) {  /* cc == 1,3  -> result is not zero */
2339            return unop(Iop_1Uto32, binop(Iop_CmpNE32,
2340                                          binop(Iop_Add32,
2341                                                unop(Iop_64to32, cc_dep1),
2342                                                unop(Iop_64to32, cc_dep2)),
2343                                          mkU32(0)));
2344         }
2345         goto missed;
2346      }
2347
2348      /* S390_CC_OP_SET */
2349      if (cc_op == S390_CC_OP_SET) {
2350         /* cc_dep1 is the condition code
2351
2352            Return 1, if ((cond << cc_dep1) & 0x8) != 0 */
2353
2354        return unop(Iop_1Uto32,
2355                    binop(Iop_CmpNE64,
2356                          binop(Iop_And64,
2357                                binop(Iop_Shl64, cond_expr,
2358                                      unop(Iop_64to8, cc_dep1)),
2359                                mkU64(8)),
2360                          mkU64(0)));
2361      }
2362
2363      goto missed;
2364   }
2365
2366   /* --------- Specialising "s390_calculate_cc" --------- */
2367
2368   if (vex_streq(function_name, "s390_calculate_cc")) {
2369      IRExpr *cc_op_expr, *cc_dep1;
2370      ULong cc_op;
2371
2372      vassert(arity == 4);
2373
2374      cc_op_expr = args[0];
2375
2376      /* The necessary requirement for all optimizations here is that
2377         cc_op is constant. So check that upfront. */
2378      if (! isC64(cc_op_expr)) return NULL;
2379
2380      cc_op   = cc_op_expr->Iex.Const.con->Ico.U64;
2381      cc_dep1 = args[1];
2382
2383      if (cc_op == S390_CC_OP_BITWISE) {
2384         return unop(Iop_1Uto32,
2385                     binop(Iop_CmpNE64, cc_dep1, mkU64(0)));
2386      }
2387
2388      if (cc_op == S390_CC_OP_SET) {
2389         return unop(Iop_64to32, cc_dep1);
2390      }
2391
2392      goto missed;
2393   }
2394
2395missed:
2396   return NULL;
2397}
2398
2399/*---------------------------------------------------------------*/
2400/*--- end                                guest_s390_helpers.c ---*/
2401/*---------------------------------------------------------------*/
2402