1/* -*- mode: C; c-basic-offset: 3; -*- */
2
3/*---------------------------------------------------------------*/
4/*--- begin                              guest_s390_helpers.c ---*/
5/*---------------------------------------------------------------*/
6
7/*
8   This file is part of Valgrind, a dynamic binary instrumentation
9   framework.
10
11   Copyright IBM Corp. 2010-2013
12
13   This program is free software; you can redistribute it and/or
14   modify it under the terms of the GNU General Public License as
15   published by the Free Software Foundation; either version 2 of the
16   License, or (at your option) any later version.
17
18   This program is distributed in the hope that it will be useful, but
19   WITHOUT ANY WARRANTY; without even the implied warranty of
20   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21   General Public License for more details.
22
23   You should have received a copy of the GNU General Public License
24   along with this program; if not, write to the Free Software
25   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26   02110-1301, USA.
27
28   The GNU General Public License is contained in the file COPYING.
29*/
30
31/* Contributed by Florian Krohm */
32
33#include "libvex_basictypes.h"
34#include "libvex_emnote.h"
35#include "libvex_guest_s390x.h"
36#include "libvex_ir.h"
37#include "libvex.h"
38#include "libvex_s390x_common.h"
39
40#include "main_util.h"
41#include "main_globals.h"
42#include "guest_generic_bb_to_IR.h"
43#include "guest_s390_defs.h"
44#include "s390_defs.h"               /* S390_BFP_ROUND_xyzzy */
45
46void
47LibVEX_GuestS390X_initialise(VexGuestS390XState *state)
48{
49/*------------------------------------------------------------*/
50/*--- Initialise ar registers                              ---*/
51/*------------------------------------------------------------*/
52
53   state->guest_a0 = 0;
54   state->guest_a1 = 0;
55   state->guest_a2 = 0;
56   state->guest_a3 = 0;
57   state->guest_a4 = 0;
58   state->guest_a5 = 0;
59   state->guest_a6 = 0;
60   state->guest_a7 = 0;
61   state->guest_a8 = 0;
62   state->guest_a9 = 0;
63   state->guest_a10 = 0;
64   state->guest_a11 = 0;
65   state->guest_a12 = 0;
66   state->guest_a13 = 0;
67   state->guest_a14 = 0;
68   state->guest_a15 = 0;
69
70/*------------------------------------------------------------*/
71/*--- Initialise fpr registers                             ---*/
72/*------------------------------------------------------------*/
73
74   state->guest_f0 = 0;
75   state->guest_f1 = 0;
76   state->guest_f2 = 0;
77   state->guest_f3 = 0;
78   state->guest_f4 = 0;
79   state->guest_f5 = 0;
80   state->guest_f6 = 0;
81   state->guest_f7 = 0;
82   state->guest_f8 = 0;
83   state->guest_f9 = 0;
84   state->guest_f10 = 0;
85   state->guest_f11 = 0;
86   state->guest_f12 = 0;
87   state->guest_f13 = 0;
88   state->guest_f14 = 0;
89   state->guest_f15 = 0;
90
91/*------------------------------------------------------------*/
92/*--- Initialise gpr registers                             ---*/
93/*------------------------------------------------------------*/
94
95   state->guest_r0 = 0;
96   state->guest_r1 = 0;
97   state->guest_r2 = 0;
98   state->guest_r3 = 0;
99   state->guest_r4 = 0;
100   state->guest_r5 = 0;
101   state->guest_r6 = 0;
102   state->guest_r7 = 0;
103   state->guest_r8 = 0;
104   state->guest_r9 = 0;
105   state->guest_r10 = 0;
106   state->guest_r11 = 0;
107   state->guest_r12 = 0;
108   state->guest_r13 = 0;
109   state->guest_r14 = 0;
110   state->guest_r15 = 0;
111
112/*------------------------------------------------------------*/
113/*--- Initialise S390 miscellaneous registers              ---*/
114/*------------------------------------------------------------*/
115
116   state->guest_counter = 0;
117   state->guest_fpc = 0;
118   state->guest_IA = 0;
119
120/*------------------------------------------------------------*/
121/*--- Initialise S390 pseudo registers                     ---*/
122/*------------------------------------------------------------*/
123
124   state->guest_SYSNO = 0;
125
126/*------------------------------------------------------------*/
127/*--- Initialise generic pseudo registers                  ---*/
128/*------------------------------------------------------------*/
129
130   state->guest_NRADDR = 0;
131   state->guest_CMSTART = 0;
132   state->guest_CMLEN = 0;
133   state->guest_IP_AT_SYSCALL = 0;
134   state->guest_EMNOTE = EmNote_NONE;
135   state->host_EvC_COUNTER = 0;
136   state->host_EvC_FAILADDR = 0;
137
138/*------------------------------------------------------------*/
139/*--- Initialise thunk                                     ---*/
140/*------------------------------------------------------------*/
141
142   state->guest_CC_OP = 0;
143   state->guest_CC_DEP1 = 0;
144   state->guest_CC_DEP2 = 0;
145   state->guest_CC_NDEP = 0;
146
147   __builtin_memset(state->padding, 0x0, sizeof(state->padding));
148}
149
150
151/* Figure out if any part of the guest state contained in minoff
152   .. maxoff requires precise memory exceptions.  If in doubt return
153   True (but this generates significantly slower code).  */
154Bool
155guest_s390x_state_requires_precise_mem_exns (
156   Int minoff, Int maxoff, VexRegisterUpdates pxControl
157)
158{
159   Int lr_min = S390X_GUEST_OFFSET(guest_LR);
160   Int lr_max = lr_min + 8 - 1;
161   Int sp_min = S390X_GUEST_OFFSET(guest_SP);
162   Int sp_max = sp_min + 8 - 1;
163   Int fp_min = S390X_GUEST_OFFSET(guest_FP);
164   Int fp_max = fp_min + 8 - 1;
165   Int ia_min = S390X_GUEST_OFFSET(guest_IA);
166   Int ia_max = ia_min + 8 - 1;
167
168   if (maxoff < sp_min || minoff > sp_max) {
169      /* No overlap with SP */
170      if (pxControl == VexRegUpdSpAtMemAccess)
171         return False; // We only need to check stack pointer.
172   } else {
173      return True;
174   }
175
176   if (maxoff < lr_min || minoff > lr_max) {
177      /* No overlap with LR */
178   } else {
179      return True;
180   }
181
182   if (maxoff < fp_min || minoff > fp_max) {
183      /* No overlap with FP */
184   } else {
185      return True;
186   }
187
188   if (maxoff < ia_min || minoff > ia_max) {
189      /* No overlap with IA */
190   } else {
191      return True;
192   }
193
194   return False;
195}
196
197
198#define ALWAYSDEFD(field)                             \
199    { S390X_GUEST_OFFSET(field),            \
200      (sizeof ((VexGuestS390XState*)0)->field) }
201
202VexGuestLayout s390xGuest_layout = {
203
204   /* Total size of the guest state, in bytes. */
205   .total_sizeB = sizeof(VexGuestS390XState),
206
207   /* Describe the stack pointer. */
208   .offset_SP = S390X_GUEST_OFFSET(guest_SP),
209   .sizeof_SP = 8,
210
211   /* Describe the frame pointer. */
212   .offset_FP = S390X_GUEST_OFFSET(guest_FP),
213   .sizeof_FP = 8,
214
215   /* Describe the instruction pointer. */
216   .offset_IP = S390X_GUEST_OFFSET(guest_IA),
217   .sizeof_IP = 8,
218
219   /* Describe any sections to be regarded by Memcheck as
220      'always-defined'. */
221   .n_alwaysDefd = 9,
222
223   /* Flags thunk: OP and NDEP are always defined, whereas DEP1
224      and DEP2 have to be tracked.  See detailed comment in
225      gdefs.h on meaning of thunk fields. */
226   .alwaysDefd = {
227      /*  0 */ ALWAYSDEFD(guest_CC_OP),     /* generic */
228      /*  1 */ ALWAYSDEFD(guest_CC_NDEP),   /* generic */
229      /*  2 */ ALWAYSDEFD(guest_EMNOTE),    /* generic */
230      /*  3 */ ALWAYSDEFD(guest_CMSTART),   /* generic */
231      /*  4 */ ALWAYSDEFD(guest_CMLEN),     /* generic */
232      /*  5 */ ALWAYSDEFD(guest_IP_AT_SYSCALL), /* generic */
233      /*  6 */ ALWAYSDEFD(guest_IA),        /* control reg */
234      /*  7 */ ALWAYSDEFD(guest_fpc),       /* control reg */
235      /*  8 */ ALWAYSDEFD(guest_counter),   /* internal usage register */
236   }
237};
238
239/*------------------------------------------------------------*/
240/*--- Dirty helper for EXecute                             ---*/
241/*------------------------------------------------------------*/
242void
243s390x_dirtyhelper_EX(ULong torun)
244{
245   last_execute_target = torun;
246}
247
248
249/*------------------------------------------------------------*/
250/*--- Dirty helper for Clock instructions                  ---*/
251/*------------------------------------------------------------*/
#if defined(VGA_s390x)
/* Execute the host's STCK instruction with ADDR as its storage operand
   and return the resulting condition code.  "ipm" copies the condition
   code into the register and "srl ...,28" moves it down to the least
   significant bits. */
ULong
s390x_dirtyhelper_STCK(ULong *addr)
{
   UInt cc;

   asm volatile("stck %0\n"
                "ipm %1\n"
                "srl %1,28\n"
                : "+Q" (*addr), "=d" (cc) : : "cc");
   return cc;
}

/* Execute the host's STCKE instruction with ADDR as its storage operand
   and return the resulting condition code. */
ULong
s390x_dirtyhelper_STCKE(ULong *addr)
{
   UInt cc;

   /* STCKE stores a 16-byte value, whereas the "+Q" operand describes
      only the 8 bytes of *ADDR.  The "memory" clobber tells the compiler
      that storage beyond that operand is written as well. */
   asm volatile("stcke %0\n"
                "ipm %1\n"
                "srl %1,28\n"
                : "+Q" (*addr), "=d" (cc) : : "cc", "memory");
   return cc;
}

/* Execute the host's STCKF instruction (emitted through .insn) with
   ADDR as its storage operand and return the resulting condition code. */
ULong
s390x_dirtyhelper_STCKF(ULong *addr)
{
   UInt cc;

   asm volatile(".insn s,0xb27c0000,%0\n"
                "ipm %1\n"
                "srl %1,28\n"
                : "+Q" (*addr), "=d" (cc) : : "cc");
   return cc;
}
#else
/* Not compiled for an s390x host: the clock instructions cannot be
   executed.  Nothing is stored at ADDR and 3 is returned. */
ULong s390x_dirtyhelper_STCK(ULong *addr)  {return 3;}
ULong s390x_dirtyhelper_STCKF(ULong *addr) {return 3;}
ULong s390x_dirtyhelper_STCKE(ULong *addr) {return 3;}
#endif /* VGA_s390x */
292
293/*------------------------------------------------------------*/
294/*--- Dirty helper for Store Facility instruction          ---*/
295/*------------------------------------------------------------*/
296#if defined(VGA_s390x)
297static void
298s390_set_facility_bit(ULong *addr, UInt bitno, UInt value)
299{
300   addr  += bitno / 64;
301   bitno  = bitno % 64;
302
303   ULong mask = 1;
304   mask <<= (63 - bitno);
305
306   if (value == 1) {
307      *addr |= mask;   // set
308   } else {
309      *addr &= ~mask;  // clear
310   }
311}
312
313ULong
314s390x_dirtyhelper_STFLE(VexGuestS390XState *guest_state, ULong *addr)
315{
316   ULong hoststfle[S390_NUM_FACILITY_DW], cc, num_dw, i;
317   register ULong reg0 asm("0") = guest_state->guest_r0 & 0xF;  /* r0[56:63] */
318
319   /* We cannot store more than S390_NUM_FACILITY_DW
320      (and it makes not much sense to do so anyhow) */
321   if (reg0 > S390_NUM_FACILITY_DW - 1)
322      reg0 = S390_NUM_FACILITY_DW - 1;
323
324   num_dw = reg0 + 1;  /* number of double words written */
325
326   asm volatile(" .insn s,0xb2b00000,%0\n"   /* stfle */
327                "ipm    %2\n"
328                "srl    %2,28\n"
329                : "=m" (hoststfle), "+d"(reg0), "=d"(cc) : : "cc", "memory");
330
331   /* Update guest register 0  with what STFLE set r0 to */
332   guest_state->guest_r0 = reg0;
333
334   /* Set default: VM facilities = host facilities */
335   for (i = 0; i < num_dw; ++i)
336      addr[i] = hoststfle[i];
337
338   /* Now adjust the VM facilities according to what the VM supports */
339   s390_set_facility_bit(addr, S390_FAC_LDISP,  1);
340   s390_set_facility_bit(addr, S390_FAC_EIMM,   1);
341   s390_set_facility_bit(addr, S390_FAC_ETF2,   1);
342   s390_set_facility_bit(addr, S390_FAC_ETF3,   1);
343   s390_set_facility_bit(addr, S390_FAC_GIE,    1);
344   s390_set_facility_bit(addr, S390_FAC_EXEXT,  1);
345   s390_set_facility_bit(addr, S390_FAC_HIGHW,  1);
346
347   s390_set_facility_bit(addr, S390_FAC_HFPMAS, 0);
348   s390_set_facility_bit(addr, S390_FAC_HFPUNX, 0);
349   s390_set_facility_bit(addr, S390_FAC_XCPUT,  0);
350   s390_set_facility_bit(addr, S390_FAC_MSA,    0);
351   s390_set_facility_bit(addr, S390_FAC_PENH,   0);
352   s390_set_facility_bit(addr, S390_FAC_DFP,    0);
353   s390_set_facility_bit(addr, S390_FAC_PFPO,   0);
354   s390_set_facility_bit(addr, S390_FAC_DFPZC,  0);
355   s390_set_facility_bit(addr, S390_FAC_MISC,   0);
356   s390_set_facility_bit(addr, S390_FAC_CTREXE, 0);
357   s390_set_facility_bit(addr, S390_FAC_TREXE,  0);
358   s390_set_facility_bit(addr, S390_FAC_MSA4,   0);
359
360   return cc;
361}
362
363#else
364
365ULong
366s390x_dirtyhelper_STFLE(VexGuestS390XState *guest_state, ULong *addr)
367{
368   return 3;
369}
370#endif /* VGA_s390x */
371
372/*------------------------------------------------------------*/
373/*--- Dirty helper for the "convert unicode" insn family.  ---*/
374/*------------------------------------------------------------*/
/* Store the least significant NUM_BYTES bytes of DATA at ADDRESS, most
   significant of those bytes first.  NUM_BYTES must be in the range
   1 .. 4. */
void
s390x_dirtyhelper_CUxy(UChar *address, ULong data, ULong num_bytes)
{
   UChar *dst;

   vassert(num_bytes >= 1 && num_bytes <= 4);

   /* Fill the destination back to front: the lowest byte of DATA goes
      to the last position, then DATA is shifted down by one byte. */
   for (dst = address + num_bytes; dst != address; data >>= 8) {
      --dst;
      *dst = data & 0xff;
   }
}
389
390
391/*------------------------------------------------------------*/
392/*--- Clean helper for CU21.                               ---*/
393/*------------------------------------------------------------*/
394
/* Perform a CU21 operation (UTF-16 to UTF-8) on the UTF-16 unit in the
   low 16 bits of SRCVAL.  If that unit lies in 0xd800 .. 0xdbff it is
   combined with LOW_SURROGATE into a 4-byte sequence; LOW_SURROGATE is
   not looked at otherwise.  Three things are returned, encoded in an
   ULong value:
   - the converted bytes (at most 4)
   - the number of converted bytes
   - an indication whether LOW_SURROGATE, if any, is invalid

   64      48                16           8                       0
    +-------+-----------------+-----------+-----------------------+
    |  0x0  | converted bytes | num_bytes | invalid_low_surrogate |
    +-------+-----------------+-----------+-----------------------+
*/
ULong
s390_do_cu21(UInt srcval, UInt low_surrogate)
{
   ULong utf8;
   UInt  nbytes, bad_low = 0;

   srcval &= 0xffff;

   if (srcval <= 0x007f) {
      /* One byte, taken over unchanged */
      nbytes = 1;
      utf8   = srcval;
   } else if (srcval <= 0x07ff) {
      /* Two bytes, left to right: lead, trail */
      UInt lead  = 0xc0 | (srcval >> 6);
      UInt trail = 0x80 | (srcval & 0x3f);

      nbytes = 2;
      utf8   = (lead << 8) | trail;
   } else if (srcval < 0xd800 || srcval > 0xdbff) {
      /* Three bytes: 0x0800 .. 0xd7ff and 0xdc00 .. 0xffff */
      UInt lead = 0xe0 | (srcval >> 12);
      UInt mid  = 0x80 | ((srcval >> 6) & 0x3f);
      UInt last = 0x80 | (srcval & 0x3f);

      nbytes = 3;
      utf8   = (lead << 16) | (mid << 8) | last;
   } else {
      /* Four bytes: SRCVAL is a high surrogate (0xd800 .. 0xdbff) */
      UInt uvwxy = ((srcval >> 6) & 0xf) + 1;   // abcd + 1
      UInt c1 = 0xf0 | (uvwxy >> 2);                        // uvw
      UInt c2 = 0x80 | ((uvwxy & 0x3) << 4)                 // xy
                     | ((srcval >> 2) & 0xf);               // efgh
      UInt c3 = 0x80 | ((srcval & 0x3) << 4)                // ij
                     | ((low_surrogate >> 6) & 0xf);        // klmn
      UInt c4 = 0x80 | (low_surrogate & 0x3f);

      nbytes  = 4;
      utf8    = (c1 << 24) | (c2 << 16) | (c3 << 8) | c4;
      bad_low = (low_surrogate & 0xfc00) != 0xdc00;
   }

   /* Assemble the three fields of the return value. */
   return (utf8 << 16) | (nbytes << 8) | bad_low;
}
486
487
488/*------------------------------------------------------------*/
489/*--- Clean helper for CU24.                               ---*/
490/*------------------------------------------------------------*/
491
/* Perform a CU24 operation (UTF-16 to UTF-32) on the UTF-16 unit in the
   low 16 bits of SRCVAL.  If that unit lies in 0xd800 .. 0xdbff it is
   combined with LOW_SURROGATE; LOW_SURROGATE is not looked at
   otherwise.  Two things are returned, encoded in an ULong value:
   - the 4 converted bytes
   - an indication whether LOW_SURROGATE, if any, is invalid

   64     40                 8                       0
    +------------------------+-----------------------+
    |  0x0 | converted bytes | invalid_low_surrogate |
    +------------------------+-----------------------+
*/
ULong
s390_do_cu24(UInt srcval, UInt low_surrogate)
{
   UInt utf32, bad_low = 0;

   srcval &= 0xffff;

   if (srcval >= 0xd800 && srcval <= 0xdbff) {
      /* High surrogate: merge it with the low surrogate */
      UInt uvwxy     = ((srcval >> 6) & 0xf) + 1;   // abcd + 1
      UInt efghij    = srcval & 0x3f;
      UInt klmnoprst = low_surrogate & 0x3ff;

      utf32   = (uvwxy << 16) | (efghij << 10) | klmnoprst;
      bad_low = (low_surrogate & 0xfc00) != 0xdc00;
   } else {
      /* Everything else is taken over unchanged */
      utf32 = srcval;
   }

   /* Assemble the two fields of the return value. */
   return ((ULong)utf32 << 8) | bad_low;
}
529
530
531/*------------------------------------------------------------*/
532/*--- Clean helper for CU42.                               ---*/
533/*------------------------------------------------------------*/
534
/* Perform a CU42 operation (UTF-32 to UTF-16) on SRCVAL.  Three things
   are returned, encoded in an ULong value:
   - the converted bytes (at most 4)
   - the number of converted bytes (2 or 4; 0 if invalid character)
   - an indication whether the UTF-32 character is invalid

   64      48                16           8                   0
    +-------+-----------------+-----------+-------------------+
    |  0x0  | converted bytes | num_bytes | invalid_character |
    +-------+-----------------+-----------+-------------------+
*/
ULong
s390_do_cu42(UInt srcval)
{
   ULong utf16;
   UInt  nbytes;

   /* D800 - DBFF or 00110000 - FFFFFFFF: invalid character.  No bytes
      are converted; only the invalid_character field is set. */
   if ((srcval >= 0xd800 && srcval <= 0xdbff) || srcval > 0x0010FFFF)
      return 1;

   if (srcval <= 0xffff) {
      /* A single UTF-16 unit, taken over unchanged */
      utf16  = srcval;
      nbytes = 2;
   } else {
      /* 00010000 - 0010FFFF: split into a surrogate pair */
      UInt abcd   = ((srcval >> 16) - 1) & 0xf;   // uvwxy - 1
      UInt efghij = (srcval >> 10) & 0x3f;
      UInt high   = (0xd8 << 8) | (abcd << 6) | efghij;
      UInt low    = (0xdc << 8) | (srcval & 0x3ff);

      utf16  = (high << 16) | low;
      nbytes = 4;
   }

   /* Assemble the return value; invalid_character is 0 here. */
   return (utf16 << 16) | (nbytes << 8);
}
576
577
578/*------------------------------------------------------------*/
579/*--- Clean helper for CU41.                               ---*/
580/*------------------------------------------------------------*/
581
/* Perform a CU41 operation (UTF-32 to UTF-8) on SRCVAL.  Three things
   are returned, encoded in an ULong value:
   - the converted bytes (at most 4)
   - the number of converted bytes (1, 2, 3, or 4; 0 if invalid character)
   - an indication whether the UTF-32 character is invalid

   64      48                16           8                   0
    +-------+-----------------+-----------+-------------------+
    |  0x0  | converted bytes | num_bytes | invalid_character |
    +-------+-----------------+-----------+-------------------+
*/
ULong
s390_do_cu41(UInt srcval)
{
   ULong utf8;
   UInt  nbytes;

   /* d800 ... dbff or 00110000 ... ffffffff: invalid character.  No
      bytes are converted; only the invalid_character field is set. */
   if ((srcval >= 0xd800 && srcval <= 0xdbff) || srcval > 0x10ffff)
      return 1;

   if (srcval <= 0x7f) {
      utf8   = srcval;
      nbytes = 1;
   } else if (srcval <= 0x7ff) {
      UInt lead  = 0xc0 | (srcval >> 6);          // fghij
      UInt trail = 0x80 | (srcval & 0x3f);        // klmnop

      utf8   = (lead << 8) | trail;
      nbytes = 2;
   } else if (srcval <= 0xffff) {
      UInt lead = 0xe0 | (srcval >> 12);          // abcd
      UInt mid  = 0x80 | ((srcval >> 6) & 0x3f);  // efghij
      UInt last = 0x80 | (srcval & 0x3f);         // klmnop

      utf8   = (lead << 16) | (mid << 8) | last;
      nbytes = 3;
   } else {
      /* 00010000 ... 0010ffff */
      UInt c1 = 0xf0 | ((srcval >> 18) & 0x7);        // uvw
      UInt c2 = 0x80 | (((srcval >> 16) & 0x3) << 4)  // xy
                     | ((srcval >> 12) & 0xf);        // efgh
      UInt c3 = 0x80 | ((srcval >> 6) & 0x3f);        // ijklmn
      UInt c4 = 0x80 | (srcval & 0x3f);               // opqrst

      utf8   = (c1 << 24) | (c2 << 16) | (c3 << 8) | c4;
      nbytes = 4;
   }

   /* Assemble the return value; invalid_character is 0 here. */
   return (utf8 << 16) | (nbytes << 8);
}
646
647
648/*------------------------------------------------------------*/
649/*--- Clean helpers for CU12.                              ---*/
650/*------------------------------------------------------------*/
651
/* Inspect BYTE, the first byte of an UTF-8 character (must be <= 0xff),
   and return two things encoded in an ULong value:

   - the number of bytes that need to be read
   - an indication whether the UTF-8 character is invalid

   If ETF3_AND_M3_IS_1 is non-zero, the first bytes 0xc0, 0xc1 and
   0xf5 .. 0xf7 are rejected as invalid in addition to 0x80 .. 0xbf and
   0xf8 .. 0xff.  For an invalid character num_bytes is 0.

   64      16           8                   0
    +-------+-----------+-------------------+
    |  0x0  | num_bytes | invalid_character |
    +-------+-----------+-------------------+
*/
ULong
s390_do_cu12_cu14_helper1(UInt byte, UInt etf3_and_m3_is_1)
{
   UInt nbytes;

   vassert(byte <= 0xff);

   /* First bytes that are never acceptable */
   if ((byte >= 0x80 && byte <= 0xbf) || byte >= 0xf8)
      return 1;

   /* First bytes that are only rejected by the stricter check */
   if (etf3_and_m3_is_1 &&
       (byte == 0xc0 || byte == 0xc1 || (byte >= 0xf5 && byte <= 0xf7)))
      return 1;

   /* Character is valid; derive its length from the first byte */
   if (byte <= 0x7f)
      nbytes = 1;
   else if (byte <= 0xdf)
      nbytes = 2;
   else if (byte <= 0xef)
      nbytes = 3;
   else
      nbytes = 4;

   return nbytes << 8;
}
684
/* Perform a CU12 (IS_CU12 non-zero) or CU14 (IS_CU12 zero) operation.
   BYTE1, BYTE2, etc are the bytes as read from the input stream, left
   to right.  BYTE1 is a valid byte.  STUFF carries two values: bit 0 is
   the etf3_and_m3_is_1 flag which enables the validity checks on the
   bytes following BYTE1, and the remaining bits hold the number of
   source bytes (at most 4).  The function returns three things encoded
   in an ULong value:

   - the converted bytes
   - the number of converted bytes; for CU12 this is 2 or 4, or 0 if
     the character is invalid; for CU14 it is always 4
   - an indication whether the UTF-8 input character is invalid

   64      48                16           8                   0
    +-------+-----------------+-----------+-------------------+
    |  0x0  | converted bytes | num_bytes | invalid_character |
    +-------+-----------------+-----------+-------------------+
*/
static ULong
s390_do_cu12_cu14_helper2(UInt byte1, UInt byte2, UInt byte3, UInt byte4,
                          ULong stuff, Bool is_cu12)
{
   UInt  src_len = stuff >> 1;
   UInt  strict  = stuff & 0x1;      /* etf3_and_m3_is_1 */
   UInt  out_len = 0, invalid = 0;
   ULong converted = 0;

   vassert(src_len <= 4);

   switch (src_len) {
   case 1:
      out_len   = 2;
      converted = byte1;
      break;

   case 2:
      /* The second byte has to be a continuation byte */
      if (strict && (byte2 < 0x80 || byte2 > 0xbf)) {
         invalid = 1;
         break;
      }

      out_len   = 2;
      converted = ((byte1 & 0x1f) << 6)     // fghij
                | (byte2 & 0x3f);           // klmnop
      break;

   case 3: {
      /* For a first byte of 0xe0 .. 0xef the second byte is limited to
         a range that depends on the first byte: 0xa0 .. 0xbf after
         0xe0, 0x80 .. 0x9f after 0xed, 0x80 .. 0xbf otherwise.  The
         third byte has to be 0x80 .. 0xbf. */
      if (strict && byte1 >= 0xe0 && byte1 <= 0xef) {
         UInt lo = (byte1 == 0xe0) ? 0xa0 : 0x80;
         UInt hi = (byte1 == 0xed) ? 0x9f : 0xbf;

         if (byte2 < lo || byte2 > hi || byte3 < 0x80 || byte3 > 0xbf) {
            invalid = 1;
            break;
         }
      }

      out_len   = 2;
      converted = ((byte1 & 0xf) << 12)     // abcd
                | ((byte2 & 0x3f) << 6)     // efghij
                | (byte3 & 0x3f);           // klmnop
      break;
   }

   case 4: {
      /* For a first byte of 0xf0 .. 0xf4 the second byte is limited to
         a range that depends on the first byte: 0x90 .. 0xbf after
         0xf0, 0x80 .. 0x8f after 0xf4, 0x80 .. 0xbf otherwise.  The
         third and fourth byte have to be 0x80 .. 0xbf. */
      if (strict && byte1 >= 0xf0 && byte1 <= 0xf4) {
         UInt lo = (byte1 == 0xf0) ? 0x90 : 0x80;
         UInt hi = (byte1 == 0xf4) ? 0x8f : 0xbf;

         if (byte2 < lo || byte2 > hi ||
             byte3 < 0x80 || byte3 > 0xbf ||
             byte4 < 0x80 || byte4 > 0xbf) {
            invalid = 1;
            break;
         }
      }

      UInt uvwxy  = ((byte1 & 0x7) << 2) | ((byte2 >> 4) & 0x3);
      UInt efgh   = byte2 & 0xf;
      UInt ij     = (byte3 >> 4) & 0x3;
      UInt klmn   = byte3 & 0xf;
      UInt opqrst = byte4 & 0x3f;

      out_len = 4;
      if (is_cu12) {
         /* UTF-16: build a surrogate pair */
         UInt abcd = (uvwxy - 1) & 0xf;
         UInt high = (0xd8 << 8) | (abcd << 6) | (efgh << 2) | ij;
         UInt low  = (0xdc << 8) | (klmn << 6) | opqrst;

         converted = (high << 16) | low;
      } else {
         /* UTF-32: the bits are simply concatenated */
         converted =
            (uvwxy << 16) | (efgh << 12) | (ij << 10) | (klmn << 6) | opqrst;
      }
      break;
   }
   }

   if (! is_cu12) out_len = 4;   // for CU14, by definition

   /* Assemble the three fields of the return value. */
   return (converted << 16) | (out_len << 8) | invalid;
}

/* CU12 entry point; see s390_do_cu12_cu14_helper2 for the meaning of
   the arguments and of the return value. */
ULong
s390_do_cu12_helper2(UInt byte1, UInt byte2, UInt byte3, UInt byte4,
                     ULong stuff)
{
   return s390_do_cu12_cu14_helper2(byte1, byte2, byte3, byte4, stuff,
                                    /* is_cu12 = */ 1);
}

/* CU14 entry point; see s390_do_cu12_cu14_helper2 for the meaning of
   the arguments and of the return value. */
ULong
s390_do_cu14_helper2(UInt byte1, UInt byte2, UInt byte3, UInt byte4,
                     ULong stuff)
{
   return s390_do_cu12_cu14_helper2(byte1, byte2, byte3, byte4, stuff,
                                    /* is_cu12 = */ 0);
}
845
846
847/*------------------------------------------------------------*/
848/*--- Clean helper for "convert to binary".                ---*/
849/*------------------------------------------------------------*/
#if defined(VGA_s390x)
/* Execute the host's CVB instruction with DECIMAL as its storage
   operand and return the 32-bit value it leaves in the result
   register. */
UInt
s390_do_cvb(ULong decimal)
{
   UInt converted;

   __asm__ volatile (
        "cvb %[result],%[input]\n\t"
          : [result] "=d"(converted)
          : [input] "m"(decimal)
   );

   return converted;
}

#else
/* Not compiled for an s390x host: CVB is not available, so 0 is
   returned regardless of DECIMAL. */
UInt
s390_do_cvb(ULong decimal)
{
   return 0;
}
#endif
868
869
870/*------------------------------------------------------------*/
871/*--- Clean helper for "convert to decimal".                ---*/
872/*------------------------------------------------------------*/
#if defined(VGA_s390x)
/* Execute the host's CVD instruction on the low 32 bits of BINARY_IN
   and return the doubleword the instruction stores. */
ULong
s390_do_cvd(ULong binary_in)
{
   /* Only the least significant word takes part in the conversion. */
   UInt  low_word = binary_in & 0xffffffffULL;
   ULong stored;

   __asm__ volatile (
        "cvd %[input],%[result]\n\t"
          : [result] "=m"(stored)
          : [input] "d"(low_word)
   );

   return stored;
}

#else
/* Not compiled for an s390x host: CVD is not available, so 0 is
   returned regardless of BINARY. */
ULong
s390_do_cvd(ULong binary)
{
   return 0;
}
#endif
892
893/*------------------------------------------------------------*/
894/*--- Clean helper for "Extract cache attribute".          ---*/
895/*------------------------------------------------------------*/
#if defined(VGA_s390x)
/* Execute the host's ECAG instruction (emitted through .insn) with
   OP2ADDR as the second-operand address and return the value the
   instruction places in its result register. */
ULong
s390_do_ecag(ULong op2addr)
{
   ULong result;

   /* OP2ADDR is used as the base register of the address 0(%[in]).
      General register 0 in a base register field means "no base", so
      the operand must be constrained to an address register ("a"),
      which excludes r0.  With "d" the compiler would be free to pick
      r0 and the instruction would then see address 0. */
   __asm__ volatile(".insn rsy,0xEB000000004C,%[out],0,0(%[in])\n\t"
                    : [out] "=d"(result)
                    : [in] "a"(op2addr));
   return result;
}

#else
/* Not compiled for an s390x host: ECAG is not available, so 0 is
   returned regardless of OP2ADDR. */
ULong s390_do_ecag(ULong op2addr) { return 0; }
#endif
911
912/*------------------------------------------------------------*/
913/*--- Clean helper for "Perform Floating Point Operation". ---*/
914/*------------------------------------------------------------*/
#if defined(VGA_s390x)
/* Validate the PFPO function code held in GPR0.  Returns
   EmFail_S390X_invalid_PFPO_rounding_mode if the rounding mode (the low
   4 bits) is one of 2 .. 7, EmFail_S390X_invalid_PFPO_function if the
   operand types are equal or unsupported, and EmNote_NONE otherwise.
   The rounding mode is checked first. */
UInt
s390_do_pfpo(UInt gpr0)
{
   UInt rounding = gpr0 & 0xf;
   UInt op1_type = (gpr0 >> 16) & 0xff;   // gpr0[40:47]
   UInt op2_type = (gpr0 >> 8)  & 0xff;   // gpr0[48:55]

   if (rounding >= 2 && rounding <= 7)
      return EmFail_S390X_invalid_PFPO_rounding_mode;

   /* Operand type must be BFP 32, 64, 128 or DFP 32, 64, 128
      which correspond to 0x5, 0x6, 0x7, 0x8, 0x9, 0xa respectively.
      Any other operand type value is unsupported, and so is a
      conversion between two operands of the same type. */
   if (op1_type == op2_type)
      return EmFail_S390X_invalid_PFPO_function;
   if (op1_type < 0x5 || op1_type > 0xa)
      return EmFail_S390X_invalid_PFPO_function;
   if (op2_type < 0x5 || op2_type > 0xa)
      return EmFail_S390X_invalid_PFPO_function;

   return EmNote_NONE;
}
#else
/* Not compiled for an s390x host: no check is made, 0 is returned. */
UInt
s390_do_pfpo(UInt gpr0)
{
   return 0;
}
#endif
941
942/*------------------------------------------------------------*/
943/*--- Helper for condition code.                           ---*/
944/*------------------------------------------------------------*/
945
/* Convert an IRRoundingMode value to s390_bfp_round_t.  Any value other
   than the four rounding modes handled below ends in vpanic. */
#if defined(VGA_s390x)
static s390_bfp_round_t
decode_bfp_rounding_mode(UInt irrm)
{
   if (irrm == Irrm_NEAREST)
      return S390_BFP_ROUND_NEAREST_EVEN;
   if (irrm == Irrm_NegINF)
      return S390_BFP_ROUND_NEGINF;
   if (irrm == Irrm_PosINF)
      return S390_BFP_ROUND_POSINF;
   if (irrm == Irrm_ZERO)
      return S390_BFP_ROUND_ZERO;

   vpanic("decode_bfp_rounding_mode");
}
#endif
960
961
/* Execute the two-operand instruction OPCODE on the register operands
   CC_DEP1 and CC_DEP2 and evaluate to the condition code it sets.
   A variable named "psw" must be in scope where the macro is used; it
   receives the result of "ipm" and the macro evaluates to psw >> 28.
   CC_DEP1 is an in/out operand of the asm and may be modified. */
#define S390_CC_FOR_BINARY(opcode,cc_dep1,cc_dep2)                  \
({                                                                  \
   __asm__ volatile (                                               \
        opcode " %[op1],%[op2]\n\t"                                 \
        "ipm %[psw]\n\t"                                            \
           : [psw] "=d"(psw), [op1] "+d"(cc_dep1)                   \
           : [op2] "d"(cc_dep2)                                     \
           : "cc");                                                 \
   psw >> 28;   /* cc */                                            \
})
971
/* Redo a subtract-with-borrow instruction OPCODE and evaluate to the
   condition code it sets.  CC_DEP2 is first xor-ed with CC_NDEP (and
   thereby modified).  The sequence "lghi 0,1; sr 0,CC_NDEP" then puts
   the borrow into the condition code before OPCODE is executed on
   CC_DEP1 and CC_DEP2.  General register 0 is clobbered.  A variable
   named "psw" must be in scope where the macro is used; the macro
   evaluates to psw >> 28. */
#define S390_CC_FOR_TERNARY_SUBB(opcode,cc_dep1,cc_dep2,cc_ndep)    \
({                                                                  \
   /* Recover the original DEP2 value. See comment near             \
      s390_cc_thunk_put3 for rationale. */                          \
   cc_dep2 = cc_dep2 ^ cc_ndep;                                     \
   __asm__ volatile (                                               \
        "lghi 0,1\n\t"                                              \
        "sr 0,%[op3]\n\t"           /* borrow to cc */              \
        opcode " %[op1],%[op2]\n\t" /* then redo the op */          \
        "ipm %[psw]\n\t"                                            \
           : [psw] "=d"(psw), [op1] "+&d"(cc_dep1)                  \
           : [op2] "d"(cc_dep2), [op3] "d"(cc_ndep)                 \
           : "0", "cc");                                            \
   psw >> 28;   /* cc */                                            \
})
986
/* Condition code of an add-with-carry insn OPCODE.

   cc_dep2 is first restored by XORing it with cc_ndep. cc_ndep is then
   loaded into general register 0 ("lgfr") and turned into a condition code
   by "aghi 0,0". After that OPCODE is redone on cc_dep1 and cc_dep2 and IPM
   fetches the new condition code into 'psw'. The statement expression
   evaluates to psw >> 28.

   - A variable named 'psw' must be in scope at the point of expansion.
   - cc_dep1 and cc_dep2 must be lvalues; both are modified.
   - General register 0 is used as scratch and declared clobbered. */
#define S390_CC_FOR_TERNARY_ADDC(opcode,cc_dep1,cc_dep2,cc_ndep) \
({ \
   /* DEP2 arrives XORed with NDEP; undo that. The comment near \
      s390_cc_thunk_put3 explains why. */ \
   cc_dep2 = cc_dep2 ^ cc_ndep; \
   __asm__ volatile ( \
      "lgfr 0,%[op3]\n\t"           /* fetch cc_ndep ... */ \
      "aghi 0,0\n\t"                /* ... and turn it into a cc */ \
      opcode " %[op1],%[op2]\n\t"   /* now repeat the operation */ \
      "ipm %[psw]\n\t" \
      : /* out */ [psw] "=d"(psw), [op1] "+&d"(cc_dep1) \
      : /* in  */ [op2] "d"(cc_dep2), [op3] "d"(cc_ndep) \
      : /* clobbered */ "0", "cc" \
   ); \
   psw >> 28;   /* the condition code */ \
})
1001
1002
/* Condition code that OPCODE sets for the floating point value cc_dep1.

   The insn is emitted as "OPCODE 0,<cc_dep1>", i.e. with floating point
   register 0 as first operand (declared clobbered) and cc_dep1, held in a
   floating point register, as second operand. IPM copies the condition
   code into 'psw', which must be in scope where the macro is expanded.
   The statement expression evaluates to psw >> 28. */
#define S390_CC_FOR_BFP_RESULT(opcode,cc_dep1) \
({ \
   __asm__ volatile ( \
      opcode " 0,%[op]\n\t" \
      "ipm %[psw]\n\t" \
      : /* out */ [psw] "=d"(psw) \
      : /* in  */ [op]  "f"(cc_dep1) \
      : /* clobbered */ "cc", "f0" \
   ); \
   psw >> 28;   /* the condition code */ \
})
1012
/* Condition code that LTXBR sets for a 128-bit BFP value given as the two
   halves HI and LO.

   HI is copied to floating point register 4 and LO to register 6, then
   "ltxbr 0,4" is executed and IPM copies the condition code into 'psw'
   (which must be in scope where the macro is expanded). Registers f0, f2,
   f4 and f6 are declared clobbered. The statement expression evaluates to
   psw >> 28. */
#define S390_CC_FOR_BFP128_RESULT(hi,lo) \
({ \
   __asm__ volatile ( \
      "ldr   4,%[high]\n\t" \
      "ldr   6,%[low]\n\t" \
      "ltxbr 0,4\n\t" \
      "ipm %[psw]\n\t" \
      : /* out */ [psw] "=d"(psw) \
      : /* in  */ [high] "f"(hi), [low] "f"(lo) \
      : /* clobbered */ "cc", "f0", "f2", "f4", "f6" \
   ); \
   psw >> 28;   /* the condition code */ \
})
1024
/* Helper for S390_CC_FOR_BFP_CONVERT.

   Emits "OPCODE 0,<rounding_mode>,<cc_dep1>" followed by IPM. ROUNDING_MODE
   is stringified into the assembly text, so it has to be a literal token.
   cc_dep1 is passed in a floating point register; r0 is declared clobbered.
   'psw' must be in scope where the macro is expanded. The statement
   expression evaluates to psw >> 28. */
#define S390_CC_FOR_BFP_CONVERT_AUX(opcode,cc_dep1,rounding_mode) \
({ \
   __asm__ volatile ( \
      opcode " 0," #rounding_mode ",%[op]\n\t" \
      "ipm %[psw]\n\t" \
      : /* out */ [psw] "=d"(psw) \
      : /* in  */ [op]  "f"(cc_dep1) \
      : /* clobbered */ "cc", "r0" \
   ); \
   psw >> 28;   /* the condition code */ \
})
1034
/* Condition code of the BFP conversion insn OPCODE applied to cc_dep1,
   with cc_dep2 holding the IRRoundingMode.

   S390_CC_FOR_BFP_CONVERT_AUX stringifies the rounding mode into the
   assembly text, so it needs a literal: hence one expansion per mode.
   Panics if the decoded mode is not one of the four handled here. */
#define S390_CC_FOR_BFP_CONVERT(opcode,cc_dep1,cc_dep2) \
({ \
   UInt cond_code; \
   s390_bfp_round_t bfp_rm = decode_bfp_rounding_mode(cc_dep2); \
   if (bfp_rm == S390_BFP_ROUND_NEAREST_EVEN) \
      cond_code = S390_CC_FOR_BFP_CONVERT_AUX(opcode, cc_dep1, 4); \
   else if (bfp_rm == S390_BFP_ROUND_ZERO) \
      cond_code = S390_CC_FOR_BFP_CONVERT_AUX(opcode, cc_dep1, 5); \
   else if (bfp_rm == S390_BFP_ROUND_POSINF) \
      cond_code = S390_CC_FOR_BFP_CONVERT_AUX(opcode, cc_dep1, 6); \
   else if (bfp_rm == S390_BFP_ROUND_NEGINF) \
      cond_code = S390_CC_FOR_BFP_CONVERT_AUX(opcode, cc_dep1, 7); \
   else \
      vpanic("unexpected bfp rounding mode"); \
   cond_code; \
})
1056
/* Helper for S390_CC_FOR_BFP_UCONVERT.

   The assembly text is OPCODE immediately followed by
   ",0,<cc_dep1>,<rounding_mode>,0", so OPCODE has to supply everything that
   precedes the first operand.
   NOTE(review): callers presumably pass a ".insn" directive here - confirm.
   ROUNDING_MODE is stringified and therefore has to be a literal token.
   cc_dep1 is passed in a floating point register; r0 is declared clobbered.
   'psw' must be in scope where the macro is expanded. The statement
   expression evaluates to psw >> 28. */
#define S390_CC_FOR_BFP_UCONVERT_AUX(opcode,cc_dep1,rounding_mode) \
({ \
   __asm__ volatile ( \
      opcode ",0,%[op]," #rounding_mode ",0\n\t" \
      "ipm %[psw]\n\t" \
      : /* out */ [psw] "=d"(psw) \
      : /* in  */ [op]  "f"(cc_dep1) \
      : /* clobbered */ "cc", "r0" \
   ); \
   psw >> 28;   /* the condition code */ \
})
1066
/* Same dispatch as S390_CC_FOR_BFP_CONVERT, but built on
   S390_CC_FOR_BFP_UCONVERT_AUX: cc_dep1 is the operand, cc_dep2 the
   IRRoundingMode.

   The helper stringifies the rounding mode into the assembly text, so it
   needs a literal: hence one expansion per mode. Panics if the decoded
   mode is not one of the four handled here. */
#define S390_CC_FOR_BFP_UCONVERT(opcode,cc_dep1,cc_dep2) \
({ \
   UInt cond_code; \
   s390_bfp_round_t bfp_rm = decode_bfp_rounding_mode(cc_dep2); \
   if (bfp_rm == S390_BFP_ROUND_NEAREST_EVEN) \
      cond_code = S390_CC_FOR_BFP_UCONVERT_AUX(opcode, cc_dep1, 4); \
   else if (bfp_rm == S390_BFP_ROUND_ZERO) \
      cond_code = S390_CC_FOR_BFP_UCONVERT_AUX(opcode, cc_dep1, 5); \
   else if (bfp_rm == S390_BFP_ROUND_POSINF) \
      cond_code = S390_CC_FOR_BFP_UCONVERT_AUX(opcode, cc_dep1, 6); \
   else if (bfp_rm == S390_BFP_ROUND_NEGINF) \
      cond_code = S390_CC_FOR_BFP_UCONVERT_AUX(opcode, cc_dep1, 7); \
   else \
      vpanic("unexpected bfp rounding mode"); \
   cond_code; \
})
1088
/* Helper for S390_CC_FOR_BFP128_CONVERT.

   HI is copied to floating point register 4 and LO to register 6, then
   "OPCODE 0,<rounding_mode>,4" is executed and IPM copies the condition
   code into 'psw' (which must be in scope where the macro is expanded).
   ROUNDING_MODE is stringified and therefore has to be a literal token.
   r0, f4 and f6 are declared clobbered. The statement expression evaluates
   to psw >> 28. */
#define S390_CC_FOR_BFP128_CONVERT_AUX(opcode,hi,lo,rounding_mode) \
({ \
   __asm__ volatile ( \
      "ldr   4,%[high]\n\t" \
      "ldr   6,%[low]\n\t" \
      opcode " 0," #rounding_mode ",4\n\t" \
      "ipm %[psw]\n\t" \
      : /* out */ [psw] "=d"(psw) \
      : /* in  */ [high] "f"(hi), [low] "f"(lo) \
      : /* clobbered */ "cc", "r0", "f4", "f6" \
   ); \
   psw >> 28;   /* the condition code */ \
})
1100
/* Condition code of the BFP conversion insn OPCODE applied to a 128-bit
   operand. cc_dep1 and cc_dep2 are handed to the _AUX helper as the high
   and low half, cc_ndep holds the IRRoundingMode.

   cc_dep2 must be an lvalue; it is modified. The helper stringifies the
   rounding mode into the assembly text, so it needs a literal: hence one
   expansion per mode. Panics if the decoded mode is not one of the four
   handled here. */
#define S390_CC_FOR_BFP128_CONVERT(opcode,cc_dep1,cc_dep2,cc_ndep) \
({ \
   UInt cond_code; \
   s390_bfp_round_t bfp_rm; \
   /* DEP2 arrives XORed with NDEP; undo that. The comment near \
      s390_cc_thunk_put3 explains why. */ \
   cc_dep2 = cc_dep2 ^ cc_ndep; \
   bfp_rm = decode_bfp_rounding_mode(cc_ndep); \
   if (bfp_rm == S390_BFP_ROUND_NEAREST_EVEN) \
      cond_code = S390_CC_FOR_BFP128_CONVERT_AUX(opcode, cc_dep1, cc_dep2, 4); \
   else if (bfp_rm == S390_BFP_ROUND_ZERO) \
      cond_code = S390_CC_FOR_BFP128_CONVERT_AUX(opcode, cc_dep1, cc_dep2, 5); \
   else if (bfp_rm == S390_BFP_ROUND_POSINF) \
      cond_code = S390_CC_FOR_BFP128_CONVERT_AUX(opcode, cc_dep1, cc_dep2, 6); \
   else if (bfp_rm == S390_BFP_ROUND_NEGINF) \
      cond_code = S390_CC_FOR_BFP128_CONVERT_AUX(opcode, cc_dep1, cc_dep2, 7); \
   else \
      vpanic("unexpected bfp rounding mode"); \
   cond_code; \
})
1125
/* Helper for S390_CC_FOR_BFP128_UCONVERT.

   HI is copied to floating point register 4 and LO to register 6. The
   insn text is OPCODE immediately followed by ",0,4,<rounding_mode>,0", so
   OPCODE has to supply everything that precedes the first operand.
   NOTE(review): callers presumably pass a ".insn" directive here - confirm.
   ROUNDING_MODE is stringified and therefore has to be a literal token.
   r0, f4 and f6 are declared clobbered. 'psw' must be in scope where the
   macro is expanded. The statement expression evaluates to psw >> 28. */
#define S390_CC_FOR_BFP128_UCONVERT_AUX(opcode,hi,lo,rounding_mode) \
({ \
   __asm__ volatile ( \
      "ldr   4,%[high]\n\t" \
      "ldr   6,%[low]\n\t" \
      opcode ",0,4," #rounding_mode ",0\n\t" \
      "ipm %[psw]\n\t" \
      : /* out */ [psw] "=d"(psw) \
      : /* in  */ [high] "f"(hi), [low] "f"(lo) \
      : /* clobbered */ "cc", "r0", "f4", "f6" \
   ); \
   psw >> 28;   /* the condition code */ \
})
1137
/* Same dispatch as S390_CC_FOR_BFP128_CONVERT, but built on
   S390_CC_FOR_BFP128_UCONVERT_AUX. cc_dep1 and cc_dep2 are handed to the
   helper as the high and low half, cc_ndep holds the IRRoundingMode.

   cc_dep2 must be an lvalue; it is modified. The helper stringifies the
   rounding mode into the assembly text, so it needs a literal: hence one
   expansion per mode. Panics if the decoded mode is not one of the four
   handled here. */
#define S390_CC_FOR_BFP128_UCONVERT(opcode,cc_dep1,cc_dep2,cc_ndep) \
({ \
   UInt cond_code; \
   s390_bfp_round_t bfp_rm; \
   /* DEP2 arrives XORed with NDEP; undo that. The comment near \
      s390_cc_thunk_put3 explains why. */ \
   cc_dep2 = cc_dep2 ^ cc_ndep; \
   bfp_rm = decode_bfp_rounding_mode(cc_ndep); \
   if (bfp_rm == S390_BFP_ROUND_NEAREST_EVEN) \
      cond_code = S390_CC_FOR_BFP128_UCONVERT_AUX(opcode, cc_dep1, cc_dep2, 4); \
   else if (bfp_rm == S390_BFP_ROUND_ZERO) \
      cond_code = S390_CC_FOR_BFP128_UCONVERT_AUX(opcode, cc_dep1, cc_dep2, 5); \
   else if (bfp_rm == S390_BFP_ROUND_POSINF) \
      cond_code = S390_CC_FOR_BFP128_UCONVERT_AUX(opcode, cc_dep1, cc_dep2, 6); \
   else if (bfp_rm == S390_BFP_ROUND_NEGINF) \
      cond_code = S390_CC_FOR_BFP128_UCONVERT_AUX(opcode, cc_dep1, cc_dep2, 7); \
   else \
      vpanic("unexpected bfp rounding mode"); \
   cond_code; \
})
1162
/* Condition code of the BFP insn OPCODE, emitted as
   "OPCODE <cc_dep1>,0(<cc_dep2>)".

   cc_dep1 (operand "value") is passed in a floating point register and
   cc_dep2 (operand "class") in an address register that serves as the base
   of the second operand. IPM copies the condition code into 'psw', which
   must be in scope where the macro is expanded. The statement expression
   evaluates to psw >> 28. */
#define S390_CC_FOR_BFP_TDC(opcode,cc_dep1,cc_dep2) \
({ \
   __asm__ volatile ( \
      opcode " %[value],0(%[class])\n\t" \
      "ipm %[psw]\n\t" \
      : /* out */ [psw] "=d"(psw) \
      : /* in  */ [value] "f"(cc_dep1), [class] "a"(cc_dep2) \
      : /* clobbered */ "cc" \
   ); \
   psw >> 28;   /* the condition code */ \
})
1173
/* Condition code of TCXB for a 128-bit BFP value.

   cc_dep1 is the high half, cc_dep2 (once restored by XORing with cc_ndep)
   the low half, and cc_ndep is the "class" operand. The halves are copied
   to floating point registers 4 and 6, "tcxb 4,0(<cc_ndep>)" is executed
   and IPM copies the condition code into 'psw' (which must be in scope
   where the macro is expanded). f4 and f6 are declared clobbered.
   cc_dep2 must be an lvalue; it is modified. The statement expression
   evaluates to psw >> 28. */
#define S390_CC_FOR_BFP128_TDC(cc_dep1,cc_dep2,cc_ndep) \
({ \
   /* DEP2 arrives XORed with NDEP; undo that. The comment near \
      s390_cc_thunk_put1f128Z explains why. */ \
   cc_dep2 = cc_dep2 ^ cc_ndep; \
   __asm__ volatile ( \
      "ldr  4,%[high]\n\t" \
      "ldr  6,%[low]\n\t" \
      "tcxb 4,0(%[class])\n\t" \
      "ipm  %[psw]\n\t" \
      : /* out */ [psw] "=d"(psw) \
      : /* in  */ [high] "f"(cc_dep1), [low] "f"(cc_dep2), \
                  [class] "a"(cc_ndep) \
      : /* clobbered */ "cc", "f4", "f6" \
   ); \
   psw >> 28;   /* the condition code */ \
})
1189
/* Translate an IRRoundingMode value into the matching s390_dfp_round_t.
   Any value other than the eight modes listed below causes a panic. */
#if defined(VGA_s390x)
static s390_dfp_round_t
decode_dfp_rounding_mode(UInt irrm)
{
   switch (irrm) {
   case Irrm_NEAREST:              return S390_DFP_ROUND_NEAREST_EVEN_4;
   case Irrm_NegINF:               return S390_DFP_ROUND_NEGINF_7;
   case Irrm_PosINF:               return S390_DFP_ROUND_POSINF_6;
   case Irrm_ZERO:                 return S390_DFP_ROUND_ZERO_5;
   case Irrm_NEAREST_TIE_AWAY_0:   return S390_DFP_ROUND_NEAREST_TIE_AWAY_0_1;
   case Irrm_PREPARE_SHORTER:      return S390_DFP_ROUND_PREPARE_SHORT_3;
   case Irrm_AWAY_FROM_ZERO:       return S390_DFP_ROUND_AWAY_0;
   case Irrm_NEAREST_TIE_TOWARD_0: return S390_DFP_ROUND_NEAREST_TIE_TOWARD_0;
   default:                        break;
   }

   vpanic("decode_dfp_rounding_mode");
}
#endif
1216
/* Condition code that LTDTR sets for the DFP value cc_dep1.

   The insn is emitted through ".insn rre" with floating point register 0
   as first operand (declared clobbered) and cc_dep1, held in a floating
   point register, as second operand. IPM copies the condition code into
   'psw', which must be in scope where the macro is expanded. The statement
   expression evaluates to psw >> 28. */
#define S390_CC_FOR_DFP_RESULT(cc_dep1) \
({ \
   __asm__ volatile ( \
      ".insn rre, 0xb3d60000,0,%[op]\n\t"   /* this is LTDTR */ \
      "ipm %[psw]\n\t" \
      : /* out */ [psw] "=d"(psw) \
      : /* in  */ [op]  "f"(cc_dep1) \
      : /* clobbered */ "cc", "f0" \
   ); \
   psw >> 28;   /* the condition code */ \
})
1226
/* Condition code that LTXTR sets for a 128-bit DFP value given as the two
   halves HI and LO.

   HI is copied to floating point register 4 and LO to register 6, then
   LTXTR is emitted through ".insn rre" with operands 0 and 4, and IPM
   copies the condition code into 'psw' (which must be in scope where the
   macro is expanded). Registers f0, f2, f4 and f6 are declared clobbered.
   The statement expression evaluates to psw >> 28. */
#define S390_CC_FOR_DFP128_RESULT(hi,lo) \
({ \
   __asm__ volatile ( \
      "ldr   4,%[high]\n\t" \
      "ldr   6,%[low]\n\t" \
      ".insn rre, 0xb3de0000,0,4\n\t"   /* this is LTXTR */ \
      "ipm %[psw]\n\t" \
      : /* out */ [psw] "=d"(psw) \
      : /* in  */ [high] "f"(hi), [low] "f"(lo) \
      : /* clobbered */ "cc", "f0", "f2", "f4", "f6" \
   ); \
   psw >> 28;   /* the condition code */ \
})
1238
/* Condition code of a DFP insn whose assembly text is OPCODE immediately
   followed by ",<cc_dep1>,0(<cc_dep2>)", so OPCODE has to supply everything
   that precedes the first operand.
   NOTE(review): callers presumably pass a ".insn" directive here - confirm.

   cc_dep1 (operand "value") is passed in a floating point register and
   cc_dep2 (operand "class") in an address register that serves as the base
   of the second operand. IPM copies the condition code into 'psw', which
   must be in scope where the macro is expanded. The statement expression
   evaluates to psw >> 28. */
#define S390_CC_FOR_DFP_TD(opcode,cc_dep1,cc_dep2) \
({ \
   __asm__ volatile ( \
      opcode ",%[value],0(%[class])\n\t" \
      "ipm %[psw]\n\t" \
      : /* out */ [psw] "=d"(psw) \
      : /* in  */ [value] "f"(cc_dep1), [class] "a"(cc_dep2) \
      : /* clobbered */ "cc" \
   ); \
   psw >> 28;   /* the condition code */ \
})
1249
/* 128-bit counterpart of S390_CC_FOR_DFP_TD.

   cc_dep1 is the high half, cc_dep2 (once restored by XORing with cc_ndep)
   the low half, and cc_ndep is the "class" operand. The halves are copied
   to floating point registers 4 and 6; the insn text is OPCODE immediately
   followed by ",4,0(<cc_ndep>)". IPM copies the condition code into 'psw'
   (which must be in scope where the macro is expanded). f4 and f6 are
   declared clobbered. cc_dep2 must be an lvalue; it is modified. The
   statement expression evaluates to psw >> 28. */
#define S390_CC_FOR_DFP128_TD(opcode,cc_dep1,cc_dep2,cc_ndep) \
({ \
   /* DEP2 arrives XORed with NDEP; undo that. The comment near \
      s390_cc_thunk_put1d128Z explains why. */ \
   cc_dep2 = cc_dep2 ^ cc_ndep; \
   __asm__ volatile ( \
      "ldr  4,%[high]\n\t" \
      "ldr  6,%[low]\n\t" \
      opcode ",4,0(%[class])\n\t" \
      "ipm  %[psw]\n\t" \
      : /* out */ [psw] "=d"(psw) \
      : /* in  */ [high] "f"(cc_dep1), [low] "f"(cc_dep2), \
                  [class] "a"(cc_ndep) \
      : /* clobbered */ "cc", "f4", "f6" \
   ); \
   psw >> 28;   /* the condition code */ \
})
1265
/* Helper for S390_CC_FOR_DFP_CONVERT.

   The assembly text is OPCODE immediately followed by
   ",0,<cc_dep1>,<rounding_mode>,0", so OPCODE has to supply everything that
   precedes the first operand.
   NOTE(review): callers presumably pass a ".insn" directive here - confirm.
   ROUNDING_MODE is stringified and therefore has to be a literal token.
   cc_dep1 is passed in a floating point register; r0 is declared clobbered.
   'psw' must be in scope where the macro is expanded. The statement
   expression evaluates to psw >> 28. */
#define S390_CC_FOR_DFP_CONVERT_AUX(opcode,cc_dep1,rounding_mode) \
({ \
   __asm__ volatile ( \
      opcode ",0,%[op]," #rounding_mode ",0\n\t" \
      "ipm %[psw]\n\t" \
      : /* out */ [psw] "=d"(psw) \
      : /* in  */ [op] "f"(cc_dep1) \
      : /* clobbered */ "cc", "r0" \
   ); \
   psw >> 28;   /* the condition code */ \
})
1275
/* Condition code of the DFP conversion insn OPCODE applied to cc_dep1,
   with cc_dep2 holding the IRRoundingMode.

   S390_CC_FOR_DFP_CONVERT_AUX stringifies the rounding mode into the
   assembly text, so it needs a literal: hence one expansion per mode.
   s390_dfp_round_t values that share a behaviour are mapped onto the same
   literal. Panics on a mode that is not listed. */
#define S390_CC_FOR_DFP_CONVERT(opcode,cc_dep1,cc_dep2) \
({ \
   UInt cond_code; \
   s390_dfp_round_t dfp_rm = decode_dfp_rounding_mode(cc_dep2); \
   if (dfp_rm == S390_DFP_ROUND_NEAREST_TIE_AWAY_0_1 || \
       dfp_rm == S390_DFP_ROUND_NEAREST_TIE_AWAY_0_12) \
      cond_code = S390_CC_FOR_DFP_CONVERT_AUX(opcode, cc_dep1, 1); \
   else if (dfp_rm == S390_DFP_ROUND_PREPARE_SHORT_3 || \
            dfp_rm == S390_DFP_ROUND_PREPARE_SHORT_15) \
      cond_code = S390_CC_FOR_DFP_CONVERT_AUX(opcode, cc_dep1, 3); \
   else if (dfp_rm == S390_DFP_ROUND_NEAREST_EVEN_4 || \
            dfp_rm == S390_DFP_ROUND_NEAREST_EVEN_8) \
      cond_code = S390_CC_FOR_DFP_CONVERT_AUX(opcode, cc_dep1, 4); \
   else if (dfp_rm == S390_DFP_ROUND_ZERO_5 || \
            dfp_rm == S390_DFP_ROUND_ZERO_9) \
      cond_code = S390_CC_FOR_DFP_CONVERT_AUX(opcode, cc_dep1, 5); \
   else if (dfp_rm == S390_DFP_ROUND_POSINF_6 || \
            dfp_rm == S390_DFP_ROUND_POSINF_10) \
      cond_code = S390_CC_FOR_DFP_CONVERT_AUX(opcode, cc_dep1, 6); \
   else if (dfp_rm == S390_DFP_ROUND_NEGINF_7 || \
            dfp_rm == S390_DFP_ROUND_NEGINF_11) \
      cond_code = S390_CC_FOR_DFP_CONVERT_AUX(opcode, cc_dep1, 7); \
   else if (dfp_rm == S390_DFP_ROUND_NEAREST_TIE_TOWARD_0) \
      cond_code = S390_CC_FOR_DFP_CONVERT_AUX(opcode, cc_dep1, 13); \
   else if (dfp_rm == S390_DFP_ROUND_AWAY_0) \
      cond_code = S390_CC_FOR_DFP_CONVERT_AUX(opcode, cc_dep1, 14); \
   else \
      vpanic("unexpected dfp rounding mode"); \
   cond_code; \
})
1315
/* Helper for S390_CC_FOR_DFP_UCONVERT.

   The assembly text is OPCODE immediately followed by
   ",0,<cc_dep1>,<rounding_mode>,0", so OPCODE has to supply everything that
   precedes the first operand.
   NOTE(review): callers presumably pass a ".insn" directive here - confirm.
   ROUNDING_MODE is stringified and therefore has to be a literal token.
   cc_dep1 is passed in a floating point register; r0 is declared clobbered.
   'psw' must be in scope where the macro is expanded. The statement
   expression evaluates to psw >> 28. */
#define S390_CC_FOR_DFP_UCONVERT_AUX(opcode,cc_dep1,rounding_mode) \
({ \
   __asm__ volatile ( \
      opcode ",0,%[op]," #rounding_mode ",0\n\t" \
      "ipm %[psw]\n\t" \
      : /* out */ [psw] "=d"(psw) \
      : /* in  */ [op] "f"(cc_dep1) \
      : /* clobbered */ "cc", "r0" \
   ); \
   psw >> 28;   /* the condition code */ \
})
1325
/* Same dispatch as S390_CC_FOR_DFP_CONVERT, but built on
   S390_CC_FOR_DFP_UCONVERT_AUX: cc_dep1 is the operand, cc_dep2 the
   IRRoundingMode.

   The helper stringifies the rounding mode into the assembly text, so it
   needs a literal: hence one expansion per mode. s390_dfp_round_t values
   that share a behaviour are mapped onto the same literal. Panics on a
   mode that is not listed. */
#define S390_CC_FOR_DFP_UCONVERT(opcode,cc_dep1,cc_dep2) \
({ \
   UInt cond_code; \
   s390_dfp_round_t dfp_rm = decode_dfp_rounding_mode(cc_dep2); \
   if (dfp_rm == S390_DFP_ROUND_NEAREST_TIE_AWAY_0_1 || \
       dfp_rm == S390_DFP_ROUND_NEAREST_TIE_AWAY_0_12) \
      cond_code = S390_CC_FOR_DFP_UCONVERT_AUX(opcode, cc_dep1, 1); \
   else if (dfp_rm == S390_DFP_ROUND_PREPARE_SHORT_3 || \
            dfp_rm == S390_DFP_ROUND_PREPARE_SHORT_15) \
      cond_code = S390_CC_FOR_DFP_UCONVERT_AUX(opcode, cc_dep1, 3); \
   else if (dfp_rm == S390_DFP_ROUND_NEAREST_EVEN_4 || \
            dfp_rm == S390_DFP_ROUND_NEAREST_EVEN_8) \
      cond_code = S390_CC_FOR_DFP_UCONVERT_AUX(opcode, cc_dep1, 4); \
   else if (dfp_rm == S390_DFP_ROUND_ZERO_5 || \
            dfp_rm == S390_DFP_ROUND_ZERO_9) \
      cond_code = S390_CC_FOR_DFP_UCONVERT_AUX(opcode, cc_dep1, 5); \
   else if (dfp_rm == S390_DFP_ROUND_POSINF_6 || \
            dfp_rm == S390_DFP_ROUND_POSINF_10) \
      cond_code = S390_CC_FOR_DFP_UCONVERT_AUX(opcode, cc_dep1, 6); \
   else if (dfp_rm == S390_DFP_ROUND_NEGINF_7 || \
            dfp_rm == S390_DFP_ROUND_NEGINF_11) \
      cond_code = S390_CC_FOR_DFP_UCONVERT_AUX(opcode, cc_dep1, 7); \
   else if (dfp_rm == S390_DFP_ROUND_NEAREST_TIE_TOWARD_0) \
      cond_code = S390_CC_FOR_DFP_UCONVERT_AUX(opcode, cc_dep1, 13); \
   else if (dfp_rm == S390_DFP_ROUND_AWAY_0) \
      cond_code = S390_CC_FOR_DFP_UCONVERT_AUX(opcode, cc_dep1, 14); \
   else \
      vpanic("unexpected dfp rounding mode"); \
   cond_code; \
})
1365
/* Helper for S390_CC_FOR_DFP128_CONVERT.

   HI is copied to floating point register 4 and LO to register 6. The
   insn text is OPCODE immediately followed by ",0,4,<rounding_mode>,0", so
   OPCODE has to supply everything that precedes the first operand.
   NOTE(review): callers presumably pass a ".insn" directive here - confirm.
   ROUNDING_MODE is stringified and therefore has to be a literal token.
   r0, f4 and f6 are declared clobbered. 'psw' must be in scope where the
   macro is expanded. The statement expression evaluates to psw >> 28. */
#define S390_CC_FOR_DFP128_CONVERT_AUX(opcode,hi,lo,rounding_mode) \
({ \
   __asm__ volatile ( \
      "ldr   4,%[high]\n\t" \
      "ldr   6,%[low]\n\t" \
      opcode ",0,4," #rounding_mode ",0\n\t" \
      "ipm %[psw]\n\t" \
      : /* out */ [psw] "=d"(psw) \
      : /* in  */ [high] "f"(hi), [low] "f"(lo) \
      : /* clobbered */ "cc", "r0", "f4", "f6" \
   ); \
   psw >> 28;   /* the condition code */ \
})
1377
/* Condition code of the DFP conversion insn OPCODE applied to a 128-bit
   operand. cc_dep1 and cc_dep2 are handed to the _AUX helper as the high
   and low half, cc_ndep holds the IRRoundingMode.

   cc_dep2 must be an lvalue; it is modified. The helper stringifies the
   rounding mode into the assembly text, so it needs a literal: hence one
   expansion per mode. s390_dfp_round_t values that share a behaviour are
   mapped onto the same literal. Panics on a mode that is not listed. */
#define S390_CC_FOR_DFP128_CONVERT(opcode,cc_dep1,cc_dep2,cc_ndep) \
({ \
   UInt cond_code; \
   s390_dfp_round_t dfp_rm; \
   /* DEP2 arrives XORed with NDEP; undo that. The comment near \
      s390_cc_thunk_put3 explains why. */ \
   cc_dep2 = cc_dep2 ^ cc_ndep; \
   dfp_rm = decode_dfp_rounding_mode(cc_ndep); \
   if (dfp_rm == S390_DFP_ROUND_NEAREST_TIE_AWAY_0_1 || \
       dfp_rm == S390_DFP_ROUND_NEAREST_TIE_AWAY_0_12) \
      cond_code = S390_CC_FOR_DFP128_CONVERT_AUX(opcode, cc_dep1, cc_dep2, 1); \
   else if (dfp_rm == S390_DFP_ROUND_PREPARE_SHORT_3 || \
            dfp_rm == S390_DFP_ROUND_PREPARE_SHORT_15) \
      cond_code = S390_CC_FOR_DFP128_CONVERT_AUX(opcode, cc_dep1, cc_dep2, 3); \
   else if (dfp_rm == S390_DFP_ROUND_NEAREST_EVEN_4 || \
            dfp_rm == S390_DFP_ROUND_NEAREST_EVEN_8) \
      cond_code = S390_CC_FOR_DFP128_CONVERT_AUX(opcode, cc_dep1, cc_dep2, 4); \
   else if (dfp_rm == S390_DFP_ROUND_ZERO_5 || \
            dfp_rm == S390_DFP_ROUND_ZERO_9) \
      cond_code = S390_CC_FOR_DFP128_CONVERT_AUX(opcode, cc_dep1, cc_dep2, 5); \
   else if (dfp_rm == S390_DFP_ROUND_POSINF_6 || \
            dfp_rm == S390_DFP_ROUND_POSINF_10) \
      cond_code = S390_CC_FOR_DFP128_CONVERT_AUX(opcode, cc_dep1, cc_dep2, 6); \
   else if (dfp_rm == S390_DFP_ROUND_NEGINF_7 || \
            dfp_rm == S390_DFP_ROUND_NEGINF_11) \
      cond_code = S390_CC_FOR_DFP128_CONVERT_AUX(opcode, cc_dep1, cc_dep2, 7); \
   else if (dfp_rm == S390_DFP_ROUND_NEAREST_TIE_TOWARD_0) \
      cond_code = S390_CC_FOR_DFP128_CONVERT_AUX(opcode, cc_dep1, cc_dep2, 13); \
   else if (dfp_rm == S390_DFP_ROUND_AWAY_0) \
      cond_code = S390_CC_FOR_DFP128_CONVERT_AUX(opcode, cc_dep1, cc_dep2, 14); \
   else \
      vpanic("unexpected dfp rounding mode"); \
   cond_code; \
})
1420
/* Helper for S390_CC_FOR_DFP128_UCONVERT.

   HI is copied to floating point register 4 and LO to register 6. The
   insn text is OPCODE immediately followed by ",0,4,<rounding_mode>,0", so
   OPCODE has to supply everything that precedes the first operand.
   NOTE(review): callers presumably pass a ".insn" directive here - confirm.
   ROUNDING_MODE is stringified and therefore has to be a literal token.
   r0, f4 and f6 are declared clobbered. 'psw' must be in scope where the
   macro is expanded. The statement expression evaluates to psw >> 28. */
#define S390_CC_FOR_DFP128_UCONVERT_AUX(opcode,hi,lo,rounding_mode) \
({ \
   __asm__ volatile ( \
      "ldr   4,%[high]\n\t" \
      "ldr   6,%[low]\n\t" \
      opcode ",0,4," #rounding_mode ",0\n\t" \
      "ipm %[psw]\n\t" \
      : /* out */ [psw] "=d"(psw) \
      : /* in  */ [high] "f"(hi), [low] "f"(lo) \
      : /* clobbered */ "cc", "r0", "f4", "f6" \
   ); \
   psw >> 28;   /* the condition code */ \
})
1432
/* Same dispatch as S390_CC_FOR_DFP128_CONVERT, but built on
   S390_CC_FOR_DFP128_UCONVERT_AUX. cc_dep1 and cc_dep2 are handed to the
   helper as the high and low half, cc_ndep holds the IRRoundingMode.

   cc_dep2 must be an lvalue; it is modified. The helper stringifies the
   rounding mode into the assembly text, so it needs a literal: hence one
   expansion per mode. s390_dfp_round_t values that share a behaviour are
   mapped onto the same literal. Panics on a mode that is not listed. */
#define S390_CC_FOR_DFP128_UCONVERT(opcode,cc_dep1,cc_dep2,cc_ndep) \
({ \
   UInt cond_code; \
   s390_dfp_round_t dfp_rm; \
   /* DEP2 arrives XORed with NDEP; undo that. The comment near \
      s390_cc_thunk_put3 explains why. */ \
   cc_dep2 = cc_dep2 ^ cc_ndep; \
   dfp_rm = decode_dfp_rounding_mode(cc_ndep); \
   if (dfp_rm == S390_DFP_ROUND_NEAREST_TIE_AWAY_0_1 || \
       dfp_rm == S390_DFP_ROUND_NEAREST_TIE_AWAY_0_12) \
      cond_code = S390_CC_FOR_DFP128_UCONVERT_AUX(opcode, cc_dep1, cc_dep2, 1); \
   else if (dfp_rm == S390_DFP_ROUND_PREPARE_SHORT_3 || \
            dfp_rm == S390_DFP_ROUND_PREPARE_SHORT_15) \
      cond_code = S390_CC_FOR_DFP128_UCONVERT_AUX(opcode, cc_dep1, cc_dep2, 3); \
   else if (dfp_rm == S390_DFP_ROUND_NEAREST_EVEN_4 || \
            dfp_rm == S390_DFP_ROUND_NEAREST_EVEN_8) \
      cond_code = S390_CC_FOR_DFP128_UCONVERT_AUX(opcode, cc_dep1, cc_dep2, 4); \
   else if (dfp_rm == S390_DFP_ROUND_ZERO_5 || \
            dfp_rm == S390_DFP_ROUND_ZERO_9) \
      cond_code = S390_CC_FOR_DFP128_UCONVERT_AUX(opcode, cc_dep1, cc_dep2, 5); \
   else if (dfp_rm == S390_DFP_ROUND_POSINF_6 || \
            dfp_rm == S390_DFP_ROUND_POSINF_10) \
      cond_code = S390_CC_FOR_DFP128_UCONVERT_AUX(opcode, cc_dep1, cc_dep2, 6); \
   else if (dfp_rm == S390_DFP_ROUND_NEGINF_7 || \
            dfp_rm == S390_DFP_ROUND_NEGINF_11) \
      cond_code = S390_CC_FOR_DFP128_UCONVERT_AUX(opcode, cc_dep1, cc_dep2, 7); \
   else if (dfp_rm == S390_DFP_ROUND_NEAREST_TIE_TOWARD_0) \
      cond_code = S390_CC_FOR_DFP128_UCONVERT_AUX(opcode, cc_dep1, cc_dep2, 13); \
   else if (dfp_rm == S390_DFP_ROUND_AWAY_0) \
      cond_code = S390_CC_FOR_DFP128_UCONVERT_AUX(opcode, cc_dep1, cc_dep2, 14); \
   else \
      vpanic("unexpected dfp rounding mode"); \
   cond_code; \
})
1475
1476
/* Return the value of the condition code from the supplied thunk parameters.
   This is not the value of the PSW. It is the value of the 2 CC bits within
   the PSW. The returned value is therefore in the range [0..3]. */
1480UInt
1481s390_calculate_cc(ULong cc_op, ULong cc_dep1, ULong cc_dep2, ULong cc_ndep)
1482{
1483#if defined(VGA_s390x)
1484   UInt psw;
1485
1486   switch (cc_op) {
1487
1488   case S390_CC_OP_BITWISE:
1489      return S390_CC_FOR_BINARY("ogr", cc_dep1, (ULong)0);
1490
1491   case S390_CC_OP_SIGNED_COMPARE:
1492      return S390_CC_FOR_BINARY("cgr", cc_dep1, cc_dep2);
1493
1494   case S390_CC_OP_UNSIGNED_COMPARE:
1495      return S390_CC_FOR_BINARY("clgr", cc_dep1, cc_dep2);
1496
1497   case S390_CC_OP_SIGNED_ADD_64:
1498      return S390_CC_FOR_BINARY("agr", cc_dep1, cc_dep2);
1499
1500   case S390_CC_OP_SIGNED_ADD_32:
1501      return S390_CC_FOR_BINARY("ar", cc_dep1, cc_dep2);
1502
1503   case S390_CC_OP_SIGNED_SUB_64:
1504      return S390_CC_FOR_BINARY("sgr", cc_dep1, cc_dep2);
1505
1506   case S390_CC_OP_SIGNED_SUB_32:
1507      return S390_CC_FOR_BINARY("sr", cc_dep1, cc_dep2);
1508
1509   case S390_CC_OP_UNSIGNED_ADD_64:
1510      return S390_CC_FOR_BINARY("algr", cc_dep1, cc_dep2);
1511
1512   case S390_CC_OP_UNSIGNED_ADD_32:
1513      return S390_CC_FOR_BINARY("alr", cc_dep1, cc_dep2);
1514
1515   case S390_CC_OP_UNSIGNED_ADDC_64:
1516      return S390_CC_FOR_TERNARY_ADDC("alcgr", cc_dep1, cc_dep2, cc_ndep);
1517
1518   case S390_CC_OP_UNSIGNED_ADDC_32:
1519      return S390_CC_FOR_TERNARY_ADDC("alcr", cc_dep1, cc_dep2, cc_ndep);
1520
1521   case S390_CC_OP_UNSIGNED_SUB_64:
1522      return S390_CC_FOR_BINARY("slgr", cc_dep1, cc_dep2);
1523
1524   case S390_CC_OP_UNSIGNED_SUB_32:
1525      return S390_CC_FOR_BINARY("slr", cc_dep1, cc_dep2);
1526
1527   case S390_CC_OP_UNSIGNED_SUBB_64:
1528      return S390_CC_FOR_TERNARY_SUBB("slbgr", cc_dep1, cc_dep2, cc_ndep);
1529
1530   case S390_CC_OP_UNSIGNED_SUBB_32:
1531      return S390_CC_FOR_TERNARY_SUBB("slbr", cc_dep1, cc_dep2, cc_ndep);
1532
1533   case S390_CC_OP_LOAD_AND_TEST:
1534      /* Like signed comparison with 0 */
1535      return S390_CC_FOR_BINARY("cgr", cc_dep1, (Long)0);
1536
1537   case S390_CC_OP_LOAD_POSITIVE_32:
1538      __asm__ volatile (
1539           "lpr  %[result],%[op]\n\t"
1540           "ipm  %[psw]\n\t"         : [psw] "=d"(psw), [result] "=d"(cc_dep1)
1541                                     : [op] "d"(cc_dep1)
1542                                     : "cc");
1543      return psw >> 28;   /* cc */
1544
1545   case S390_CC_OP_LOAD_POSITIVE_64:
1546      __asm__ volatile (
1547           "lpgr %[result],%[op]\n\t"
1548           "ipm  %[psw]\n\t"         : [psw] "=d"(psw), [result] "=d"(cc_dep1)
1549                                     : [op] "d"(cc_dep1)
1550                                     : "cc");
1551      return psw >> 28;   /* cc */
1552
1553   case S390_CC_OP_TEST_UNDER_MASK_8: {
1554      UChar value  = cc_dep1;
1555      UChar mask   = cc_dep2;
1556
1557      __asm__ volatile (
1558           "bras %%r2,1f\n\t"             /* %r2 = address of next insn */
1559           "tm %[value],0\n\t"            /* this is skipped, then EXecuted */
1560           "1: ex %[mask],0(%%r2)\n\t"    /* EXecute TM after modifying mask */
1561           "ipm %[psw]\n\t"             : [psw] "=d"(psw)
1562                                        : [value] "m"(value), [mask] "a"(mask)
1563                                        : "r2", "cc");
1564      return psw >> 28;   /* cc */
1565   }
1566
1567   case S390_CC_OP_TEST_UNDER_MASK_16: {
1568      /* Create a TMLL insn with the mask as given by cc_dep2 */
1569      UInt insn  = (0xA701u << 16) | cc_dep2;
1570      UInt value = cc_dep1;
1571
1572      __asm__ volatile (
1573           "lr   1,%[value]\n\t"
1574           "lhi  2,0x10\n\t"
1575           "ex   2,%[insn]\n\t"
1576           "ipm  %[psw]\n\t"       : [psw] "=d"(psw)
1577                                   : [value] "d"(value), [insn] "m"(insn)
1578                                   : "r1", "r2", "cc");
1579      return psw >> 28;   /* cc */
1580   }
1581
1582   case S390_CC_OP_SHIFT_LEFT_32:
1583      __asm__ volatile (
1584           "sla  %[op],0(%[amount])\n\t"
1585           "ipm  %[psw]\n\t"            : [psw] "=d"(psw), [op] "+d"(cc_dep1)
1586                                        : [amount] "a"(cc_dep2)
1587                                        : "cc");
1588      return psw >> 28;   /* cc */
1589
1590   case S390_CC_OP_SHIFT_LEFT_64: {
1591      Int high = (Int)(cc_dep1 >> 32);
1592      Int low  = (Int)(cc_dep1 & 0xFFFFFFFF);
1593
1594      __asm__ volatile (
1595           "lr   2,%[high]\n\t"
1596           "lr   3,%[low]\n\t"
1597           "slda 2,0(%[amount])\n\t"
1598           "ipm %[psw]\n\t"             : [psw] "=d"(psw), [high] "+d"(high),
1599                                          [low] "+d"(low)
1600                                        : [amount] "a"(cc_dep2)
1601                                        : "cc", "r2", "r3");
1602      return psw >> 28;   /* cc */
1603   }
1604
1605   case S390_CC_OP_INSERT_CHAR_MASK_32: {
1606      Int inserted = 0;
1607      Int msb = 0;
1608
1609      if (cc_dep2 & 1) {
1610         inserted |= cc_dep1 & 0xff;
1611         msb = 0x80;
1612      }
1613      if (cc_dep2 & 2) {
1614         inserted |= cc_dep1 & 0xff00;
1615         msb = 0x8000;
1616      }
1617      if (cc_dep2 & 4) {
1618         inserted |= cc_dep1 & 0xff0000;
1619         msb = 0x800000;
1620      }
1621      if (cc_dep2 & 8) {
1622         inserted |= cc_dep1 & 0xff000000;
1623         msb = 0x80000000;
1624      }
1625
1626      if (inserted & msb)  // MSB is 1
1627         return 1;
1628      if (inserted > 0)
1629         return 2;
1630      return 0;
1631   }
1632
1633   case S390_CC_OP_BFP_RESULT_32:
1634      return S390_CC_FOR_BFP_RESULT("ltebr", cc_dep1);
1635
1636   case S390_CC_OP_BFP_RESULT_64:
1637      return S390_CC_FOR_BFP_RESULT("ltdbr", cc_dep1);
1638
1639   case S390_CC_OP_BFP_RESULT_128:
1640      return S390_CC_FOR_BFP128_RESULT(cc_dep1, cc_dep2);
1641
1642   case S390_CC_OP_BFP_32_TO_INT_32:
1643      return S390_CC_FOR_BFP_CONVERT("cfebr", cc_dep1, cc_dep2);
1644
1645   case S390_CC_OP_BFP_64_TO_INT_32:
1646      return S390_CC_FOR_BFP_CONVERT("cfdbr", cc_dep1, cc_dep2);
1647
1648   case S390_CC_OP_BFP_128_TO_INT_32:
1649      return S390_CC_FOR_BFP128_CONVERT("cfxbr", cc_dep1, cc_dep2, cc_ndep);
1650
1651   case S390_CC_OP_BFP_32_TO_INT_64:
1652      return S390_CC_FOR_BFP_CONVERT("cgebr", cc_dep1, cc_dep2);
1653
1654   case S390_CC_OP_BFP_64_TO_INT_64:
1655      return S390_CC_FOR_BFP_CONVERT("cgdbr", cc_dep1, cc_dep2);
1656
1657   case S390_CC_OP_BFP_128_TO_INT_64:
1658      return S390_CC_FOR_BFP128_CONVERT("cgxbr", cc_dep1, cc_dep2, cc_ndep);
1659
1660   case S390_CC_OP_BFP_TDC_32:
1661      return S390_CC_FOR_BFP_TDC("tceb", cc_dep1, cc_dep2);
1662
1663   case S390_CC_OP_BFP_TDC_64:
1664      return S390_CC_FOR_BFP_TDC("tcdb", cc_dep1, cc_dep2);
1665
1666   case S390_CC_OP_BFP_TDC_128:
1667      return S390_CC_FOR_BFP128_TDC(cc_dep1, cc_dep2, cc_ndep);
1668
1669   case S390_CC_OP_SET:
1670      return cc_dep1;
1671
1672   case S390_CC_OP_BFP_32_TO_UINT_32:
1673      return S390_CC_FOR_BFP_UCONVERT(".insn rrf,0xb39c0000", cc_dep1, cc_dep2);
1674
1675   case S390_CC_OP_BFP_64_TO_UINT_32:
1676      return S390_CC_FOR_BFP_UCONVERT(".insn rrf,0xb39d0000", cc_dep1, cc_dep2);
1677
1678   case S390_CC_OP_BFP_128_TO_UINT_32:
1679      return S390_CC_FOR_BFP128_UCONVERT(".insn rrf,0xb39e0000", cc_dep1,
1680                                         cc_dep2, cc_ndep);
1681
1682   case S390_CC_OP_BFP_32_TO_UINT_64:
1683      return S390_CC_FOR_BFP_UCONVERT(".insn rrf,0xb3ac0000", cc_dep1, cc_dep2);
1684
1685   case S390_CC_OP_BFP_64_TO_UINT_64:
1686      return S390_CC_FOR_BFP_UCONVERT(".insn rrf,0xb3ad0000", cc_dep1, cc_dep2);
1687
1688   case S390_CC_OP_BFP_128_TO_UINT_64:
1689      return S390_CC_FOR_BFP128_UCONVERT(".insn rrf,0xb3ae0000", cc_dep1,
1690                                         cc_dep2, cc_ndep);
1691
1692   case S390_CC_OP_DFP_RESULT_64:
1693      return S390_CC_FOR_DFP_RESULT(cc_dep1);
1694
1695   case S390_CC_OP_DFP_RESULT_128:
1696      return S390_CC_FOR_DFP128_RESULT(cc_dep1, cc_dep2);
1697
1698   case S390_CC_OP_DFP_TDC_32:  /* TDCET */
1699      return S390_CC_FOR_DFP_TD(".insn rxe, 0xed0000000050", cc_dep1, cc_dep2);
1700
1701   case S390_CC_OP_DFP_TDC_64:  /* TDCDT */
1702      return S390_CC_FOR_DFP_TD(".insn rxe, 0xed0000000054", cc_dep1, cc_dep2);
1703
1704   case S390_CC_OP_DFP_TDC_128: /* TDCXT */
1705      return S390_CC_FOR_DFP128_TD(".insn rxe, 0xed0000000058", cc_dep1,
1706                                   cc_dep2, cc_ndep);
1707
1708   case S390_CC_OP_DFP_TDG_32:  /* TDGET */
1709      return S390_CC_FOR_DFP_TD(".insn rxe, 0xed0000000051", cc_dep1, cc_dep2);
1710
1711   case S390_CC_OP_DFP_TDG_64:  /* TDGDT */
1712      return S390_CC_FOR_DFP_TD(".insn rxe, 0xed0000000055", cc_dep1, cc_dep2);
1713
1714   case S390_CC_OP_DFP_TDG_128: /* TDGXT */
1715      return S390_CC_FOR_DFP128_TD(".insn rxe, 0xed0000000059", cc_dep1,
1716                                   cc_dep2, cc_ndep);
1717
1718   case S390_CC_OP_DFP_64_TO_INT_32: /* CFDTR */
1719      return S390_CC_FOR_DFP_CONVERT(".insn rrf,0xb9410000", cc_dep1, cc_dep2);
1720
1721   case S390_CC_OP_DFP_128_TO_INT_32: /* CFXTR */
1722      return S390_CC_FOR_DFP128_CONVERT(".insn rrf,0xb9490000", cc_dep1,
1723                                        cc_dep2, cc_ndep);
1724
1725   case S390_CC_OP_DFP_64_TO_INT_64: /* CGDTR */
1726      return S390_CC_FOR_DFP_CONVERT(".insn rrf,0xb3e10000", cc_dep1, cc_dep2);
1727
1728   case S390_CC_OP_DFP_128_TO_INT_64: /* CGXTR */
1729      return S390_CC_FOR_DFP128_CONVERT(".insn rrf,0xb3e90000", cc_dep1,
1730                                        cc_dep2, cc_ndep);
1731
1732   case S390_CC_OP_DFP_64_TO_UINT_32: /* CLFDTR */
1733      return S390_CC_FOR_DFP_UCONVERT(".insn rrf,0xb9430000", cc_dep1, cc_dep2);
1734
1735   case S390_CC_OP_DFP_128_TO_UINT_32: /* CLFXTR */
1736      return S390_CC_FOR_DFP128_UCONVERT(".insn rrf,0xb94b0000", cc_dep1,
1737                                         cc_dep2, cc_ndep);
1738
1739   case S390_CC_OP_DFP_64_TO_UINT_64: /* CLGDTR */
1740      return S390_CC_FOR_DFP_UCONVERT(".insn rrf,0xb9420000", cc_dep1, cc_dep2);
1741
1742   case S390_CC_OP_DFP_128_TO_UINT_64: /* CLGXTR */
1743      return S390_CC_FOR_DFP128_UCONVERT(".insn rrf,0xb94a0000", cc_dep1,
1744                                         cc_dep2, cc_ndep);
1745
1746   case S390_CC_OP_PFPO_32: {
1747      __asm__ volatile(
1748           "ler 4, %[cc_dep1]\n\t"      /* 32 bit FR move */
1749           "lr  0, %[cc_dep2]\n\t"      /* 32 bit GR move */
1750           ".short 0x010a\n\t"          /* PFPO */
1751           "ipm %[psw]\n\t"             : [psw] "=d"(psw)
1752                                        : [cc_dep1] "f"(cc_dep1),
1753                                          [cc_dep2] "d"(cc_dep2)
1754                                        : "r0", "r1", "f4");
1755      return psw >> 28;  /* cc */
1756   }
1757
1758   case S390_CC_OP_PFPO_64: {
1759      __asm__ volatile(
1760           "ldr 4, %[cc_dep1]\n\t"
1761           "lr  0, %[cc_dep2]\n\t"      /* 32 bit register move */
1762           ".short 0x010a\n\t"          /* PFPO */
1763           "ipm %[psw]\n\t"             : [psw] "=d"(psw)
1764                                        : [cc_dep1] "f"(cc_dep1),
1765                                          [cc_dep2] "d"(cc_dep2)
1766                                        : "r0", "r1", "f4");
1767      return psw >> 28;  /* cc */
1768   }
1769
1770   case S390_CC_OP_PFPO_128: {
1771      __asm__ volatile(
1772           "ldr 4,%[cc_dep1]\n\t"
1773           "ldr 6,%[cc_dep2]\n\t"
1774           "lr  0,%[cc_ndep]\n\t"       /* 32 bit register move */
1775           ".short 0x010a\n\t"          /* PFPO */
1776           "ipm %[psw]\n\t"             : [psw] "=d"(psw)
1777                                        : [cc_dep1] "f"(cc_dep1),
1778                                          [cc_dep2] "f"(cc_dep2),
1779                                          [cc_ndep] "d"(cc_ndep)
1780                                        : "r0", "r1", "f0", "f2", "f4", "f6");
1781      return psw >> 28;  /* cc */
1782   }
1783
1784   default:
1785      break;
1786   }
1787#endif
1788   vpanic("s390_calculate_cc");
1789}
1790
1791
1792/* Note that this does *not* return a Boolean value. The result needs to be
1793   explicitly tested against zero. */
1794UInt
1795s390_calculate_cond(ULong mask, ULong op, ULong dep1, ULong dep2, ULong ndep)
1796{
1797   UInt cc = s390_calculate_cc(op, dep1, dep2, ndep);
1798
1799   return ((mask << cc) & 0x8);
1800}
1801
1802/*------------------------------------------------------------*/
1803/*--- spechelper for performance                           ---*/
1804/*------------------------------------------------------------*/
1805
1806
/* Shorthands for constructing IR expressions: unary and binary operations
   and 64-, 32- and 8-bit unsigned constants. */
#define unop(op, arg)        IRExpr_Unop((op), (arg))
#define binop(op, lhs, rhs)  IRExpr_Binop((op), (lhs), (rhs))
#define mkU64(val)           IRExpr_Const(IRConst_U64(val))
#define mkU32(val)           IRExpr_Const(IRConst_U32(val))
#define mkU8(val)            IRExpr_Const(IRConst_U8(val))
1813
1814
1815static inline Bool
1816isC64(const IRExpr *expr)
1817{
1818   return expr->tag == Iex_Const && expr->Iex.Const.con->tag == Ico_U64;
1819}
1820
1821
1822/* The returned expression is NULL if no specialization was found. In that
1823   case the helper function will be called. Otherwise, the expression has
1824   type Ity_I32 and a Boolean value. */
1825IRExpr *
1826guest_s390x_spechelper(const HChar *function_name, IRExpr **args,
1827                       IRStmt **precedingStmts, Int n_precedingStmts)
1828{
1829   UInt i, arity = 0;
1830
1831   for (i = 0; args[i]; i++)
1832      arity++;
1833
1834#  if 0
1835   vex_printf("spec request:\n");
1836   vex_printf("   %s  ", function_name);
1837   for (i = 0; i < arity; i++) {
1838      vex_printf("  ");
1839      ppIRExpr(args[i]);
1840   }
1841   vex_printf("\n");
1842#  endif
1843
1844   /* --------- Specialising "s390_calculate_cond" --------- */
1845
1846   if (vex_streq(function_name, "s390_calculate_cond")) {
1847      IRExpr *cond_expr, *cc_op_expr, *cc_dep1, *cc_dep2;
1848      ULong cond, cc_op;
1849
1850      vassert(arity == 5);
1851
1852      cond_expr  = args[0];
1853      cc_op_expr = args[1];
1854
1855      /* The necessary requirement for all optimizations here is that the
1856         condition and the cc_op are constant. So check that upfront. */
1857      if (! isC64(cond_expr))  return NULL;
1858      if (! isC64(cc_op_expr)) return NULL;
1859
1860      cond    = cond_expr->Iex.Const.con->Ico.U64;
1861      cc_op   = cc_op_expr->Iex.Const.con->Ico.U64;
1862
1863      vassert(cond <= 15);
1864
1865      /*
1866        +------+---+---+---+---+
1867        | cc   | 0 | 1 | 2 | 3 |
1868        | cond | 8 | 4 | 2 | 1 |
1869        +------+---+---+---+---+
1870      */
1871      cc_dep1 = args[2];
1872      cc_dep2 = args[3];
1873
1874      /* S390_CC_OP_SIGNED_COMPARE */
1875      if (cc_op == S390_CC_OP_SIGNED_COMPARE) {
1876         /*
1877            cc == 0  --> cc_dep1 == cc_dep2   (cond == 8)
1878            cc == 1  --> cc_dep1 <  cc_dep2   (cond == 4)
1879            cc == 2  --> cc_dep1 >  cc_dep2   (cond == 2)
1880
1881            Because cc == 3 cannot occur the rightmost bit of cond is
1882            a don't care.
1883         */
1884         if (cond == 8 || cond == 8 + 1) {
1885            return unop(Iop_1Uto32, binop(Iop_CmpEQ64, cc_dep1, cc_dep2));
1886         }
1887         if (cond == 4 + 2 || cond == 4 + 2 + 1) {
1888            return unop(Iop_1Uto32, binop(Iop_CmpNE64, cc_dep1, cc_dep2));
1889         }
1890         if (cond == 4 || cond == 4 + 1) {
1891            return unop(Iop_1Uto32, binop(Iop_CmpLT64S, cc_dep1, cc_dep2));
1892         }
1893         if (cond == 8 + 4 || cond == 8 + 4 + 1) {
1894            return unop(Iop_1Uto32, binop(Iop_CmpLE64S, cc_dep1, cc_dep2));
1895         }
1896         /* cc_dep1 > cc_dep2  ---->  cc_dep2 < cc_dep1 */
1897         if (cond == 2 || cond == 2 + 1) {
1898            return unop(Iop_1Uto32, binop(Iop_CmpLT64S, cc_dep2, cc_dep1));
1899         }
1900         if (cond == 8 + 2 || cond == 8 + 2 + 1) {
1901            return unop(Iop_1Uto32, binop(Iop_CmpLE64S, cc_dep2, cc_dep1));
1902         }
1903         if (cond == 8 + 4 + 2 || cond == 8 + 4 + 2 + 1) {
1904            return mkU32(1);
1905         }
1906         /* Remaining case */
1907         return mkU32(0);
1908      }
1909
1910      /* S390_CC_OP_UNSIGNED_COMPARE */
1911      if (cc_op == S390_CC_OP_UNSIGNED_COMPARE) {
1912         /*
1913            cc == 0  --> cc_dep1 == cc_dep2   (cond == 8)
1914            cc == 1  --> cc_dep1 <  cc_dep2   (cond == 4)
1915            cc == 2  --> cc_dep1 >  cc_dep2   (cond == 2)
1916
1917            Because cc == 3 cannot occur the rightmost bit of cond is
1918            a don't care.
1919         */
1920         if (cond == 8 || cond == 8 + 1) {
1921            return unop(Iop_1Uto32, binop(Iop_CmpEQ64, cc_dep1, cc_dep2));
1922         }
1923         if (cond == 4 + 2 || cond == 4 + 2 + 1) {
1924            return unop(Iop_1Uto32, binop(Iop_CmpNE64, cc_dep1, cc_dep2));
1925         }
1926         if (cond == 4 || cond == 4 + 1) {
1927            return unop(Iop_1Uto32, binop(Iop_CmpLT64U, cc_dep1, cc_dep2));
1928         }
1929         if (cond == 8 + 4 || cond == 8 + 4 + 1) {
1930            return unop(Iop_1Uto32, binop(Iop_CmpLE64U, cc_dep1, cc_dep2));
1931         }
1932         /* cc_dep1 > cc_dep2  ---->  cc_dep2 < cc_dep1 */
1933         if (cond == 2 || cond == 2 + 1) {
1934            return unop(Iop_1Uto32, binop(Iop_CmpLT64U, cc_dep2, cc_dep1));
1935         }
1936         if (cond == 8 + 2 || cond == 8 + 2 + 1) {
1937            return unop(Iop_1Uto32, binop(Iop_CmpLE64U, cc_dep2, cc_dep1));
1938         }
1939         if (cond == 8 + 4 + 2 || cond == 8 + 4 + 2 + 1) {
1940            return mkU32(1);
1941         }
1942         /* Remaining case */
1943         return mkU32(0);
1944      }
1945
1946      /* S390_CC_OP_LOAD_AND_TEST */
1947      if (cc_op == S390_CC_OP_LOAD_AND_TEST) {
1948         /*
1949            cc == 0  --> cc_dep1 == 0   (cond == 8)
1950            cc == 1  --> cc_dep1 <  0   (cond == 4)
1951            cc == 2  --> cc_dep1 >  0   (cond == 2)
1952
1953            Because cc == 3 cannot occur the rightmost bit of cond is
1954            a don't care.
1955         */
1956         if (cond == 8 || cond == 8 + 1) {
1957            return unop(Iop_1Uto32, binop(Iop_CmpEQ64, cc_dep1, mkU64(0)));
1958         }
1959         if (cond == 4 + 2 || cond == 4 + 2 + 1) {
1960            return unop(Iop_1Uto32, binop(Iop_CmpNE64, cc_dep1, mkU64(0)));
1961         }
1962         if (cond == 4 || cond == 4 + 1) {
1963             /* Special case cc_dep < 0. Only check the MSB to avoid bogus
1964               memcheck complaints due to gcc magic. Fixes 343802
1965             */
1966            return unop(Iop_64to32, binop(Iop_Shr64, cc_dep1, mkU8(63)));
1967         }
1968         if (cond == 8 + 4 || cond == 8 + 4 + 1) {
1969            return unop(Iop_1Uto32, binop(Iop_CmpLE64S, cc_dep1, mkU64(0)));
1970         }
1971         /* cc_dep1 > 0  ---->  0 < cc_dep1 */
1972         if (cond == 2 || cond == 2 + 1) {
1973            return unop(Iop_1Uto32, binop(Iop_CmpLT64S, mkU64(0), cc_dep1));
1974         }
1975         if (cond == 8 + 2 || cond == 8 + 2 + 1) {
1976            /* Special case cc_dep >= 0. Only check the MSB to avoid bogus
1977               memcheck complaints due to gcc magic. Fixes 308427
1978             */
1979            return unop(Iop_64to32, binop(Iop_Xor64,
1980                                          binop(Iop_Shr64, cc_dep1, mkU8(63)),
1981                                          mkU64(1)));
1982         }
1983         if (cond == 8 + 4 + 2 || cond == 8 + 4 + 2 + 1) {
1984            return mkU32(1);
1985         }
1986         /* Remaining case */
1987         return mkU32(0);
1988      }
1989
1990      /* S390_CC_OP_BITWISE */
1991      if (cc_op == S390_CC_OP_BITWISE) {
1992         /*
1993            cc_dep1 is the result of the boolean operation.
1994
1995            cc == 0  --> cc_dep1 == 0   (cond == 8)
1996            cc == 1  --> cc_dep1 != 0   (cond == 4)
1997
1998            Because cc == 2 and cc == 3 cannot occur the two rightmost bits of
1999            cond are don't cares. Therefore:
2000
2001            cond == 00xx  -> always false
2002            cond == 01xx  -> not equal
2003            cond == 10xx  -> equal
2004            cond == 11xx  -> always true
2005         */
2006         if ((cond & (8 + 4)) == 8 + 4) {
2007            return mkU32(1);
2008         }
2009         if (cond & 8) {
2010            return unop(Iop_1Uto32, binop(Iop_CmpEQ64, cc_dep1, mkU64(0)));
2011         }
2012         if (cond & 4) {
2013            return unop(Iop_1Uto32, binop(Iop_CmpNE64, cc_dep1, mkU64(0)));
2014         }
2015         /* Remaining case */
2016         return mkU32(0);
2017      }
2018
2019      /* S390_CC_OP_INSERT_CHAR_MASK_32
2020         Since the mask comes from an immediate field in the opcode, we
2021         expect the mask to be a constant here. That simplifies matters. */
2022      if (cc_op == S390_CC_OP_INSERT_CHAR_MASK_32) {
2023         ULong mask;
2024         UInt imask = 0, shift = 0;
2025         IRExpr *word;
2026
2027         if (! isC64(cc_dep2)) goto missed;
2028
2029         mask = cc_dep2->Iex.Const.con->Ico.U64;
2030
2031         /* Extract the 32-bit value from the thunk */
2032
2033         word = unop(Iop_64to32, cc_dep1);
2034
2035         switch (mask) {
2036         case 0:  shift =  0; imask = 0x00000000; break;
2037         case 1:  shift = 24; imask = 0x000000FF; break;
2038         case 2:  shift = 16; imask = 0x0000FF00; break;
2039         case 3:  shift = 16; imask = 0x0000FFFF; break;
2040         case 4:  shift =  8; imask = 0x00FF0000; break;
2041         case 5:  shift =  8; imask = 0x00FF00FF; break;
2042         case 6:  shift =  8; imask = 0x00FFFF00; break;
2043         case 7:  shift =  8; imask = 0x00FFFFFF; break;
2044         case 8:  shift =  0; imask = 0xFF000000; break;
2045         case 9:  shift =  0; imask = 0xFF0000FF; break;
2046         case 10: shift =  0; imask = 0xFF00FF00; break;
2047         case 11: shift =  0; imask = 0xFF00FFFF; break;
2048         case 12: shift =  0; imask = 0xFFFF0000; break;
2049         case 13: shift =  0; imask = 0xFFFF00FF; break;
2050         case 14: shift =  0; imask = 0xFFFFFF00; break;
2051         case 15: shift =  0; imask = 0xFFFFFFFF; break;
2052         }
2053
2054         /* Select the bits that were inserted */
2055         word = binop(Iop_And32, word, mkU32(imask));
2056
2057         /* cc == 0  --> all inserted bits zero or mask == 0   (cond == 8)
2058            cc == 1  --> leftmost inserted bit is one          (cond == 4)
2059            cc == 2  --> leftmost inserted bit is zero and not (cond == 2)
2060                         all inserted bits are zero
2061
2062            Because cc == 0,1,2 the rightmost bit of the mask is a don't care */
2063         if (cond == 8 || cond == 8 + 1) {
2064            return unop(Iop_1Uto32, binop(Iop_CmpEQ32, word, mkU32(0)));
2065         }
2066         if (cond == 4 + 2 || cond == 4 + 2 + 1) {
2067            return unop(Iop_1Uto32, binop(Iop_CmpNE32, word, mkU32(0)));
2068         }
2069
2070         /* Sign extend */
2071         if (shift != 0) {
2072            word = binop(Iop_Sar32, binop(Iop_Shl32, word, mkU8(shift)),
2073                         mkU8(shift));
2074         }
2075
2076         if (cond == 4 || cond == 4 + 1) {  /* word < 0 */
2077            return unop(Iop_1Uto32, binop(Iop_CmpLT32S, word, mkU32(0)));
2078         }
2079         if (cond == 2 || cond == 2 + 1) {  /* word > 0 */
2080            return unop(Iop_1Uto32, binop(Iop_CmpLT32S, mkU32(0), word));
2081         }
2082         if (cond == 8 + 4 || cond == 8 + 4 + 1) {
2083            return unop(Iop_1Uto32, binop(Iop_CmpLE32S, word, mkU32(0)));
2084         }
2085         if (cond == 8 + 2 || cond == 8 + 2 + 1) {
2086            return unop(Iop_1Uto32, binop(Iop_CmpLE32S, mkU32(0), word));
2087         }
2088         if (cond == 8 + 4 + 2 || cond == 8 + 4 + 2 + 1) {
2089            return mkU32(1);
2090         }
2091         /* Remaining case */
2092         return mkU32(0);
2093      }
2094
2095      /* S390_CC_OP_TEST_UNDER_MASK_8
2096         Since the mask comes from an immediate field in the opcode, we
2097         expect the mask to be a constant here. That simplifies matters. */
2098      if (cc_op == S390_CC_OP_TEST_UNDER_MASK_8) {
2099         ULong mask16;
2100
2101         if (! isC64(cc_dep2)) goto missed;
2102
2103         mask16 = cc_dep2->Iex.Const.con->Ico.U64;
2104
         /* Get rid of the mask16 == 0 case first. Some of the simplifications
            below (e.g. for OVFL) only hold if mask16 != 0.  */
2107         if (mask16 == 0) {   /* cc == 0 */
2108            if (cond & 0x8) return mkU32(1);
2109            return mkU32(0);
2110         }
2111
2112         /* cc == 2 is a don't care */
2113         if (cond == 8 || cond == 8 + 2) {
2114            return unop(Iop_1Uto32, binop(Iop_CmpEQ64,
2115                                          binop(Iop_And64, cc_dep1, cc_dep2),
2116                                          mkU64(0)));
2117         }
2118         if (cond == 7 || cond == 7 - 2) {
2119            return unop(Iop_1Uto32, binop(Iop_CmpNE64,
2120                                          binop(Iop_And64, cc_dep1, cc_dep2),
2121                                          mkU64(0)));
2122         }
2123         if (cond == 1 || cond == 1 + 2) {
2124            return unop(Iop_1Uto32, binop(Iop_CmpEQ64,
2125                                          binop(Iop_And64, cc_dep1, cc_dep2),
2126                                          cc_dep2));
2127         }
2128         if (cond == 14 || cond == 14 - 2) {  /* ! OVFL */
2129            return unop(Iop_1Uto32, binop(Iop_CmpNE64,
2130                                          binop(Iop_And64, cc_dep1, cc_dep2),
2131                                          cc_dep2));
2132         }
2133         goto missed;
2134      }
2135
2136      /* S390_CC_OP_TEST_UNDER_MASK_16
2137         Since the mask comes from an immediate field in the opcode, we
2138         expect the mask to be a constant here. That simplifies matters. */
2139      if (cc_op == S390_CC_OP_TEST_UNDER_MASK_16) {
2140         ULong mask16;
2141         UInt msb;
2142
2143         if (! isC64(cc_dep2)) goto missed;
2144
2145         mask16 = cc_dep2->Iex.Const.con->Ico.U64;
2146
         /* Get rid of the mask16 == 0 case first. Some of the simplifications
            below (e.g. for OVFL) only hold if mask16 != 0.  */
2149         if (mask16 == 0) {   /* cc == 0 */
2150            if (cond & 0x8) return mkU32(1);
2151            return mkU32(0);
2152         }
2153
2154         if (cond == 8) {
2155            return unop(Iop_1Uto32, binop(Iop_CmpEQ64,
2156                                          binop(Iop_And64, cc_dep1, cc_dep2),
2157                                          mkU64(0)));
2158         }
2159         if (cond == 7) {
2160            return unop(Iop_1Uto32, binop(Iop_CmpNE64,
2161                                          binop(Iop_And64, cc_dep1, cc_dep2),
2162                                          mkU64(0)));
2163         }
2164         if (cond == 1) {
2165            return unop(Iop_1Uto32, binop(Iop_CmpEQ64,
2166                                          binop(Iop_And64, cc_dep1, cc_dep2),
2167                                          mkU64(mask16)));
2168         }
2169         if (cond == 14) {  /* ! OVFL */
2170            return unop(Iop_1Uto32, binop(Iop_CmpNE64,
2171                                          binop(Iop_And64, cc_dep1, cc_dep2),
2172                                          mkU64(mask16)));
2173         }
2174
2175         /* Find MSB in mask */
2176         msb = 0x8000;
2177         while (msb > mask16)
2178            msb >>= 1;
2179
2180         if (cond == 2) {  /* cc == 2 */
2181            IRExpr *c1, *c2;
2182
2183            /* (cc_dep & msb) != 0 && (cc_dep & mask16) != mask16 */
2184            c1 = binop(Iop_CmpNE64,
2185                       binop(Iop_And64, cc_dep1, mkU64(msb)), mkU64(0));
2186            c2 = binop(Iop_CmpNE64,
2187                       binop(Iop_And64, cc_dep1, cc_dep2),
2188                       mkU64(mask16));
2189            return binop(Iop_And32, unop(Iop_1Uto32, c1),
2190                         unop(Iop_1Uto32, c2));
2191         }
2192
2193         if (cond == 4) {  /* cc == 1 */
2194            IRExpr *c1, *c2;
2195
2196            /* (cc_dep & msb) == 0 && (cc_dep & mask16) != 0 */
2197            c1 = binop(Iop_CmpEQ64,
2198                       binop(Iop_And64, cc_dep1, mkU64(msb)), mkU64(0));
2199            c2 = binop(Iop_CmpNE64,
2200                       binop(Iop_And64, cc_dep1, cc_dep2),
2201                       mkU64(0));
2202            return binop(Iop_And32, unop(Iop_1Uto32, c1),
2203                         unop(Iop_1Uto32, c2));
2204         }
2205
2206         if (cond == 11) {  /* cc == 0,2,3 */
2207            IRExpr *c1, *c2;
2208
2209            c1 = binop(Iop_CmpNE64,
2210                       binop(Iop_And64, cc_dep1, mkU64(msb)), mkU64(0));
2211            c2 = binop(Iop_CmpEQ64,
2212                       binop(Iop_And64, cc_dep1, cc_dep2),
2213                       mkU64(0));
2214            return binop(Iop_Or32, unop(Iop_1Uto32, c1),
2215                         unop(Iop_1Uto32, c2));
2216         }
2217
2218         if (cond == 3) {  /* cc == 2 || cc == 3 */
2219            return unop(Iop_1Uto32,
2220                        binop(Iop_CmpNE64,
2221                              binop(Iop_And64, cc_dep1, mkU64(msb)),
2222                              mkU64(0)));
2223         }
2224         if (cond == 12) { /* cc == 0 || cc == 1 */
2225            return unop(Iop_1Uto32,
2226                        binop(Iop_CmpEQ64,
2227                              binop(Iop_And64, cc_dep1, mkU64(msb)),
2228                              mkU64(0)));
2229         }
2230         // vex_printf("TUM mask = 0x%llx\n", mask16);
2231         goto missed;
2232      }
2233
2234      /* S390_CC_OP_UNSIGNED_SUB_64/32 */
2235      if (cc_op == S390_CC_OP_UNSIGNED_SUB_64 ||
2236          cc_op == S390_CC_OP_UNSIGNED_SUB_32) {
2237         /*
2238            cc_dep1, cc_dep2 are the zero extended left and right operands
2239
2240            cc == 1  --> result != 0, borrow    (cond == 4)
2241            cc == 2  --> result == 0, no borrow (cond == 2)
2242            cc == 3  --> result != 0, no borrow (cond == 1)
2243
2244            cc = (cc_dep1 == cc_dep2) ? 2
2245                                      : (cc_dep1 > cc_dep2) ? 3 : 1;
2246
2247            Because cc == 0 cannot occur the leftmost bit of cond is
2248            a don't care.
2249         */
2250         if (cond == 1 || cond == 1 + 8) {  /* cc == 3   op2 < op1 */
2251            return unop(Iop_1Uto32, binop(Iop_CmpLT64U, cc_dep2, cc_dep1));
2252         }
2253         if (cond == 2 || cond == 2 + 8) {  /* cc == 2 */
2254            return unop(Iop_1Uto32, binop(Iop_CmpEQ64, cc_dep1, cc_dep2));
2255         }
2256         if (cond == 4 || cond == 4 + 8) {  /* cc == 1 */
2257            return unop(Iop_1Uto32, binop(Iop_CmpLT64U, cc_dep1, cc_dep2));
2258         }
2259         if (cond == 3 || cond == 3 + 8) {  /* cc == 2 || cc == 3 */
2260            return unop(Iop_1Uto32, binop(Iop_CmpLE64U, cc_dep2, cc_dep1));
2261         }
2262         if (cond == 6 || cond == 6 + 8) {  /* cc == 2 || cc == 1 */
2263            return unop(Iop_1Uto32, binop(Iop_CmpLE64U, cc_dep1, cc_dep2));
2264         }
2265
2266         if (cond == 5 || cond == 5 + 8) {  /* cc == 3 || cc == 1 */
2267            return unop(Iop_1Uto32, binop(Iop_CmpNE64, cc_dep1, cc_dep2));
2268         }
2269         if (cond == 7 || cond == 7 + 8) {
2270            return mkU32(1);
2271         }
2272         /* Remaining case */
2273         return mkU32(0);
2274      }
2275
2276      /* S390_CC_OP_UNSIGNED_ADD_64 */
2277      if (cc_op == S390_CC_OP_UNSIGNED_ADD_64) {
2278         /*
2279            cc_dep1, cc_dep2 are the zero extended left and right operands
2280
2281            cc == 0  --> result == 0, no carry  (cond == 8)
2282            cc == 1  --> result != 0, no carry  (cond == 4)
2283            cc == 2  --> result == 0, carry     (cond == 2)
2284            cc == 3  --> result != 0, carry     (cond == 1)
2285         */
2286         if (cond == 8) { /* cc == 0 */
2287            /* Both inputs are 0 */
2288            return unop(Iop_1Uto32, binop(Iop_CmpEQ64,
2289                                          binop(Iop_Or64, cc_dep1, cc_dep2),
2290                                          mkU64(0)));
2291         }
2292         if (cond == 7) { /* cc == 1,2,3 */
2293            /* Not both inputs are 0 */
2294            return unop(Iop_1Uto32, binop(Iop_CmpNE64,
2295                                          binop(Iop_Or64, cc_dep1, cc_dep2),
2296                                          mkU64(0)));
2297         }
2298         if (cond == 8 + 2) {  /* cc == 0,2  -> result is zero */
2299            return unop(Iop_1Uto32, binop(Iop_CmpEQ64,
2300                                          binop(Iop_Add64, cc_dep1, cc_dep2),
2301                                          mkU64(0)));
2302         }
2303         if (cond == 4 + 1) {  /* cc == 1,3  -> result is not zero */
2304            return unop(Iop_1Uto32, binop(Iop_CmpNE64,
2305                                          binop(Iop_Add64, cc_dep1, cc_dep2),
2306                                          mkU64(0)));
2307         }
2308         goto missed;
2309      }
2310
2311      /* S390_CC_OP_UNSIGNED_ADD_32 */
2312      if (cc_op == S390_CC_OP_UNSIGNED_ADD_32) {
2313         /*
2314            cc_dep1, cc_dep2 are the zero extended left and right operands
2315
2316            cc == 0  --> result == 0, no carry  (cond == 8)
2317            cc == 1  --> result != 0, no carry  (cond == 4)
2318            cc == 2  --> result == 0, carry     (cond == 2)
2319            cc == 3  --> result != 0, carry     (cond == 1)
2320         */
2321         if (cond == 8) { /* cc == 0 */
2322            /* Both inputs are 0 */
2323            return unop(Iop_1Uto32, binop(Iop_CmpEQ64,
2324                                          binop(Iop_Or64, cc_dep1, cc_dep2),
2325                                          mkU64(0)));
2326         }
2327         if (cond == 7) { /* cc == 1,2,3 */
2328            /* Not both inputs are 0 */
2329            return unop(Iop_1Uto32, binop(Iop_CmpNE64,
2330                                          binop(Iop_Or64, cc_dep1, cc_dep2),
2331                                          mkU64(0)));
2332         }
2333         if (cond == 8 + 2) {  /* cc == 0,2  -> result is zero */
2334            return unop(Iop_1Uto32, binop(Iop_CmpEQ32,
2335                                          binop(Iop_Add32,
2336                                                unop(Iop_64to32, cc_dep1),
2337                                                unop(Iop_64to32, cc_dep2)),
2338                                          mkU32(0)));
2339         }
2340         if (cond == 4 + 1) {  /* cc == 1,3  -> result is not zero */
2341            return unop(Iop_1Uto32, binop(Iop_CmpNE32,
2342                                          binop(Iop_Add32,
2343                                                unop(Iop_64to32, cc_dep1),
2344                                                unop(Iop_64to32, cc_dep2)),
2345                                          mkU32(0)));
2346         }
2347         goto missed;
2348      }
2349
2350      /* S390_CC_OP_SET */
2351      if (cc_op == S390_CC_OP_SET) {
2352         /* cc_dep1 is the condition code
2353
2354            Return 1, if ((cond << cc_dep1) & 0x8) != 0 */
2355
2356        return unop(Iop_1Uto32,
2357                    binop(Iop_CmpNE64,
2358                          binop(Iop_And64,
2359                                binop(Iop_Shl64, cond_expr,
2360                                      unop(Iop_64to8, cc_dep1)),
2361                                mkU64(8)),
2362                          mkU64(0)));
2363      }
2364
2365      goto missed;
2366   }
2367
2368   /* --------- Specialising "s390_calculate_cc" --------- */
2369
2370   if (vex_streq(function_name, "s390_calculate_cc")) {
2371      IRExpr *cc_op_expr, *cc_dep1;
2372      ULong cc_op;
2373
2374      vassert(arity == 4);
2375
2376      cc_op_expr = args[0];
2377
2378      /* The necessary requirement for all optimizations here is that
2379         cc_op is constant. So check that upfront. */
2380      if (! isC64(cc_op_expr)) return NULL;
2381
2382      cc_op   = cc_op_expr->Iex.Const.con->Ico.U64;
2383      cc_dep1 = args[1];
2384
2385      if (cc_op == S390_CC_OP_BITWISE) {
2386         return unop(Iop_1Uto32,
2387                     binop(Iop_CmpNE64, cc_dep1, mkU64(0)));
2388      }
2389
2390      if (cc_op == S390_CC_OP_SET) {
2391         return unop(Iop_64to32, cc_dep1);
2392      }
2393
2394      goto missed;
2395   }
2396
2397missed:
2398   return NULL;
2399}
2400
2401/*---------------------------------------------------------------*/
2402/*--- end                                guest_s390_helpers.c ---*/
2403/*---------------------------------------------------------------*/
2404