1/*
2 *  ARM translation
3 *
4 *  Copyright (c) 2003 Fabrice Bellard
5 *  Copyright (c) 2005-2007 CodeSourcery
6 *  Copyright (c) 2007 OpenedHand, Ltd.
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 */
21#include <stdarg.h>
22#include <stdlib.h>
23#include <stdio.h>
24#include <string.h>
25#include <inttypes.h>
26
27#include "cpu.h"
28#include "exec/exec-all.h"
29#include "disas/disas.h"
30#include "tcg-op.h"
31#include "qemu/log.h"
32
33#include "helper.h"
34#define GEN_HELPER 1
35#include "helper.h"
36
37#define ENABLE_ARCH_4T    arm_feature(env, ARM_FEATURE_V4T)
38#define ENABLE_ARCH_5     arm_feature(env, ARM_FEATURE_V5)
39/* currently all emulated v5 cores are also v5TE, so don't bother */
40#define ENABLE_ARCH_5TE   arm_feature(env, ARM_FEATURE_V5)
41#define ENABLE_ARCH_5J    0
42#define ENABLE_ARCH_6     arm_feature(env, ARM_FEATURE_V6)
43#define ENABLE_ARCH_6K   arm_feature(env, ARM_FEATURE_V6K)
44#define ENABLE_ARCH_6T2   arm_feature(env, ARM_FEATURE_THUMB2)
45#define ENABLE_ARCH_7     arm_feature(env, ARM_FEATURE_V7)
46
47#define ARCH(x) do { if (!ENABLE_ARCH_##x) goto illegal_op; } while(0)
48
49/* internal defines */
50typedef struct DisasContext {
51    target_ulong pc;
52    int is_jmp;
53    /* Nonzero if this instruction has been conditionally skipped.  */
54    int condjmp;
55    /* The label that will be jumped to when the instruction is skipped.  */
56    int condlabel;
57    /* Thumb-2 condtional execution bits.  */
58    int condexec_mask;
59    int condexec_cond;
60    struct TranslationBlock *tb;
61    int singlestep_enabled;
62    int thumb;
63#if !defined(CONFIG_USER_ONLY)
64    int user;
65#endif
66    int vfp_enabled;
67    int vec_len;
68    int vec_stride;
69} DisasContext;
70
71static uint32_t gen_opc_condexec_bits[OPC_BUF_SIZE];
72
73#if defined(CONFIG_USER_ONLY)
74#define IS_USER(s) 1
75#else
76#define IS_USER(s) (s->user)
77#endif
78
79/* These instructions trap after executing, so defer them until after the
80   conditional executions state has been updated.  */
81#define DISAS_WFI 4
82#define DISAS_SWI 5
83#define DISAS_SMC 6
84
85static TCGv_ptr cpu_env;
86/* We reuse the same 64-bit temporaries for efficiency.  */
87static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
88static TCGv_i32 cpu_R[16];
89static TCGv_i32 cpu_exclusive_addr;
90static TCGv_i32 cpu_exclusive_val;
91static TCGv_i32 cpu_exclusive_high;
92#ifdef CONFIG_USER_ONLY
93static TCGv_i32 cpu_exclusive_test;
94static TCGv_i32 cpu_exclusive_info;
95#endif
96
97/* FIXME:  These should be removed.  */
98static TCGv cpu_F0s, cpu_F1s;
99static TCGv_i64 cpu_F0d, cpu_F1d;
100
101#include "exec/gen-icount.h"
102
103static const char *regnames[] =
104    { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
105      "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };
106
107/* initialize TCG globals.  */
108void arm_translate_init(void)
109{
110    int i;
111
112    cpu_env = tcg_global_reg_new_ptr(TCG_AREG0, "env");
113
114    for (i = 0; i < 16; i++) {
115        cpu_R[i] = tcg_global_mem_new_i32(TCG_AREG0,
116                                          offsetof(CPUARMState, regs[i]),
117                                          regnames[i]);
118    }
119    cpu_exclusive_addr = tcg_global_mem_new_i32(TCG_AREG0,
120        offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
121    cpu_exclusive_val = tcg_global_mem_new_i32(TCG_AREG0,
122        offsetof(CPUARMState, exclusive_val), "exclusive_val");
123    cpu_exclusive_high = tcg_global_mem_new_i32(TCG_AREG0,
124        offsetof(CPUARMState, exclusive_high), "exclusive_high");
125#ifdef CONFIG_USER_ONLY
126    cpu_exclusive_test = tcg_global_mem_new_i32(TCG_AREG0,
127        offsetof(CPUARMState, exclusive_test), "exclusive_test");
128    cpu_exclusive_info = tcg_global_mem_new_i32(TCG_AREG0,
129        offsetof(CPUARMState, exclusive_info), "exclusive_info");
130#endif
131}
132
133static inline TCGv load_cpu_offset(int offset)
134{
135    TCGv tmp = tcg_temp_new_i32();
136    tcg_gen_ld_i32(tmp, cpu_env, offset);
137    return tmp;
138}
139
140#define load_cpu_field(name) load_cpu_offset(offsetof(CPUARMState, name))
141
142static inline void store_cpu_offset(TCGv var, int offset)
143{
144    tcg_gen_st_i32(var, cpu_env, offset);
145    tcg_temp_free_i32(var);
146}
147
148#define store_cpu_field(var, name) \
149    store_cpu_offset(var, offsetof(CPUARMState, name))
150
151/* Set a variable to the value of a CPU register.  */
152static void load_reg_var(DisasContext *s, TCGv var, int reg)
153{
154    if (reg == 15) {
155        uint32_t addr;
156        /* normaly, since we updated PC, we need only to add one insn */
157        if (s->thumb)
158            addr = (long)s->pc + 2;
159        else
160            addr = (long)s->pc + 4;
161        tcg_gen_movi_i32(var, addr);
162    } else {
163        tcg_gen_mov_i32(var, cpu_R[reg]);
164    }
165}
166
167/* Create a new temporary and set it to the value of a CPU register.  */
168static inline TCGv load_reg(DisasContext *s, int reg)
169{
170    TCGv tmp = tcg_temp_new_i32();
171    load_reg_var(s, tmp, reg);
172    return tmp;
173}
174
175/* Set a CPU register.  The source must be a temporary and will be
176   marked as dead.  */
177static void store_reg(DisasContext *s, int reg, TCGv var)
178{
179    if (reg == 15) {
180        tcg_gen_andi_i32(var, var, ~1);
181        s->is_jmp = DISAS_JUMP;
182    }
183    tcg_gen_mov_i32(cpu_R[reg], var);
184    tcg_temp_free_i32(var);
185}
186
187/* Value extensions.  */
188#define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
189#define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
190#define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
191#define gen_sxth(var) tcg_gen_ext16s_i32(var, var)
192
193#define gen_sxtb16(var) gen_helper_sxtb16(var, var)
194#define gen_uxtb16(var) gen_helper_uxtb16(var, var)
195
196
197static inline void gen_set_cpsr(TCGv var, uint32_t mask)
198{
199    TCGv tmp_mask = tcg_const_i32(mask);
200    gen_helper_cpsr_write(cpu_env, var, tmp_mask);
201    tcg_temp_free_i32(tmp_mask);
202}
203/* Set NZCV flags from the high 4 bits of var.  */
204#define gen_set_nzcv(var) gen_set_cpsr(var, CPSR_NZCV)
205
206static void gen_exception(int excp)
207{
208    TCGv tmp = tcg_temp_new_i32();
209    tcg_gen_movi_i32(tmp, excp);
210    gen_helper_exception(cpu_env, tmp);
211    tcg_temp_free_i32(tmp);
212}
213
214static void gen_smul_dual(TCGv a, TCGv b)
215{
216    TCGv tmp1 = tcg_temp_new_i32();
217    TCGv tmp2 = tcg_temp_new_i32();
218    tcg_gen_ext16s_i32(tmp1, a);
219    tcg_gen_ext16s_i32(tmp2, b);
220    tcg_gen_mul_i32(tmp1, tmp1, tmp2);
221    tcg_temp_free_i32(tmp2);
222    tcg_gen_sari_i32(a, a, 16);
223    tcg_gen_sari_i32(b, b, 16);
224    tcg_gen_mul_i32(b, b, a);
225    tcg_gen_mov_i32(a, tmp1);
226    tcg_temp_free_i32(tmp1);
227}
228
229/* Byteswap each halfword.  */
230static void gen_rev16(TCGv var)
231{
232    TCGv tmp = tcg_temp_new_i32();
233    tcg_gen_shri_i32(tmp, var, 8);
234    tcg_gen_andi_i32(tmp, tmp, 0x00ff00ff);
235    tcg_gen_shli_i32(var, var, 8);
236    tcg_gen_andi_i32(var, var, 0xff00ff00);
237    tcg_gen_or_i32(var, var, tmp);
238    tcg_temp_free_i32(tmp);
239}
240
241/* Byteswap low halfword and sign extend.  */
242static void gen_revsh(TCGv var)
243{
244    tcg_gen_ext16u_i32(var, var);
245    tcg_gen_bswap16_i32(var, var);
246    tcg_gen_ext16s_i32(var, var);
247}
248
249/* Unsigned bitfield extract.  */
250static void gen_ubfx(TCGv var, int shift, uint32_t mask)
251{
252    if (shift)
253        tcg_gen_shri_i32(var, var, shift);
254    tcg_gen_andi_i32(var, var, mask);
255}
256
257/* Signed bitfield extract.  */
258static void gen_sbfx(TCGv var, int shift, int width)
259{
260    uint32_t signbit;
261
262    if (shift)
263        tcg_gen_sari_i32(var, var, shift);
264    if (shift + width < 32) {
265        signbit = 1u << (width - 1);
266        tcg_gen_andi_i32(var, var, (1u << width) - 1);
267        tcg_gen_xori_i32(var, var, signbit);
268        tcg_gen_subi_i32(var, var, signbit);
269    }
270}
271
272/* Bitfield insertion.  Insert val into base.  Clobbers base and val.  */
273static void gen_bfi(TCGv dest, TCGv base, TCGv val, int shift, uint32_t mask)
274{
275    tcg_gen_andi_i32(val, val, mask);
276    tcg_gen_shli_i32(val, val, shift);
277    tcg_gen_andi_i32(base, base, ~(mask << shift));
278    tcg_gen_or_i32(dest, base, val);
279}
280
281/* Return (b << 32) + a. Mark inputs as dead */
282static TCGv_i64 gen_addq_msw(TCGv_i64 a, TCGv b)
283{
284    TCGv_i64 tmp64 = tcg_temp_new_i64();
285
286    tcg_gen_extu_i32_i64(tmp64, b);
287    tcg_temp_free_i32(b);
288    tcg_gen_shli_i64(tmp64, tmp64, 32);
289    tcg_gen_add_i64(a, tmp64, a);
290
291    tcg_temp_free_i64(tmp64);
292    return a;
293}
294
295/* Return (b << 32) - a. Mark inputs as dead. */
296static TCGv_i64 gen_subq_msw(TCGv_i64 a, TCGv b)
297{
298    TCGv_i64 tmp64 = tcg_temp_new_i64();
299
300    tcg_gen_extu_i32_i64(tmp64, b);
301    tcg_temp_free_i32(b);
302    tcg_gen_shli_i64(tmp64, tmp64, 32);
303    tcg_gen_sub_i64(a, tmp64, a);
304
305    tcg_temp_free_i64(tmp64);
306    return a;
307}
308
309/* FIXME: Most targets have native widening multiplication.
310   It would be good to use that instead of a full wide multiply.  */
311/* 32x32->64 multiply.  Marks inputs as dead.  */
312static TCGv_i64 gen_mulu_i64_i32(TCGv a, TCGv b)
313{
314    TCGv_i64 tmp1 = tcg_temp_new_i64();
315    TCGv_i64 tmp2 = tcg_temp_new_i64();
316
317    tcg_gen_extu_i32_i64(tmp1, a);
318    tcg_temp_free_i32(a);
319    tcg_gen_extu_i32_i64(tmp2, b);
320    tcg_temp_free_i32(b);
321    tcg_gen_mul_i64(tmp1, tmp1, tmp2);
322    tcg_temp_free_i64(tmp2);
323    return tmp1;
324}
325
326static TCGv_i64 gen_muls_i64_i32(TCGv a, TCGv b)
327{
328    TCGv_i64 tmp1 = tcg_temp_new_i64();
329    TCGv_i64 tmp2 = tcg_temp_new_i64();
330
331    tcg_gen_ext_i32_i64(tmp1, a);
332    tcg_temp_free_i32(a);
333    tcg_gen_ext_i32_i64(tmp2, b);
334    tcg_temp_free_i32(b);
335    tcg_gen_mul_i64(tmp1, tmp1, tmp2);
336    tcg_temp_free_i64(tmp2);
337    return tmp1;
338}
339
340/* Swap low and high halfwords.  */
341static void gen_swap_half(TCGv var)
342{
343    TCGv tmp = tcg_temp_new_i32();
344    tcg_gen_shri_i32(tmp, var, 16);
345    tcg_gen_shli_i32(var, var, 16);
346    tcg_gen_or_i32(var, var, tmp);
347    tcg_temp_free_i32(tmp);
348}
349
350/* Dual 16-bit add.  Result placed in t0 and t1 is marked as dead.
351    tmp = (t0 ^ t1) & 0x8000;
352    t0 &= ~0x8000;
353    t1 &= ~0x8000;
354    t0 = (t0 + t1) ^ tmp;
355 */
356
357static void gen_add16(TCGv t0, TCGv t1)
358{
359    TCGv tmp = tcg_temp_new_i32();
360    tcg_gen_xor_i32(tmp, t0, t1);
361    tcg_gen_andi_i32(tmp, tmp, 0x8000);
362    tcg_gen_andi_i32(t0, t0, ~0x8000);
363    tcg_gen_andi_i32(t1, t1, ~0x8000);
364    tcg_gen_add_i32(t0, t0, t1);
365    tcg_gen_xor_i32(t0, t0, tmp);
366    tcg_temp_free_i32(tmp);
367    tcg_temp_free_i32(t1);
368}
369
370#define gen_set_CF(var) tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, CF))
371
372/* Set CF to the top bit of var.  */
373static void gen_set_CF_bit31(TCGv var)
374{
375    TCGv tmp = tcg_temp_new_i32();
376    tcg_gen_shri_i32(tmp, var, 31);
377    gen_set_CF(tmp);
378    tcg_temp_free_i32(tmp);
379}
380
381/* Set N and Z flags from var.  */
382static inline void gen_logic_CC(TCGv var)
383{
384    tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, NF));
385    tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, ZF));
386}
387
388/* T0 += T1 + CF.  */
389static void gen_adc(TCGv t0, TCGv t1)
390{
391    TCGv tmp;
392    tcg_gen_add_i32(t0, t0, t1);
393    tmp = load_cpu_field(CF);
394    tcg_gen_add_i32(t0, t0, tmp);
395    tcg_temp_free_i32(tmp);
396}
397
398/* dest = T0 + T1 + CF. */
399static void gen_add_carry(TCGv dest, TCGv t0, TCGv t1)
400{
401    TCGv tmp;
402    tcg_gen_add_i32(dest, t0, t1);
403    tmp = load_cpu_field(CF);
404    tcg_gen_add_i32(dest, dest, tmp);
405    tcg_temp_free_i32(tmp);
406}
407
408/* dest = T0 - T1 + CF - 1.  */
409static void gen_sub_carry(TCGv dest, TCGv t0, TCGv t1)
410{
411    TCGv tmp;
412    tcg_gen_sub_i32(dest, t0, t1);
413    tmp = load_cpu_field(CF);
414    tcg_gen_add_i32(dest, dest, tmp);
415    tcg_gen_subi_i32(dest, dest, 1);
416    tcg_temp_free_i32(tmp);
417}
418
419/* FIXME:  Implement this natively.  */
420#define tcg_gen_abs_i32(t0, t1) gen_helper_abs(t0, t1)
421
422static void shifter_out_im(TCGv var, int shift)
423{
424    TCGv tmp = tcg_temp_new_i32();
425    if (shift == 0) {
426        tcg_gen_andi_i32(tmp, var, 1);
427    } else {
428        tcg_gen_shri_i32(tmp, var, shift);
429        if (shift != 31)
430            tcg_gen_andi_i32(tmp, tmp, 1);
431    }
432    gen_set_CF(tmp);
433    tcg_temp_free_i32(tmp);
434}
435
436/* Shift by immediate.  Includes special handling for shift == 0.  */
437static inline void gen_arm_shift_im(TCGv var, int shiftop, int shift, int flags)
438{
439    switch (shiftop) {
440    case 0: /* LSL */
441        if (shift != 0) {
442            if (flags)
443                shifter_out_im(var, 32 - shift);
444            tcg_gen_shli_i32(var, var, shift);
445        }
446        break;
447    case 1: /* LSR */
448        if (shift == 0) {
449            if (flags) {
450                tcg_gen_shri_i32(var, var, 31);
451                gen_set_CF(var);
452            }
453            tcg_gen_movi_i32(var, 0);
454        } else {
455            if (flags)
456                shifter_out_im(var, shift - 1);
457            tcg_gen_shri_i32(var, var, shift);
458        }
459        break;
460    case 2: /* ASR */
461        if (shift == 0)
462            shift = 32;
463        if (flags)
464            shifter_out_im(var, shift - 1);
465        if (shift == 32)
466          shift = 31;
467        tcg_gen_sari_i32(var, var, shift);
468        break;
469    case 3: /* ROR/RRX */
470        if (shift != 0) {
471            if (flags)
472                shifter_out_im(var, shift - 1);
473            tcg_gen_rotri_i32(var, var, shift); break;
474        } else {
475            TCGv tmp = load_cpu_field(CF);
476            if (flags)
477                shifter_out_im(var, 0);
478            tcg_gen_shri_i32(var, var, 1);
479            tcg_gen_shli_i32(tmp, tmp, 31);
480            tcg_gen_or_i32(var, var, tmp);
481            tcg_temp_free_i32(tmp);
482        }
483    }
484};
485
486static inline void gen_arm_shift_reg(TCGv var, int shiftop,
487                                     TCGv shift, int flags)
488{
489    if (flags) {
490        switch (shiftop) {
491        case 0: gen_helper_shl_cc(var, cpu_env, var, shift); break;
492        case 1: gen_helper_shr_cc(var, cpu_env, var, shift); break;
493        case 2: gen_helper_sar_cc(var, cpu_env, var, shift); break;
494        case 3: gen_helper_ror_cc(var, cpu_env, var, shift); break;
495        }
496    } else {
497        switch (shiftop) {
498        case 0: gen_helper_shl(var, var, shift); break;
499        case 1: gen_helper_shr(var, var, shift); break;
500        case 2: gen_helper_sar(var, var, shift); break;
501        case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
502                tcg_gen_rotr_i32(var, var, shift); break;
503        }
504    }
505    tcg_temp_free_i32(shift);
506}
507
508#define PAS_OP(pfx) \
509    switch (op2) {  \
510    case 0: gen_pas_helper(glue(pfx,add16)); break; \
511    case 1: gen_pas_helper(glue(pfx,addsubx)); break; \
512    case 2: gen_pas_helper(glue(pfx,subaddx)); break; \
513    case 3: gen_pas_helper(glue(pfx,sub16)); break; \
514    case 4: gen_pas_helper(glue(pfx,add8)); break; \
515    case 7: gen_pas_helper(glue(pfx,sub8)); break; \
516    }
517static void gen_arm_parallel_addsub(int op1, int op2, TCGv a, TCGv b)
518{
519    TCGv_ptr tmp;
520
521    switch (op1) {
522#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b, tmp)
523    case 1:
524        tmp = tcg_temp_new_ptr();
525        tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUARMState, GE));
526        PAS_OP(s)
527        tcg_temp_free_ptr(tmp);
528        break;
529    case 5:
530        tmp = tcg_temp_new_ptr();
531        tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUARMState, GE));
532        PAS_OP(u)
533        tcg_temp_free_ptr(tmp);
534        break;
535#undef gen_pas_helper
536#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b)
537    case 2:
538        PAS_OP(q);
539        break;
540    case 3:
541        PAS_OP(sh);
542        break;
543    case 6:
544        PAS_OP(uq);
545        break;
546    case 7:
547        PAS_OP(uh);
548        break;
549#undef gen_pas_helper
550    }
551}
552#undef PAS_OP
553
554/* For unknown reasons Arm and Thumb-2 use arbitrarily different encodings.  */
555#define PAS_OP(pfx) \
556    switch (op1) {  \
557    case 0: gen_pas_helper(glue(pfx,add8)); break; \
558    case 1: gen_pas_helper(glue(pfx,add16)); break; \
559    case 2: gen_pas_helper(glue(pfx,addsubx)); break; \
560    case 4: gen_pas_helper(glue(pfx,sub8)); break; \
561    case 5: gen_pas_helper(glue(pfx,sub16)); break; \
562    case 6: gen_pas_helper(glue(pfx,subaddx)); break; \
563    }
564static void gen_thumb2_parallel_addsub(int op1, int op2, TCGv a, TCGv b)
565{
566    TCGv_ptr tmp;
567
568    switch (op2) {
569#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b, tmp)
570    case 0:
571        tmp = tcg_temp_new_ptr();
572        tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUARMState, GE));
573        PAS_OP(s)
574        tcg_temp_free_ptr(tmp);
575        break;
576    case 4:
577        tmp = tcg_temp_new_ptr();
578        tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUARMState, GE));
579        PAS_OP(u)
580        tcg_temp_free_ptr(tmp);
581        break;
582#undef gen_pas_helper
583#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b)
584    case 1:
585        PAS_OP(q);
586        break;
587    case 2:
588        PAS_OP(sh);
589        break;
590    case 5:
591        PAS_OP(uq);
592        break;
593    case 6:
594        PAS_OP(uh);
595        break;
596#undef gen_pas_helper
597    }
598}
599#undef PAS_OP
600
601static void gen_test_cc(int cc, int label)
602{
603    TCGv tmp;
604    TCGv tmp2;
605    int inv;
606
607    switch (cc) {
608    case 0: /* eq: Z */
609        tmp = load_cpu_field(ZF);
610        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label);
611        break;
612    case 1: /* ne: !Z */
613        tmp = load_cpu_field(ZF);
614        tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, label);
615        break;
616    case 2: /* cs: C */
617        tmp = load_cpu_field(CF);
618        tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, label);
619        break;
620    case 3: /* cc: !C */
621        tmp = load_cpu_field(CF);
622        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label);
623        break;
624    case 4: /* mi: N */
625        tmp = load_cpu_field(NF);
626        tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label);
627        break;
628    case 5: /* pl: !N */
629        tmp = load_cpu_field(NF);
630        tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label);
631        break;
632    case 6: /* vs: V */
633        tmp = load_cpu_field(VF);
634        tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label);
635        break;
636    case 7: /* vc: !V */
637        tmp = load_cpu_field(VF);
638        tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label);
639        break;
640    case 8: /* hi: C && !Z */
641        inv = gen_new_label();
642        tmp = load_cpu_field(CF);
643        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, inv);
644        tcg_temp_free_i32(tmp);
645        tmp = load_cpu_field(ZF);
646        tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, label);
647        gen_set_label(inv);
648        break;
649    case 9: /* ls: !C || Z */
650        tmp = load_cpu_field(CF);
651        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label);
652        tcg_temp_free_i32(tmp);
653        tmp = load_cpu_field(ZF);
654        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label);
655        break;
656    case 10: /* ge: N == V -> N ^ V == 0 */
657        tmp = load_cpu_field(VF);
658        tmp2 = load_cpu_field(NF);
659        tcg_gen_xor_i32(tmp, tmp, tmp2);
660        tcg_temp_free_i32(tmp2);
661        tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label);
662        break;
663    case 11: /* lt: N != V -> N ^ V != 0 */
664        tmp = load_cpu_field(VF);
665        tmp2 = load_cpu_field(NF);
666        tcg_gen_xor_i32(tmp, tmp, tmp2);
667        tcg_temp_free_i32(tmp2);
668        tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label);
669        break;
670    case 12: /* gt: !Z && N == V */
671        inv = gen_new_label();
672        tmp = load_cpu_field(ZF);
673        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, inv);
674        tcg_temp_free_i32(tmp);
675        tmp = load_cpu_field(VF);
676        tmp2 = load_cpu_field(NF);
677        tcg_gen_xor_i32(tmp, tmp, tmp2);
678        tcg_temp_free_i32(tmp2);
679        tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label);
680        gen_set_label(inv);
681        break;
682    case 13: /* le: Z || N != V */
683        tmp = load_cpu_field(ZF);
684        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label);
685        tcg_temp_free_i32(tmp);
686        tmp = load_cpu_field(VF);
687        tmp2 = load_cpu_field(NF);
688        tcg_gen_xor_i32(tmp, tmp, tmp2);
689        tcg_temp_free_i32(tmp2);
690        tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label);
691        break;
692    default:
693        fprintf(stderr, "Bad condition code 0x%x\n", cc);
694        abort();
695    }
696    tcg_temp_free_i32(tmp);
697}
698
699static const uint8_t table_logic_cc[16] = {
700    1, /* and */
701    1, /* xor */
702    0, /* sub */
703    0, /* rsb */
704    0, /* add */
705    0, /* adc */
706    0, /* sbc */
707    0, /* rsc */
708    1, /* andl */
709    1, /* xorl */
710    0, /* cmp */
711    0, /* cmn */
712    1, /* orr */
713    1, /* mov */
714    1, /* bic */
715    1, /* mvn */
716};
717
718/* Set PC and Thumb state from an immediate address.  */
719static inline void gen_bx_im(DisasContext *s, uint32_t addr)
720{
721    TCGv tmp;
722
723    s->is_jmp = DISAS_UPDATE;
724    if (s->thumb != (addr & 1)) {
725        tmp = tcg_temp_new_i32();
726        tcg_gen_movi_i32(tmp, addr & 1);
727        tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUARMState, thumb));
728        tcg_temp_free_i32(tmp);
729    }
730    tcg_gen_movi_i32(cpu_R[15], addr & ~1);
731}
732
733/* Set PC and Thumb state from var.  var is marked as dead.  */
734static inline void gen_bx(DisasContext *s, TCGv var)
735{
736    s->is_jmp = DISAS_UPDATE;
737    tcg_gen_andi_i32(cpu_R[15], var, ~1);
738    tcg_gen_andi_i32(var, var, 1);
739    store_cpu_field(var, thumb);
740}
741
742/* Variant of store_reg which uses branch&exchange logic when storing
743   to r15 in ARM architecture v7 and above. The source must be a temporary
744   and will be marked as dead. */
745static inline void store_reg_bx(CPUARMState *env, DisasContext *s,
746                                int reg, TCGv var)
747{
748    if (reg == 15 && ENABLE_ARCH_7) {
749        gen_bx(s, var);
750    } else {
751        store_reg(s, reg, var);
752    }
753}
754
755/* Variant of store_reg which uses branch&exchange logic when storing
756 * to r15 in ARM architecture v5T and above. This is used for storing
757 * the results of a LDR/LDM/POP into r15, and corresponds to the cases
758 * in the ARM ARM which use the LoadWritePC() pseudocode function. */
759static inline void store_reg_from_load(CPUARMState *env, DisasContext *s,
760                                int reg, TCGv var)
761{
762    if (reg == 15 && ENABLE_ARCH_5) {
763        gen_bx(s, var);
764    } else {
765        store_reg(s, reg, var);
766    }
767}
768
769static inline void gen_smc(CPUARMState *env, DisasContext *s)
770{
771    tcg_gen_movi_i32(cpu_R[15], s->pc);
772    s->is_jmp = DISAS_SMC;
773}
774
775static inline TCGv gen_ld8s(TCGv addr, int index)
776{
777    TCGv tmp = tcg_temp_new_i32();
778    tcg_gen_qemu_ld8s(tmp, addr, index);
779    return tmp;
780}
781static inline TCGv gen_ld8u(TCGv addr, int index)
782{
783    TCGv tmp = tcg_temp_new_i32();
784    tcg_gen_qemu_ld8u(tmp, addr, index);
785    return tmp;
786}
787static inline TCGv gen_ld16s(TCGv addr, int index)
788{
789    TCGv tmp = tcg_temp_new_i32();
790    tcg_gen_qemu_ld16s(tmp, addr, index);
791    return tmp;
792}
793static inline TCGv gen_ld16u(TCGv addr, int index)
794{
795    TCGv tmp = tcg_temp_new_i32();
796    tcg_gen_qemu_ld16u(tmp, addr, index);
797    return tmp;
798}
799static inline TCGv gen_ld32(TCGv addr, int index)
800{
801    TCGv tmp = tcg_temp_new_i32();
802    tcg_gen_qemu_ld32u(tmp, addr, index);
803    return tmp;
804}
805static inline TCGv_i64 gen_ld64(TCGv addr, int index)
806{
807    TCGv_i64 tmp = tcg_temp_new_i64();
808    tcg_gen_qemu_ld64(tmp, addr, index);
809    return tmp;
810}
811static inline void gen_st8(TCGv val, TCGv addr, int index)
812{
813    tcg_gen_qemu_st8(val, addr, index);
814    tcg_temp_free_i32(val);
815}
816static inline void gen_st16(TCGv val, TCGv addr, int index)
817{
818    tcg_gen_qemu_st16(val, addr, index);
819    tcg_temp_free_i32(val);
820}
821static inline void gen_st32(TCGv val, TCGv addr, int index)
822{
823    tcg_gen_qemu_st32(val, addr, index);
824    tcg_temp_free_i32(val);
825}
826static inline void gen_st64(TCGv_i64 val, TCGv addr, int index)
827{
828    tcg_gen_qemu_st64(val, addr, index);
829    tcg_temp_free_i64(val);
830}
831
832static inline void gen_set_pc_im(uint32_t val)
833{
834    tcg_gen_movi_i32(cpu_R[15], val);
835}
836
837/* Force a TB lookup after an instruction that changes the CPU state.  */
838static inline void gen_lookup_tb(DisasContext *s)
839{
840    tcg_gen_movi_i32(cpu_R[15], s->pc & ~1);
841    s->is_jmp = DISAS_UPDATE;
842}
843
844static inline void gen_add_data_offset(DisasContext *s, unsigned int insn,
845                                       TCGv var)
846{
847    int val, rm, shift, shiftop;
848    TCGv offset;
849
850    if (!(insn & (1 << 25))) {
851        /* immediate */
852        val = insn & 0xfff;
853        if (!(insn & (1 << 23)))
854            val = -val;
855        if (val != 0)
856            tcg_gen_addi_i32(var, var, val);
857    } else {
858        /* shift/register */
859        rm = (insn) & 0xf;
860        shift = (insn >> 7) & 0x1f;
861        shiftop = (insn >> 5) & 3;
862        offset = load_reg(s, rm);
863        gen_arm_shift_im(offset, shiftop, shift, 0);
864        if (!(insn & (1 << 23)))
865            tcg_gen_sub_i32(var, var, offset);
866        else
867            tcg_gen_add_i32(var, var, offset);
868        tcg_temp_free_i32(offset);
869    }
870}
871
872static inline void gen_add_datah_offset(DisasContext *s, unsigned int insn,
873                                        int extra, TCGv var)
874{
875    int val, rm;
876    TCGv offset;
877
878    if (insn & (1 << 22)) {
879        /* immediate */
880        val = (insn & 0xf) | ((insn >> 4) & 0xf0);
881        if (!(insn & (1 << 23)))
882            val = -val;
883        val += extra;
884        if (val != 0)
885            tcg_gen_addi_i32(var, var, val);
886    } else {
887        /* register */
888        if (extra)
889            tcg_gen_addi_i32(var, var, extra);
890        rm = (insn) & 0xf;
891        offset = load_reg(s, rm);
892        if (!(insn & (1 << 23)))
893            tcg_gen_sub_i32(var, var, offset);
894        else
895            tcg_gen_add_i32(var, var, offset);
896        tcg_temp_free_i32(offset);
897    }
898}
899
900static TCGv_ptr get_fpstatus_ptr(int neon)
901{
902    TCGv_ptr statusptr = tcg_temp_new_ptr();
903    int offset;
904    if (neon) {
905        offset = offsetof(CPUARMState, vfp.standard_fp_status);
906    } else {
907        offset = offsetof(CPUARMState, vfp.fp_status);
908    }
909    tcg_gen_addi_ptr(statusptr, cpu_env, offset);
910    return statusptr;
911}
912
913#define VFP_OP2(name)                                                 \
914static inline void gen_vfp_##name(int dp)                             \
915{                                                                     \
916    TCGv_ptr fpst = get_fpstatus_ptr(0);                              \
917    if (dp) {                                                         \
918        gen_helper_vfp_##name##d(cpu_F0d, cpu_F0d, cpu_F1d, fpst);    \
919    } else {                                                          \
920        gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, cpu_F1s, fpst);    \
921    }                                                                 \
922    tcg_temp_free_ptr(fpst);                                          \
923}
924
925VFP_OP2(add)
926VFP_OP2(sub)
927VFP_OP2(mul)
928VFP_OP2(div)
929
930#undef VFP_OP2
931
932static inline void gen_vfp_F1_mul(int dp)
933{
934    /* Like gen_vfp_mul() but put result in F1 */
935    TCGv_ptr fpst = get_fpstatus_ptr(0);
936    if (dp) {
937        gen_helper_vfp_muld(cpu_F1d, cpu_F0d, cpu_F1d, fpst);
938    } else {
939        gen_helper_vfp_muls(cpu_F1s, cpu_F0s, cpu_F1s, fpst);
940    }
941}
942
943static inline void gen_vfp_F1_neg(int dp)
944{
945    /* Like gen_vfp_neg() but put result in F1 */
946    if (dp) {
947        gen_helper_vfp_negd(cpu_F1d, cpu_F0d);
948    } else {
949        gen_helper_vfp_negs(cpu_F1s, cpu_F0s);
950    }
951}
952
953static inline void gen_vfp_abs(int dp)
954{
955    if (dp)
956        gen_helper_vfp_absd(cpu_F0d, cpu_F0d);
957    else
958        gen_helper_vfp_abss(cpu_F0s, cpu_F0s);
959}
960
961static inline void gen_vfp_neg(int dp)
962{
963    if (dp)
964        gen_helper_vfp_negd(cpu_F0d, cpu_F0d);
965    else
966        gen_helper_vfp_negs(cpu_F0s, cpu_F0s);
967}
968
969static inline void gen_vfp_sqrt(int dp)
970{
971    if (dp)
972        gen_helper_vfp_sqrtd(cpu_F0d, cpu_F0d, cpu_env);
973    else
974        gen_helper_vfp_sqrts(cpu_F0s, cpu_F0s, cpu_env);
975}
976
977static inline void gen_vfp_cmp(int dp)
978{
979    if (dp)
980        gen_helper_vfp_cmpd(cpu_F0d, cpu_F1d, cpu_env);
981    else
982        gen_helper_vfp_cmps(cpu_F0s, cpu_F1s, cpu_env);
983}
984
985static inline void gen_vfp_cmpe(int dp)
986{
987    if (dp)
988        gen_helper_vfp_cmped(cpu_F0d, cpu_F1d, cpu_env);
989    else
990        gen_helper_vfp_cmpes(cpu_F0s, cpu_F1s, cpu_env);
991}
992
993static inline void gen_vfp_F1_ld0(int dp)
994{
995    if (dp)
996        tcg_gen_movi_i64(cpu_F1d, 0);
997    else
998        tcg_gen_movi_i32(cpu_F1s, 0);
999}
1000
1001#define VFP_GEN_ITOF(name) \
1002static inline void gen_vfp_##name(int dp, int neon) \
1003{ \
1004    TCGv_ptr statusptr = get_fpstatus_ptr(neon); \
1005    if (dp) { \
1006        gen_helper_vfp_##name##d(cpu_F0d, cpu_F0s, statusptr); \
1007    } else { \
1008        gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, statusptr); \
1009    } \
1010    tcg_temp_free_ptr(statusptr); \
1011}
1012
1013VFP_GEN_ITOF(uito)
1014VFP_GEN_ITOF(sito)
1015#undef VFP_GEN_ITOF
1016
1017#define VFP_GEN_FTOI(name) \
1018static inline void gen_vfp_##name(int dp, int neon) \
1019{ \
1020    TCGv_ptr statusptr = get_fpstatus_ptr(neon); \
1021    if (dp) { \
1022        gen_helper_vfp_##name##d(cpu_F0s, cpu_F0d, statusptr); \
1023    } else { \
1024        gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, statusptr); \
1025    } \
1026    tcg_temp_free_ptr(statusptr); \
1027}
1028
1029VFP_GEN_FTOI(toui)
1030VFP_GEN_FTOI(touiz)
1031VFP_GEN_FTOI(tosi)
1032VFP_GEN_FTOI(tosiz)
1033#undef VFP_GEN_FTOI
1034
1035#define VFP_GEN_FIX(name) \
1036static inline void gen_vfp_##name(int dp, int shift, int neon) \
1037{ \
1038    TCGv tmp_shift = tcg_const_i32(shift); \
1039    TCGv_ptr statusptr = get_fpstatus_ptr(neon); \
1040    if (dp) { \
1041        gen_helper_vfp_##name##d(cpu_F0d, cpu_F0d, tmp_shift, statusptr); \
1042    } else { \
1043        gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, tmp_shift, statusptr); \
1044    } \
1045    tcg_temp_free_i32(tmp_shift); \
1046    tcg_temp_free_ptr(statusptr); \
1047}
1048VFP_GEN_FIX(tosh)
1049VFP_GEN_FIX(tosl)
1050VFP_GEN_FIX(touh)
1051VFP_GEN_FIX(toul)
1052VFP_GEN_FIX(shto)
1053VFP_GEN_FIX(slto)
1054VFP_GEN_FIX(uhto)
1055VFP_GEN_FIX(ulto)
1056#undef VFP_GEN_FIX
1057
1058static inline void gen_vfp_ld(DisasContext *s, int dp, TCGv addr)
1059{
1060    if (dp)
1061        tcg_gen_qemu_ld64(cpu_F0d, addr, IS_USER(s));
1062    else
1063        tcg_gen_qemu_ld32u(cpu_F0s, addr, IS_USER(s));
1064}
1065
1066static inline void gen_vfp_st(DisasContext *s, int dp, TCGv addr)
1067{
1068    if (dp)
1069        tcg_gen_qemu_st64(cpu_F0d, addr, IS_USER(s));
1070    else
1071        tcg_gen_qemu_st32(cpu_F0s, addr, IS_USER(s));
1072}
1073
1074static inline long
1075vfp_reg_offset (int dp, int reg)
1076{
1077    if (dp)
1078        return offsetof(CPUARMState, vfp.regs[reg]);
1079    else if (reg & 1) {
1080        return offsetof(CPUARMState, vfp.regs[reg >> 1])
1081          + offsetof(CPU_DoubleU, l.upper);
1082    } else {
1083        return offsetof(CPUARMState, vfp.regs[reg >> 1])
1084          + offsetof(CPU_DoubleU, l.lower);
1085    }
1086}
1087
1088/* Return the offset of a 32-bit piece of a NEON register.
1089   zero is the least significant end of the register.  */
1090static inline long
1091neon_reg_offset (int reg, int n)
1092{
1093    int sreg;
1094    sreg = reg * 2 + n;
1095    return vfp_reg_offset(0, sreg);
1096}
1097
1098static TCGv neon_load_reg(int reg, int pass)
1099{
1100    TCGv tmp = tcg_temp_new_i32();
1101    tcg_gen_ld_i32(tmp, cpu_env, neon_reg_offset(reg, pass));
1102    return tmp;
1103}
1104
1105static void neon_store_reg(int reg, int pass, TCGv var)
1106{
1107    tcg_gen_st_i32(var, cpu_env, neon_reg_offset(reg, pass));
1108    tcg_temp_free_i32(var);
1109}
1110
1111static inline void neon_load_reg64(TCGv_i64 var, int reg)
1112{
1113    tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(1, reg));
1114}
1115
1116static inline void neon_store_reg64(TCGv_i64 var, int reg)
1117{
1118    tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(1, reg));
1119}
1120
1121#define tcg_gen_ld_f32 tcg_gen_ld_i32
1122#define tcg_gen_ld_f64 tcg_gen_ld_i64
1123#define tcg_gen_st_f32 tcg_gen_st_i32
1124#define tcg_gen_st_f64 tcg_gen_st_i64
1125
1126static inline void gen_mov_F0_vreg(int dp, int reg)
1127{
1128    if (dp)
1129        tcg_gen_ld_f64(cpu_F0d, cpu_env, vfp_reg_offset(dp, reg));
1130    else
1131        tcg_gen_ld_f32(cpu_F0s, cpu_env, vfp_reg_offset(dp, reg));
1132}
1133
1134static inline void gen_mov_F1_vreg(int dp, int reg)
1135{
1136    if (dp)
1137        tcg_gen_ld_f64(cpu_F1d, cpu_env, vfp_reg_offset(dp, reg));
1138    else
1139        tcg_gen_ld_f32(cpu_F1s, cpu_env, vfp_reg_offset(dp, reg));
1140}
1141
1142static inline void gen_mov_vreg_F0(int dp, int reg)
1143{
1144    if (dp)
1145        tcg_gen_st_f64(cpu_F0d, cpu_env, vfp_reg_offset(dp, reg));
1146    else
1147        tcg_gen_st_f32(cpu_F0s, cpu_env, vfp_reg_offset(dp, reg));
1148}
1149
1150#define ARM_CP_RW_BIT	(1 << 20)
1151
1152static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
1153{
1154    tcg_gen_ld_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1155}
1156
1157static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
1158{
1159    tcg_gen_st_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1160}
1161
1162static inline TCGv iwmmxt_load_creg(int reg)
1163{
1164    TCGv var = tcg_temp_new_i32();
1165    tcg_gen_ld_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1166    return var;
1167}
1168
1169static inline void iwmmxt_store_creg(int reg, TCGv var)
1170{
1171    tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1172    tcg_temp_free_i32(var);
1173}
1174
1175static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
1176{
1177    iwmmxt_store_reg(cpu_M0, rn);
1178}
1179
1180static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
1181{
1182    iwmmxt_load_reg(cpu_M0, rn);
1183}
1184
1185static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
1186{
1187    iwmmxt_load_reg(cpu_V1, rn);
1188    tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
1189}
1190
1191static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
1192{
1193    iwmmxt_load_reg(cpu_V1, rn);
1194    tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
1195}
1196
1197static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
1198{
1199    iwmmxt_load_reg(cpu_V1, rn);
1200    tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
1201}
1202
1203#define IWMMXT_OP(name) \
1204static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1205{ \
1206    iwmmxt_load_reg(cpu_V1, rn); \
1207    gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
1208}
1209
1210#define IWMMXT_OP_ENV(name) \
1211static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
1212{ \
1213    iwmmxt_load_reg(cpu_V1, rn); \
1214    gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1); \
1215}
1216
1217#define IWMMXT_OP_ENV_SIZE(name) \
1218IWMMXT_OP_ENV(name##b) \
1219IWMMXT_OP_ENV(name##w) \
1220IWMMXT_OP_ENV(name##l)
1221
1222#define IWMMXT_OP_ENV1(name) \
1223static inline void gen_op_iwmmxt_##name##_M0(void) \
1224{ \
1225    gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0); \
1226}
1227
1228IWMMXT_OP(maddsq)
1229IWMMXT_OP(madduq)
1230IWMMXT_OP(sadb)
1231IWMMXT_OP(sadw)
1232IWMMXT_OP(mulslw)
1233IWMMXT_OP(mulshw)
1234IWMMXT_OP(mululw)
1235IWMMXT_OP(muluhw)
1236IWMMXT_OP(macsw)
1237IWMMXT_OP(macuw)
1238
1239IWMMXT_OP_ENV_SIZE(unpackl)
1240IWMMXT_OP_ENV_SIZE(unpackh)
1241
1242IWMMXT_OP_ENV1(unpacklub)
1243IWMMXT_OP_ENV1(unpackluw)
1244IWMMXT_OP_ENV1(unpacklul)
1245IWMMXT_OP_ENV1(unpackhub)
1246IWMMXT_OP_ENV1(unpackhuw)
1247IWMMXT_OP_ENV1(unpackhul)
1248IWMMXT_OP_ENV1(unpacklsb)
1249IWMMXT_OP_ENV1(unpacklsw)
1250IWMMXT_OP_ENV1(unpacklsl)
1251IWMMXT_OP_ENV1(unpackhsb)
1252IWMMXT_OP_ENV1(unpackhsw)
1253IWMMXT_OP_ENV1(unpackhsl)
1254
1255IWMMXT_OP_ENV_SIZE(cmpeq)
1256IWMMXT_OP_ENV_SIZE(cmpgtu)
1257IWMMXT_OP_ENV_SIZE(cmpgts)
1258
1259IWMMXT_OP_ENV_SIZE(mins)
1260IWMMXT_OP_ENV_SIZE(minu)
1261IWMMXT_OP_ENV_SIZE(maxs)
1262IWMMXT_OP_ENV_SIZE(maxu)
1263
1264IWMMXT_OP_ENV_SIZE(subn)
1265IWMMXT_OP_ENV_SIZE(addn)
1266IWMMXT_OP_ENV_SIZE(subu)
1267IWMMXT_OP_ENV_SIZE(addu)
1268IWMMXT_OP_ENV_SIZE(subs)
1269IWMMXT_OP_ENV_SIZE(adds)
1270
1271IWMMXT_OP_ENV(avgb0)
1272IWMMXT_OP_ENV(avgb1)
1273IWMMXT_OP_ENV(avgw0)
1274IWMMXT_OP_ENV(avgw1)
1275
1276IWMMXT_OP(msadb)
1277
1278IWMMXT_OP_ENV(packuw)
1279IWMMXT_OP_ENV(packul)
1280IWMMXT_OP_ENV(packuq)
1281IWMMXT_OP_ENV(packsw)
1282IWMMXT_OP_ENV(packsl)
1283IWMMXT_OP_ENV(packsq)
1284
1285static void gen_op_iwmmxt_set_mup(void)
1286{
1287    TCGv tmp;
1288    tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1289    tcg_gen_ori_i32(tmp, tmp, 2);
1290    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1291}
1292
1293static void gen_op_iwmmxt_set_cup(void)
1294{
1295    TCGv tmp;
1296    tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1297    tcg_gen_ori_i32(tmp, tmp, 1);
1298    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1299}
1300
1301static void gen_op_iwmmxt_setpsr_nz(void)
1302{
1303    TCGv tmp = tcg_temp_new_i32();
1304    gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
1305    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
1306}
1307
1308static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
1309{
1310    iwmmxt_load_reg(cpu_V1, rn);
1311    tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
1312    tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1313}
1314
1315static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn, TCGv dest)
1316{
1317    int rd;
1318    uint32_t offset;
1319    TCGv tmp;
1320
1321    rd = (insn >> 16) & 0xf;
1322    tmp = load_reg(s, rd);
1323
1324    offset = (insn & 0xff) << ((insn >> 7) & 2);
1325    if (insn & (1 << 24)) {
1326        /* Pre indexed */
1327        if (insn & (1 << 23))
1328            tcg_gen_addi_i32(tmp, tmp, offset);
1329        else
1330            tcg_gen_addi_i32(tmp, tmp, -offset);
1331        tcg_gen_mov_i32(dest, tmp);
1332        if (insn & (1 << 21))
1333            store_reg(s, rd, tmp);
1334        else
1335            tcg_temp_free_i32(tmp);
1336    } else if (insn & (1 << 21)) {
1337        /* Post indexed */
1338        tcg_gen_mov_i32(dest, tmp);
1339        if (insn & (1 << 23))
1340            tcg_gen_addi_i32(tmp, tmp, offset);
1341        else
1342            tcg_gen_addi_i32(tmp, tmp, -offset);
1343        store_reg(s, rd, tmp);
1344    } else if (!(insn & (1 << 23)))
1345        return 1;
1346    return 0;
1347}
1348
1349static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv dest)
1350{
1351    int rd = (insn >> 0) & 0xf;
1352    TCGv tmp;
1353
1354    if (insn & (1 << 8)) {
1355        if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3) {
1356            return 1;
1357        } else {
1358            tmp = iwmmxt_load_creg(rd);
1359        }
1360    } else {
1361        tmp = tcg_temp_new_i32();
1362        iwmmxt_load_reg(cpu_V0, rd);
1363        tcg_gen_trunc_i64_i32(tmp, cpu_V0);
1364    }
1365    tcg_gen_andi_i32(tmp, tmp, mask);
1366    tcg_gen_mov_i32(dest, tmp);
1367    tcg_temp_free_i32(tmp);
1368    return 0;
1369}
1370
1371/* Disassemble an iwMMXt instruction.  Returns nonzero if an error occurred
1372   (ie. an undefined instruction).  */
1373static int disas_iwmmxt_insn(CPUARMState *env, DisasContext *s, uint32_t insn)
1374{
1375    int rd, wrd;
1376    int rdhi, rdlo, rd0, rd1, i;
1377    TCGv addr;
1378    TCGv tmp, tmp2, tmp3;
1379
1380    if ((insn & 0x0e000e00) == 0x0c000000) {
1381        if ((insn & 0x0fe00ff0) == 0x0c400000) {
1382            wrd = insn & 0xf;
1383            rdlo = (insn >> 12) & 0xf;
1384            rdhi = (insn >> 16) & 0xf;
1385            if (insn & ARM_CP_RW_BIT) {			/* TMRRC */
1386                iwmmxt_load_reg(cpu_V0, wrd);
1387                tcg_gen_trunc_i64_i32(cpu_R[rdlo], cpu_V0);
1388                tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
1389                tcg_gen_trunc_i64_i32(cpu_R[rdhi], cpu_V0);
1390            } else {					/* TMCRR */
1391                tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
1392                iwmmxt_store_reg(cpu_V0, wrd);
1393                gen_op_iwmmxt_set_mup();
1394            }
1395            return 0;
1396        }
1397
1398        wrd = (insn >> 12) & 0xf;
1399        addr = tcg_temp_new_i32();
1400        if (gen_iwmmxt_address(s, insn, addr)) {
1401            tcg_temp_free_i32(addr);
1402            return 1;
1403        }
1404        if (insn & ARM_CP_RW_BIT) {
1405            if ((insn >> 28) == 0xf) {			/* WLDRW wCx */
1406                tmp = tcg_temp_new_i32();
1407                tcg_gen_qemu_ld32u(tmp, addr, IS_USER(s));
1408                iwmmxt_store_creg(wrd, tmp);
1409            } else {
1410                i = 1;
1411                if (insn & (1 << 8)) {
1412                    if (insn & (1 << 22)) {		/* WLDRD */
1413                        tcg_gen_qemu_ld64(cpu_M0, addr, IS_USER(s));
1414                        i = 0;
1415                    } else {				/* WLDRW wRd */
1416                        tmp = gen_ld32(addr, IS_USER(s));
1417                    }
1418                } else {
1419                    if (insn & (1 << 22)) {		/* WLDRH */
1420                        tmp = gen_ld16u(addr, IS_USER(s));
1421                    } else {				/* WLDRB */
1422                        tmp = gen_ld8u(addr, IS_USER(s));
1423                    }
1424                }
1425                if (i) {
1426                    tcg_gen_extu_i32_i64(cpu_M0, tmp);
1427                    tcg_temp_free_i32(tmp);
1428                }
1429                gen_op_iwmmxt_movq_wRn_M0(wrd);
1430            }
1431        } else {
1432            if ((insn >> 28) == 0xf) {			/* WSTRW wCx */
1433                tmp = iwmmxt_load_creg(wrd);
1434                gen_st32(tmp, addr, IS_USER(s));
1435            } else {
1436                gen_op_iwmmxt_movq_M0_wRn(wrd);
1437                tmp = tcg_temp_new_i32();
1438                if (insn & (1 << 8)) {
1439                    if (insn & (1 << 22)) {		/* WSTRD */
1440                        tcg_temp_free_i32(tmp);
1441                        tcg_gen_qemu_st64(cpu_M0, addr, IS_USER(s));
1442                    } else {				/* WSTRW wRd */
1443                        tcg_gen_trunc_i64_i32(tmp, cpu_M0);
1444                        gen_st32(tmp, addr, IS_USER(s));
1445                    }
1446                } else {
1447                    if (insn & (1 << 22)) {		/* WSTRH */
1448                        tcg_gen_trunc_i64_i32(tmp, cpu_M0);
1449                        gen_st16(tmp, addr, IS_USER(s));
1450                    } else {				/* WSTRB */
1451                        tcg_gen_trunc_i64_i32(tmp, cpu_M0);
1452                        gen_st8(tmp, addr, IS_USER(s));
1453                    }
1454                }
1455            }
1456        }
1457        tcg_temp_free_i32(addr);
1458        return 0;
1459    }
1460
1461    if ((insn & 0x0f000000) != 0x0e000000)
1462        return 1;
1463
1464    switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
1465    case 0x000:						/* WOR */
1466        wrd = (insn >> 12) & 0xf;
1467        rd0 = (insn >> 0) & 0xf;
1468        rd1 = (insn >> 16) & 0xf;
1469        gen_op_iwmmxt_movq_M0_wRn(rd0);
1470        gen_op_iwmmxt_orq_M0_wRn(rd1);
1471        gen_op_iwmmxt_setpsr_nz();
1472        gen_op_iwmmxt_movq_wRn_M0(wrd);
1473        gen_op_iwmmxt_set_mup();
1474        gen_op_iwmmxt_set_cup();
1475        break;
1476    case 0x011:						/* TMCR */
1477        if (insn & 0xf)
1478            return 1;
1479        rd = (insn >> 12) & 0xf;
1480        wrd = (insn >> 16) & 0xf;
1481        switch (wrd) {
1482        case ARM_IWMMXT_wCID:
1483        case ARM_IWMMXT_wCASF:
1484            break;
1485        case ARM_IWMMXT_wCon:
1486            gen_op_iwmmxt_set_cup();
1487            /* Fall through.  */
1488        case ARM_IWMMXT_wCSSF:
1489            tmp = iwmmxt_load_creg(wrd);
1490            tmp2 = load_reg(s, rd);
1491            tcg_gen_andc_i32(tmp, tmp, tmp2);
1492            tcg_temp_free_i32(tmp2);
1493            iwmmxt_store_creg(wrd, tmp);
1494            break;
1495        case ARM_IWMMXT_wCGR0:
1496        case ARM_IWMMXT_wCGR1:
1497        case ARM_IWMMXT_wCGR2:
1498        case ARM_IWMMXT_wCGR3:
1499            gen_op_iwmmxt_set_cup();
1500            tmp = load_reg(s, rd);
1501            iwmmxt_store_creg(wrd, tmp);
1502            break;
1503        default:
1504            return 1;
1505        }
1506        break;
1507    case 0x100:						/* WXOR */
1508        wrd = (insn >> 12) & 0xf;
1509        rd0 = (insn >> 0) & 0xf;
1510        rd1 = (insn >> 16) & 0xf;
1511        gen_op_iwmmxt_movq_M0_wRn(rd0);
1512        gen_op_iwmmxt_xorq_M0_wRn(rd1);
1513        gen_op_iwmmxt_setpsr_nz();
1514        gen_op_iwmmxt_movq_wRn_M0(wrd);
1515        gen_op_iwmmxt_set_mup();
1516        gen_op_iwmmxt_set_cup();
1517        break;
1518    case 0x111:						/* TMRC */
1519        if (insn & 0xf)
1520            return 1;
1521        rd = (insn >> 12) & 0xf;
1522        wrd = (insn >> 16) & 0xf;
1523        tmp = iwmmxt_load_creg(wrd);
1524        store_reg(s, rd, tmp);
1525        break;
1526    case 0x300:						/* WANDN */
1527        wrd = (insn >> 12) & 0xf;
1528        rd0 = (insn >> 0) & 0xf;
1529        rd1 = (insn >> 16) & 0xf;
1530        gen_op_iwmmxt_movq_M0_wRn(rd0);
1531        tcg_gen_neg_i64(cpu_M0, cpu_M0);
1532        gen_op_iwmmxt_andq_M0_wRn(rd1);
1533        gen_op_iwmmxt_setpsr_nz();
1534        gen_op_iwmmxt_movq_wRn_M0(wrd);
1535        gen_op_iwmmxt_set_mup();
1536        gen_op_iwmmxt_set_cup();
1537        break;
1538    case 0x200:						/* WAND */
1539        wrd = (insn >> 12) & 0xf;
1540        rd0 = (insn >> 0) & 0xf;
1541        rd1 = (insn >> 16) & 0xf;
1542        gen_op_iwmmxt_movq_M0_wRn(rd0);
1543        gen_op_iwmmxt_andq_M0_wRn(rd1);
1544        gen_op_iwmmxt_setpsr_nz();
1545        gen_op_iwmmxt_movq_wRn_M0(wrd);
1546        gen_op_iwmmxt_set_mup();
1547        gen_op_iwmmxt_set_cup();
1548        break;
1549    case 0x810: case 0xa10:				/* WMADD */
1550        wrd = (insn >> 12) & 0xf;
1551        rd0 = (insn >> 0) & 0xf;
1552        rd1 = (insn >> 16) & 0xf;
1553        gen_op_iwmmxt_movq_M0_wRn(rd0);
1554        if (insn & (1 << 21))
1555            gen_op_iwmmxt_maddsq_M0_wRn(rd1);
1556        else
1557            gen_op_iwmmxt_madduq_M0_wRn(rd1);
1558        gen_op_iwmmxt_movq_wRn_M0(wrd);
1559        gen_op_iwmmxt_set_mup();
1560        break;
1561    case 0x10e: case 0x50e: case 0x90e: case 0xd0e:	/* WUNPCKIL */
1562        wrd = (insn >> 12) & 0xf;
1563        rd0 = (insn >> 16) & 0xf;
1564        rd1 = (insn >> 0) & 0xf;
1565        gen_op_iwmmxt_movq_M0_wRn(rd0);
1566        switch ((insn >> 22) & 3) {
1567        case 0:
1568            gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
1569            break;
1570        case 1:
1571            gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
1572            break;
1573        case 2:
1574            gen_op_iwmmxt_unpackll_M0_wRn(rd1);
1575            break;
1576        case 3:
1577            return 1;
1578        }
1579        gen_op_iwmmxt_movq_wRn_M0(wrd);
1580        gen_op_iwmmxt_set_mup();
1581        gen_op_iwmmxt_set_cup();
1582        break;
1583    case 0x10c: case 0x50c: case 0x90c: case 0xd0c:	/* WUNPCKIH */
1584        wrd = (insn >> 12) & 0xf;
1585        rd0 = (insn >> 16) & 0xf;
1586        rd1 = (insn >> 0) & 0xf;
1587        gen_op_iwmmxt_movq_M0_wRn(rd0);
1588        switch ((insn >> 22) & 3) {
1589        case 0:
1590            gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
1591            break;
1592        case 1:
1593            gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
1594            break;
1595        case 2:
1596            gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
1597            break;
1598        case 3:
1599            return 1;
1600        }
1601        gen_op_iwmmxt_movq_wRn_M0(wrd);
1602        gen_op_iwmmxt_set_mup();
1603        gen_op_iwmmxt_set_cup();
1604        break;
1605    case 0x012: case 0x112: case 0x412: case 0x512:	/* WSAD */
1606        wrd = (insn >> 12) & 0xf;
1607        rd0 = (insn >> 16) & 0xf;
1608        rd1 = (insn >> 0) & 0xf;
1609        gen_op_iwmmxt_movq_M0_wRn(rd0);
1610        if (insn & (1 << 22))
1611            gen_op_iwmmxt_sadw_M0_wRn(rd1);
1612        else
1613            gen_op_iwmmxt_sadb_M0_wRn(rd1);
1614        if (!(insn & (1 << 20)))
1615            gen_op_iwmmxt_addl_M0_wRn(wrd);
1616        gen_op_iwmmxt_movq_wRn_M0(wrd);
1617        gen_op_iwmmxt_set_mup();
1618        break;
1619    case 0x010: case 0x110: case 0x210: case 0x310:	/* WMUL */
1620        wrd = (insn >> 12) & 0xf;
1621        rd0 = (insn >> 16) & 0xf;
1622        rd1 = (insn >> 0) & 0xf;
1623        gen_op_iwmmxt_movq_M0_wRn(rd0);
1624        if (insn & (1 << 21)) {
1625            if (insn & (1 << 20))
1626                gen_op_iwmmxt_mulshw_M0_wRn(rd1);
1627            else
1628                gen_op_iwmmxt_mulslw_M0_wRn(rd1);
1629        } else {
1630            if (insn & (1 << 20))
1631                gen_op_iwmmxt_muluhw_M0_wRn(rd1);
1632            else
1633                gen_op_iwmmxt_mululw_M0_wRn(rd1);
1634        }
1635        gen_op_iwmmxt_movq_wRn_M0(wrd);
1636        gen_op_iwmmxt_set_mup();
1637        break;
1638    case 0x410: case 0x510: case 0x610: case 0x710:	/* WMAC */
1639        wrd = (insn >> 12) & 0xf;
1640        rd0 = (insn >> 16) & 0xf;
1641        rd1 = (insn >> 0) & 0xf;
1642        gen_op_iwmmxt_movq_M0_wRn(rd0);
1643        if (insn & (1 << 21))
1644            gen_op_iwmmxt_macsw_M0_wRn(rd1);
1645        else
1646            gen_op_iwmmxt_macuw_M0_wRn(rd1);
1647        if (!(insn & (1 << 20))) {
1648            iwmmxt_load_reg(cpu_V1, wrd);
1649            tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1650        }
1651        gen_op_iwmmxt_movq_wRn_M0(wrd);
1652        gen_op_iwmmxt_set_mup();
1653        break;
1654    case 0x006: case 0x406: case 0x806: case 0xc06:	/* WCMPEQ */
1655        wrd = (insn >> 12) & 0xf;
1656        rd0 = (insn >> 16) & 0xf;
1657        rd1 = (insn >> 0) & 0xf;
1658        gen_op_iwmmxt_movq_M0_wRn(rd0);
1659        switch ((insn >> 22) & 3) {
1660        case 0:
1661            gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
1662            break;
1663        case 1:
1664            gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
1665            break;
1666        case 2:
1667            gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
1668            break;
1669        case 3:
1670            return 1;
1671        }
1672        gen_op_iwmmxt_movq_wRn_M0(wrd);
1673        gen_op_iwmmxt_set_mup();
1674        gen_op_iwmmxt_set_cup();
1675        break;
1676    case 0x800: case 0x900: case 0xc00: case 0xd00:	/* WAVG2 */
1677        wrd = (insn >> 12) & 0xf;
1678        rd0 = (insn >> 16) & 0xf;
1679        rd1 = (insn >> 0) & 0xf;
1680        gen_op_iwmmxt_movq_M0_wRn(rd0);
1681        if (insn & (1 << 22)) {
1682            if (insn & (1 << 20))
1683                gen_op_iwmmxt_avgw1_M0_wRn(rd1);
1684            else
1685                gen_op_iwmmxt_avgw0_M0_wRn(rd1);
1686        } else {
1687            if (insn & (1 << 20))
1688                gen_op_iwmmxt_avgb1_M0_wRn(rd1);
1689            else
1690                gen_op_iwmmxt_avgb0_M0_wRn(rd1);
1691        }
1692        gen_op_iwmmxt_movq_wRn_M0(wrd);
1693        gen_op_iwmmxt_set_mup();
1694        gen_op_iwmmxt_set_cup();
1695        break;
1696    case 0x802: case 0x902: case 0xa02: case 0xb02:	/* WALIGNR */
1697        wrd = (insn >> 12) & 0xf;
1698        rd0 = (insn >> 16) & 0xf;
1699        rd1 = (insn >> 0) & 0xf;
1700        gen_op_iwmmxt_movq_M0_wRn(rd0);
1701        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
1702        tcg_gen_andi_i32(tmp, tmp, 7);
1703        iwmmxt_load_reg(cpu_V1, rd1);
1704        gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
1705        tcg_temp_free_i32(tmp);
1706        gen_op_iwmmxt_movq_wRn_M0(wrd);
1707        gen_op_iwmmxt_set_mup();
1708        break;
1709    case 0x601: case 0x605: case 0x609: case 0x60d:	/* TINSR */
1710        if (((insn >> 6) & 3) == 3)
1711            return 1;
1712        rd = (insn >> 12) & 0xf;
1713        wrd = (insn >> 16) & 0xf;
1714        tmp = load_reg(s, rd);
1715        gen_op_iwmmxt_movq_M0_wRn(wrd);
1716        switch ((insn >> 6) & 3) {
1717        case 0:
1718            tmp2 = tcg_const_i32(0xff);
1719            tmp3 = tcg_const_i32((insn & 7) << 3);
1720            break;
1721        case 1:
1722            tmp2 = tcg_const_i32(0xffff);
1723            tmp3 = tcg_const_i32((insn & 3) << 4);
1724            break;
1725        case 2:
1726            tmp2 = tcg_const_i32(0xffffffff);
1727            tmp3 = tcg_const_i32((insn & 1) << 5);
1728            break;
1729        default:
1730            TCGV_UNUSED(tmp2);
1731            TCGV_UNUSED(tmp3);
1732        }
1733        gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
1734        tcg_temp_free(tmp3);
1735        tcg_temp_free(tmp2);
1736        tcg_temp_free_i32(tmp);
1737        gen_op_iwmmxt_movq_wRn_M0(wrd);
1738        gen_op_iwmmxt_set_mup();
1739        break;
1740    case 0x107: case 0x507: case 0x907: case 0xd07:	/* TEXTRM */
1741        rd = (insn >> 12) & 0xf;
1742        wrd = (insn >> 16) & 0xf;
1743        if (rd == 15 || ((insn >> 22) & 3) == 3)
1744            return 1;
1745        gen_op_iwmmxt_movq_M0_wRn(wrd);
1746        tmp = tcg_temp_new_i32();
1747        switch ((insn >> 22) & 3) {
1748        case 0:
1749            tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 7) << 3);
1750            tcg_gen_trunc_i64_i32(tmp, cpu_M0);
1751            if (insn & 8) {
1752                tcg_gen_ext8s_i32(tmp, tmp);
1753            } else {
1754                tcg_gen_andi_i32(tmp, tmp, 0xff);
1755            }
1756            break;
1757        case 1:
1758            tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 3) << 4);
1759            tcg_gen_trunc_i64_i32(tmp, cpu_M0);
1760            if (insn & 8) {
1761                tcg_gen_ext16s_i32(tmp, tmp);
1762            } else {
1763                tcg_gen_andi_i32(tmp, tmp, 0xffff);
1764            }
1765            break;
1766        case 2:
1767            tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & 1) << 5);
1768            tcg_gen_trunc_i64_i32(tmp, cpu_M0);
1769            break;
1770        }
1771        store_reg(s, rd, tmp);
1772        break;
1773    case 0x117: case 0x517: case 0x917: case 0xd17:	/* TEXTRC */
1774        if ((insn & 0x000ff008) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1775            return 1;
1776        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1777        switch ((insn >> 22) & 3) {
1778        case 0:
1779            tcg_gen_shri_i32(tmp, tmp, ((insn & 7) << 2) + 0);
1780            break;
1781        case 1:
1782            tcg_gen_shri_i32(tmp, tmp, ((insn & 3) << 3) + 4);
1783            break;
1784        case 2:
1785            tcg_gen_shri_i32(tmp, tmp, ((insn & 1) << 4) + 12);
1786            break;
1787        }
1788        tcg_gen_shli_i32(tmp, tmp, 28);
1789        gen_set_nzcv(tmp);
1790        tcg_temp_free_i32(tmp);
1791        break;
1792    case 0x401: case 0x405: case 0x409: case 0x40d:	/* TBCST */
1793        if (((insn >> 6) & 3) == 3)
1794            return 1;
1795        rd = (insn >> 12) & 0xf;
1796        wrd = (insn >> 16) & 0xf;
1797        tmp = load_reg(s, rd);
1798        switch ((insn >> 6) & 3) {
1799        case 0:
1800            gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
1801            break;
1802        case 1:
1803            gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
1804            break;
1805        case 2:
1806            gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
1807            break;
1808        }
1809        tcg_temp_free_i32(tmp);
1810        gen_op_iwmmxt_movq_wRn_M0(wrd);
1811        gen_op_iwmmxt_set_mup();
1812        break;
1813    case 0x113: case 0x513: case 0x913: case 0xd13:	/* TANDC */
1814        if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1815            return 1;
1816        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1817        tmp2 = tcg_temp_new_i32();
1818        tcg_gen_mov_i32(tmp2, tmp);
1819        switch ((insn >> 22) & 3) {
1820        case 0:
1821            for (i = 0; i < 7; i ++) {
1822                tcg_gen_shli_i32(tmp2, tmp2, 4);
1823                tcg_gen_and_i32(tmp, tmp, tmp2);
1824            }
1825            break;
1826        case 1:
1827            for (i = 0; i < 3; i ++) {
1828                tcg_gen_shli_i32(tmp2, tmp2, 8);
1829                tcg_gen_and_i32(tmp, tmp, tmp2);
1830            }
1831            break;
1832        case 2:
1833            tcg_gen_shli_i32(tmp2, tmp2, 16);
1834            tcg_gen_and_i32(tmp, tmp, tmp2);
1835            break;
1836        }
1837        gen_set_nzcv(tmp);
1838        tcg_temp_free_i32(tmp2);
1839        tcg_temp_free_i32(tmp);
1840        break;
1841    case 0x01c: case 0x41c: case 0x81c: case 0xc1c:	/* WACC */
1842        wrd = (insn >> 12) & 0xf;
1843        rd0 = (insn >> 16) & 0xf;
1844        gen_op_iwmmxt_movq_M0_wRn(rd0);
1845        switch ((insn >> 22) & 3) {
1846        case 0:
1847            gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
1848            break;
1849        case 1:
1850            gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
1851            break;
1852        case 2:
1853            gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
1854            break;
1855        case 3:
1856            return 1;
1857        }
1858        gen_op_iwmmxt_movq_wRn_M0(wrd);
1859        gen_op_iwmmxt_set_mup();
1860        break;
1861    case 0x115: case 0x515: case 0x915: case 0xd15:	/* TORC */
1862        if ((insn & 0x000ff00f) != 0x0003f000 || ((insn >> 22) & 3) == 3)
1863            return 1;
1864        tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1865        tmp2 = tcg_temp_new_i32();
1866        tcg_gen_mov_i32(tmp2, tmp);
1867        switch ((insn >> 22) & 3) {
1868        case 0:
1869            for (i = 0; i < 7; i ++) {
1870                tcg_gen_shli_i32(tmp2, tmp2, 4);
1871                tcg_gen_or_i32(tmp, tmp, tmp2);
1872            }
1873            break;
1874        case 1:
1875            for (i = 0; i < 3; i ++) {
1876                tcg_gen_shli_i32(tmp2, tmp2, 8);
1877                tcg_gen_or_i32(tmp, tmp, tmp2);
1878            }
1879            break;
1880        case 2:
1881            tcg_gen_shli_i32(tmp2, tmp2, 16);
1882            tcg_gen_or_i32(tmp, tmp, tmp2);
1883            break;
1884        }
1885        gen_set_nzcv(tmp);
1886        tcg_temp_free_i32(tmp2);
1887        tcg_temp_free_i32(tmp);
1888        break;
1889    case 0x103: case 0x503: case 0x903: case 0xd03:	/* TMOVMSK */
1890        rd = (insn >> 12) & 0xf;
1891        rd0 = (insn >> 16) & 0xf;
1892        if ((insn & 0xf) != 0 || ((insn >> 22) & 3) == 3)
1893            return 1;
1894        gen_op_iwmmxt_movq_M0_wRn(rd0);
1895        tmp = tcg_temp_new_i32();
1896        switch ((insn >> 22) & 3) {
1897        case 0:
1898            gen_helper_iwmmxt_msbb(tmp, cpu_M0);
1899            break;
1900        case 1:
1901            gen_helper_iwmmxt_msbw(tmp, cpu_M0);
1902            break;
1903        case 2:
1904            gen_helper_iwmmxt_msbl(tmp, cpu_M0);
1905            break;
1906        }
1907        store_reg(s, rd, tmp);
1908        break;
1909    case 0x106: case 0x306: case 0x506: case 0x706:	/* WCMPGT */
1910    case 0x906: case 0xb06: case 0xd06: case 0xf06:
1911        wrd = (insn >> 12) & 0xf;
1912        rd0 = (insn >> 16) & 0xf;
1913        rd1 = (insn >> 0) & 0xf;
1914        gen_op_iwmmxt_movq_M0_wRn(rd0);
1915        switch ((insn >> 22) & 3) {
1916        case 0:
1917            if (insn & (1 << 21))
1918                gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
1919            else
1920                gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
1921            break;
1922        case 1:
1923            if (insn & (1 << 21))
1924                gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
1925            else
1926                gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
1927            break;
1928        case 2:
1929            if (insn & (1 << 21))
1930                gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
1931            else
1932                gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
1933            break;
1934        case 3:
1935            return 1;
1936        }
1937        gen_op_iwmmxt_movq_wRn_M0(wrd);
1938        gen_op_iwmmxt_set_mup();
1939        gen_op_iwmmxt_set_cup();
1940        break;
1941    case 0x00e: case 0x20e: case 0x40e: case 0x60e:	/* WUNPCKEL */
1942    case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
1943        wrd = (insn >> 12) & 0xf;
1944        rd0 = (insn >> 16) & 0xf;
1945        gen_op_iwmmxt_movq_M0_wRn(rd0);
1946        switch ((insn >> 22) & 3) {
1947        case 0:
1948            if (insn & (1 << 21))
1949                gen_op_iwmmxt_unpacklsb_M0();
1950            else
1951                gen_op_iwmmxt_unpacklub_M0();
1952            break;
1953        case 1:
1954            if (insn & (1 << 21))
1955                gen_op_iwmmxt_unpacklsw_M0();
1956            else
1957                gen_op_iwmmxt_unpackluw_M0();
1958            break;
1959        case 2:
1960            if (insn & (1 << 21))
1961                gen_op_iwmmxt_unpacklsl_M0();
1962            else
1963                gen_op_iwmmxt_unpacklul_M0();
1964            break;
1965        case 3:
1966            return 1;
1967        }
1968        gen_op_iwmmxt_movq_wRn_M0(wrd);
1969        gen_op_iwmmxt_set_mup();
1970        gen_op_iwmmxt_set_cup();
1971        break;
1972    case 0x00c: case 0x20c: case 0x40c: case 0x60c:	/* WUNPCKEH */
1973    case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
1974        wrd = (insn >> 12) & 0xf;
1975        rd0 = (insn >> 16) & 0xf;
1976        gen_op_iwmmxt_movq_M0_wRn(rd0);
1977        switch ((insn >> 22) & 3) {
1978        case 0:
1979            if (insn & (1 << 21))
1980                gen_op_iwmmxt_unpackhsb_M0();
1981            else
1982                gen_op_iwmmxt_unpackhub_M0();
1983            break;
1984        case 1:
1985            if (insn & (1 << 21))
1986                gen_op_iwmmxt_unpackhsw_M0();
1987            else
1988                gen_op_iwmmxt_unpackhuw_M0();
1989            break;
1990        case 2:
1991            if (insn & (1 << 21))
1992                gen_op_iwmmxt_unpackhsl_M0();
1993            else
1994                gen_op_iwmmxt_unpackhul_M0();
1995            break;
1996        case 3:
1997            return 1;
1998        }
1999        gen_op_iwmmxt_movq_wRn_M0(wrd);
2000        gen_op_iwmmxt_set_mup();
2001        gen_op_iwmmxt_set_cup();
2002        break;
2003    case 0x204: case 0x604: case 0xa04: case 0xe04:	/* WSRL */
2004    case 0x214: case 0x614: case 0xa14: case 0xe14:
2005        if (((insn >> 22) & 3) == 0)
2006            return 1;
2007        wrd = (insn >> 12) & 0xf;
2008        rd0 = (insn >> 16) & 0xf;
2009        gen_op_iwmmxt_movq_M0_wRn(rd0);
2010        tmp = tcg_temp_new_i32();
2011        if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2012            tcg_temp_free_i32(tmp);
2013            return 1;
2014        }
2015        switch ((insn >> 22) & 3) {
2016        case 1:
2017            gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, tmp);
2018            break;
2019        case 2:
2020            gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, tmp);
2021            break;
2022        case 3:
2023            gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp);
2024            break;
2025        }
2026        tcg_temp_free_i32(tmp);
2027        gen_op_iwmmxt_movq_wRn_M0(wrd);
2028        gen_op_iwmmxt_set_mup();
2029        gen_op_iwmmxt_set_cup();
2030        break;
2031    case 0x004: case 0x404: case 0x804: case 0xc04:	/* WSRA */
2032    case 0x014: case 0x414: case 0x814: case 0xc14:
2033        if (((insn >> 22) & 3) == 0)
2034            return 1;
2035        wrd = (insn >> 12) & 0xf;
2036        rd0 = (insn >> 16) & 0xf;
2037        gen_op_iwmmxt_movq_M0_wRn(rd0);
2038        tmp = tcg_temp_new_i32();
2039        if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2040            tcg_temp_free_i32(tmp);
2041            return 1;
2042        }
2043        switch ((insn >> 22) & 3) {
2044        case 1:
2045            gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, tmp);
2046            break;
2047        case 2:
2048            gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, tmp);
2049            break;
2050        case 3:
2051            gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp);
2052            break;
2053        }
2054        tcg_temp_free_i32(tmp);
2055        gen_op_iwmmxt_movq_wRn_M0(wrd);
2056        gen_op_iwmmxt_set_mup();
2057        gen_op_iwmmxt_set_cup();
2058        break;
2059    case 0x104: case 0x504: case 0x904: case 0xd04:	/* WSLL */
2060    case 0x114: case 0x514: case 0x914: case 0xd14:
2061        if (((insn >> 22) & 3) == 0)
2062            return 1;
2063        wrd = (insn >> 12) & 0xf;
2064        rd0 = (insn >> 16) & 0xf;
2065        gen_op_iwmmxt_movq_M0_wRn(rd0);
2066        tmp = tcg_temp_new_i32();
2067        if (gen_iwmmxt_shift(insn, 0xff, tmp)) {
2068            tcg_temp_free_i32(tmp);
2069            return 1;
2070        }
2071        switch ((insn >> 22) & 3) {
2072        case 1:
2073            gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, tmp);
2074            break;
2075        case 2:
2076            gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, tmp);
2077            break;
2078        case 3:
2079            gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp);
2080            break;
2081        }
2082        tcg_temp_free_i32(tmp);
2083        gen_op_iwmmxt_movq_wRn_M0(wrd);
2084        gen_op_iwmmxt_set_mup();
2085        gen_op_iwmmxt_set_cup();
2086        break;
2087    case 0x304: case 0x704: case 0xb04: case 0xf04:	/* WROR */
2088    case 0x314: case 0x714: case 0xb14: case 0xf14:
2089        if (((insn >> 22) & 3) == 0)
2090            return 1;
2091        wrd = (insn >> 12) & 0xf;
2092        rd0 = (insn >> 16) & 0xf;
2093        gen_op_iwmmxt_movq_M0_wRn(rd0);
2094        tmp = tcg_temp_new_i32();
2095        switch ((insn >> 22) & 3) {
2096        case 1:
2097            if (gen_iwmmxt_shift(insn, 0xf, tmp)) {
2098                tcg_temp_free_i32(tmp);
2099                return 1;
2100            }
2101            gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp);
2102            break;
2103        case 2:
2104            if (gen_iwmmxt_shift(insn, 0x1f, tmp)) {
2105                tcg_temp_free_i32(tmp);
2106                return 1;
2107            }
2108            gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp);
2109            break;
2110        case 3:
2111            if (gen_iwmmxt_shift(insn, 0x3f, tmp)) {
2112                tcg_temp_free_i32(tmp);
2113                return 1;
2114            }
2115            gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp);
2116            break;
2117        }
2118        tcg_temp_free_i32(tmp);
2119        gen_op_iwmmxt_movq_wRn_M0(wrd);
2120        gen_op_iwmmxt_set_mup();
2121        gen_op_iwmmxt_set_cup();
2122        break;
2123    case 0x116: case 0x316: case 0x516: case 0x716:	/* WMIN */
2124    case 0x916: case 0xb16: case 0xd16: case 0xf16:
2125        wrd = (insn >> 12) & 0xf;
2126        rd0 = (insn >> 16) & 0xf;
2127        rd1 = (insn >> 0) & 0xf;
2128        gen_op_iwmmxt_movq_M0_wRn(rd0);
2129        switch ((insn >> 22) & 3) {
2130        case 0:
2131            if (insn & (1 << 21))
2132                gen_op_iwmmxt_minsb_M0_wRn(rd1);
2133            else
2134                gen_op_iwmmxt_minub_M0_wRn(rd1);
2135            break;
2136        case 1:
2137            if (insn & (1 << 21))
2138                gen_op_iwmmxt_minsw_M0_wRn(rd1);
2139            else
2140                gen_op_iwmmxt_minuw_M0_wRn(rd1);
2141            break;
2142        case 2:
2143            if (insn & (1 << 21))
2144                gen_op_iwmmxt_minsl_M0_wRn(rd1);
2145            else
2146                gen_op_iwmmxt_minul_M0_wRn(rd1);
2147            break;
2148        case 3:
2149            return 1;
2150        }
2151        gen_op_iwmmxt_movq_wRn_M0(wrd);
2152        gen_op_iwmmxt_set_mup();
2153        break;
2154    case 0x016: case 0x216: case 0x416: case 0x616:	/* WMAX */
2155    case 0x816: case 0xa16: case 0xc16: case 0xe16:
2156        wrd = (insn >> 12) & 0xf;
2157        rd0 = (insn >> 16) & 0xf;
2158        rd1 = (insn >> 0) & 0xf;
2159        gen_op_iwmmxt_movq_M0_wRn(rd0);
2160        switch ((insn >> 22) & 3) {
2161        case 0:
2162            if (insn & (1 << 21))
2163                gen_op_iwmmxt_maxsb_M0_wRn(rd1);
2164            else
2165                gen_op_iwmmxt_maxub_M0_wRn(rd1);
2166            break;
2167        case 1:
2168            if (insn & (1 << 21))
2169                gen_op_iwmmxt_maxsw_M0_wRn(rd1);
2170            else
2171                gen_op_iwmmxt_maxuw_M0_wRn(rd1);
2172            break;
2173        case 2:
2174            if (insn & (1 << 21))
2175                gen_op_iwmmxt_maxsl_M0_wRn(rd1);
2176            else
2177                gen_op_iwmmxt_maxul_M0_wRn(rd1);
2178            break;
2179        case 3:
2180            return 1;
2181        }
2182        gen_op_iwmmxt_movq_wRn_M0(wrd);
2183        gen_op_iwmmxt_set_mup();
2184        break;
2185    case 0x002: case 0x102: case 0x202: case 0x302:	/* WALIGNI */
2186    case 0x402: case 0x502: case 0x602: case 0x702:
2187        wrd = (insn >> 12) & 0xf;
2188        rd0 = (insn >> 16) & 0xf;
2189        rd1 = (insn >> 0) & 0xf;
2190        gen_op_iwmmxt_movq_M0_wRn(rd0);
2191        tmp = tcg_const_i32((insn >> 20) & 3);
2192        iwmmxt_load_reg(cpu_V1, rd1);
2193        gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
2194        tcg_temp_free(tmp);
2195        gen_op_iwmmxt_movq_wRn_M0(wrd);
2196        gen_op_iwmmxt_set_mup();
2197        break;
2198    case 0x01a: case 0x11a: case 0x21a: case 0x31a:	/* WSUB */
2199    case 0x41a: case 0x51a: case 0x61a: case 0x71a:
2200    case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
2201    case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
2202        wrd = (insn >> 12) & 0xf;
2203        rd0 = (insn >> 16) & 0xf;
2204        rd1 = (insn >> 0) & 0xf;
2205        gen_op_iwmmxt_movq_M0_wRn(rd0);
2206        switch ((insn >> 20) & 0xf) {
2207        case 0x0:
2208            gen_op_iwmmxt_subnb_M0_wRn(rd1);
2209            break;
2210        case 0x1:
2211            gen_op_iwmmxt_subub_M0_wRn(rd1);
2212            break;
2213        case 0x3:
2214            gen_op_iwmmxt_subsb_M0_wRn(rd1);
2215            break;
2216        case 0x4:
2217            gen_op_iwmmxt_subnw_M0_wRn(rd1);
2218            break;
2219        case 0x5:
2220            gen_op_iwmmxt_subuw_M0_wRn(rd1);
2221            break;
2222        case 0x7:
2223            gen_op_iwmmxt_subsw_M0_wRn(rd1);
2224            break;
2225        case 0x8:
2226            gen_op_iwmmxt_subnl_M0_wRn(rd1);
2227            break;
2228        case 0x9:
2229            gen_op_iwmmxt_subul_M0_wRn(rd1);
2230            break;
2231        case 0xb:
2232            gen_op_iwmmxt_subsl_M0_wRn(rd1);
2233            break;
2234        default:
2235            return 1;
2236        }
2237        gen_op_iwmmxt_movq_wRn_M0(wrd);
2238        gen_op_iwmmxt_set_mup();
2239        gen_op_iwmmxt_set_cup();
2240        break;
2241    case 0x01e: case 0x11e: case 0x21e: case 0x31e:	/* WSHUFH */
2242    case 0x41e: case 0x51e: case 0x61e: case 0x71e:
2243    case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
2244    case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
2245        wrd = (insn >> 12) & 0xf;
2246        rd0 = (insn >> 16) & 0xf;
2247        gen_op_iwmmxt_movq_M0_wRn(rd0);
2248        tmp = tcg_const_i32(((insn >> 16) & 0xf0) | (insn & 0x0f));
2249        gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, tmp);
2250        tcg_temp_free(tmp);
2251        gen_op_iwmmxt_movq_wRn_M0(wrd);
2252        gen_op_iwmmxt_set_mup();
2253        gen_op_iwmmxt_set_cup();
2254        break;
2255    case 0x018: case 0x118: case 0x218: case 0x318:	/* WADD */
2256    case 0x418: case 0x518: case 0x618: case 0x718:
2257    case 0x818: case 0x918: case 0xa18: case 0xb18:
2258    case 0xc18: case 0xd18: case 0xe18: case 0xf18:
2259        wrd = (insn >> 12) & 0xf;
2260        rd0 = (insn >> 16) & 0xf;
2261        rd1 = (insn >> 0) & 0xf;
2262        gen_op_iwmmxt_movq_M0_wRn(rd0);
2263        switch ((insn >> 20) & 0xf) {
2264        case 0x0:
2265            gen_op_iwmmxt_addnb_M0_wRn(rd1);
2266            break;
2267        case 0x1:
2268            gen_op_iwmmxt_addub_M0_wRn(rd1);
2269            break;
2270        case 0x3:
2271            gen_op_iwmmxt_addsb_M0_wRn(rd1);
2272            break;
2273        case 0x4:
2274            gen_op_iwmmxt_addnw_M0_wRn(rd1);
2275            break;
2276        case 0x5:
2277            gen_op_iwmmxt_adduw_M0_wRn(rd1);
2278            break;
2279        case 0x7:
2280            gen_op_iwmmxt_addsw_M0_wRn(rd1);
2281            break;
2282        case 0x8:
2283            gen_op_iwmmxt_addnl_M0_wRn(rd1);
2284            break;
2285        case 0x9:
2286            gen_op_iwmmxt_addul_M0_wRn(rd1);
2287            break;
2288        case 0xb:
2289            gen_op_iwmmxt_addsl_M0_wRn(rd1);
2290            break;
2291        default:
2292            return 1;
2293        }
2294        gen_op_iwmmxt_movq_wRn_M0(wrd);
2295        gen_op_iwmmxt_set_mup();
2296        gen_op_iwmmxt_set_cup();
2297        break;
2298    case 0x008: case 0x108: case 0x208: case 0x308:	/* WPACK */
2299    case 0x408: case 0x508: case 0x608: case 0x708:
2300    case 0x808: case 0x908: case 0xa08: case 0xb08:
2301    case 0xc08: case 0xd08: case 0xe08: case 0xf08:
2302        if (!(insn & (1 << 20)) || ((insn >> 22) & 3) == 0)
2303            return 1;
2304        wrd = (insn >> 12) & 0xf;
2305        rd0 = (insn >> 16) & 0xf;
2306        rd1 = (insn >> 0) & 0xf;
2307        gen_op_iwmmxt_movq_M0_wRn(rd0);
2308        switch ((insn >> 22) & 3) {
2309        case 1:
2310            if (insn & (1 << 21))
2311                gen_op_iwmmxt_packsw_M0_wRn(rd1);
2312            else
2313                gen_op_iwmmxt_packuw_M0_wRn(rd1);
2314            break;
2315        case 2:
2316            if (insn & (1 << 21))
2317                gen_op_iwmmxt_packsl_M0_wRn(rd1);
2318            else
2319                gen_op_iwmmxt_packul_M0_wRn(rd1);
2320            break;
2321        case 3:
2322            if (insn & (1 << 21))
2323                gen_op_iwmmxt_packsq_M0_wRn(rd1);
2324            else
2325                gen_op_iwmmxt_packuq_M0_wRn(rd1);
2326            break;
2327        }
2328        gen_op_iwmmxt_movq_wRn_M0(wrd);
2329        gen_op_iwmmxt_set_mup();
2330        gen_op_iwmmxt_set_cup();
2331        break;
2332    case 0x201: case 0x203: case 0x205: case 0x207:
2333    case 0x209: case 0x20b: case 0x20d: case 0x20f:
2334    case 0x211: case 0x213: case 0x215: case 0x217:
2335    case 0x219: case 0x21b: case 0x21d: case 0x21f:
2336        wrd = (insn >> 5) & 0xf;
2337        rd0 = (insn >> 12) & 0xf;
2338        rd1 = (insn >> 0) & 0xf;
2339        if (rd0 == 0xf || rd1 == 0xf)
2340            return 1;
2341        gen_op_iwmmxt_movq_M0_wRn(wrd);
2342        tmp = load_reg(s, rd0);
2343        tmp2 = load_reg(s, rd1);
2344        switch ((insn >> 16) & 0xf) {
2345        case 0x0:					/* TMIA */
2346            gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2347            break;
2348        case 0x8:					/* TMIAPH */
2349            gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2350            break;
2351        case 0xc: case 0xd: case 0xe: case 0xf:		/* TMIAxy */
2352            if (insn & (1 << 16))
2353                tcg_gen_shri_i32(tmp, tmp, 16);
2354            if (insn & (1 << 17))
2355                tcg_gen_shri_i32(tmp2, tmp2, 16);
2356            gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2357            break;
2358        default:
2359            tcg_temp_free_i32(tmp2);
2360            tcg_temp_free_i32(tmp);
2361            return 1;
2362        }
2363        tcg_temp_free_i32(tmp2);
2364        tcg_temp_free_i32(tmp);
2365        gen_op_iwmmxt_movq_wRn_M0(wrd);
2366        gen_op_iwmmxt_set_mup();
2367        break;
2368    default:
2369        return 1;
2370    }
2371
2372    return 0;
2373}
2374
2375/* Disassemble an XScale DSP instruction.  Returns nonzero if an error occurred
2376   (ie. an undefined instruction).  */
2377static int disas_dsp_insn(CPUARMState *env, DisasContext *s, uint32_t insn)
2378{
2379    int acc, rd0, rd1, rdhi, rdlo;
2380    TCGv tmp, tmp2;
2381
2382    if ((insn & 0x0ff00f10) == 0x0e200010) {
2383        /* Multiply with Internal Accumulate Format */
2384        rd0 = (insn >> 12) & 0xf;
2385        rd1 = insn & 0xf;
2386        acc = (insn >> 5) & 7;
2387
2388        if (acc != 0)
2389            return 1;
2390
2391        tmp = load_reg(s, rd0);
2392        tmp2 = load_reg(s, rd1);
2393        switch ((insn >> 16) & 0xf) {
2394        case 0x0:					/* MIA */
2395            gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2396            break;
2397        case 0x8:					/* MIAPH */
2398            gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2399            break;
2400        case 0xc:					/* MIABB */
2401        case 0xd:					/* MIABT */
2402        case 0xe:					/* MIATB */
2403        case 0xf:					/* MIATT */
2404            if (insn & (1 << 16))
2405                tcg_gen_shri_i32(tmp, tmp, 16);
2406            if (insn & (1 << 17))
2407                tcg_gen_shri_i32(tmp2, tmp2, 16);
2408            gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2409            break;
2410        default:
2411            return 1;
2412        }
2413        tcg_temp_free_i32(tmp2);
2414        tcg_temp_free_i32(tmp);
2415
2416        gen_op_iwmmxt_movq_wRn_M0(acc);
2417        return 0;
2418    }
2419
2420    if ((insn & 0x0fe00ff8) == 0x0c400000) {
2421        /* Internal Accumulator Access Format */
2422        rdhi = (insn >> 16) & 0xf;
2423        rdlo = (insn >> 12) & 0xf;
2424        acc = insn & 7;
2425
2426        if (acc != 0)
2427            return 1;
2428
2429        if (insn & ARM_CP_RW_BIT) {			/* MRA */
2430            iwmmxt_load_reg(cpu_V0, acc);
2431            tcg_gen_trunc_i64_i32(cpu_R[rdlo], cpu_V0);
2432            tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
2433            tcg_gen_trunc_i64_i32(cpu_R[rdhi], cpu_V0);
2434            tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (1 << (40 - 32)) - 1);
2435        } else {					/* MAR */
2436            tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
2437            iwmmxt_store_reg(cpu_V0, acc);
2438        }
2439        return 0;
2440    }
2441
2442    return 1;
2443}
2444
2445/* Disassemble system coprocessor instruction.  Return nonzero if
2446   instruction is not defined.  */
2447static int disas_cp_insn(CPUARMState *env, DisasContext *s, uint32_t insn)
2448{
2449    TCGv tmp, tmp2;
2450    uint32_t rd = (insn >> 12) & 0xf;
2451    uint32_t cp = (insn >> 8) & 0xf;
2452
2453    if (insn & ARM_CP_RW_BIT) {
2454        if (!env->cp[cp].cp_read)
2455            return 1;
2456        gen_set_pc_im(s->pc);
2457        tmp = tcg_temp_new_i32();
2458        tmp2 = tcg_const_i32(insn);
2459        gen_helper_get_cp(tmp, cpu_env, tmp2);
2460        tcg_temp_free(tmp2);
2461        store_reg(s, rd, tmp);
2462    } else {
2463        if (!env->cp[cp].cp_write)
2464            return 1;
2465        gen_set_pc_im(s->pc);
2466        tmp = load_reg(s, rd);
2467        tmp2 = tcg_const_i32(insn);
2468        gen_helper_set_cp(cpu_env, tmp2, tmp);
2469        tcg_temp_free(tmp2);
2470        tcg_temp_free_i32(tmp);
2471    }
2472    return 0;
2473}
2474
2475static int cp15_user_ok(CPUARMState *env, uint32_t insn)
2476{
2477    int cpn = (insn >> 16) & 0xf;
2478    int cpm = insn & 0xf;
2479    int op = ((insn >> 5) & 7) | ((insn >> 18) & 0x38);
2480
2481    if (arm_feature(env, ARM_FEATURE_V7) && cpn == 9) {
2482        /* Performance monitor registers fall into three categories:
2483         *  (a) always UNDEF in usermode
2484         *  (b) UNDEF only if PMUSERENR.EN is 0
2485         *  (c) always read OK and UNDEF on write (PMUSERENR only)
2486         */
2487        if ((cpm == 12 && (op < 6)) ||
2488            (cpm == 13 && (op < 3))) {
2489            return env->cp15.c9_pmuserenr;
2490        } else if (cpm == 14 && op == 0 && (insn & ARM_CP_RW_BIT)) {
2491            /* PMUSERENR, read only */
2492            return 1;
2493        }
2494        return 0;
2495    }
2496
2497    if (cpn == 13 && cpm == 0) {
2498        /* TLS register.  */
2499        if (op == 2 || (op == 3 && (insn & ARM_CP_RW_BIT)))
2500            return 1;
2501    }
2502    return 0;
2503}
2504
2505static int cp15_tls_load_store(CPUARMState *env, DisasContext *s, uint32_t insn, uint32_t rd)
2506{
2507    TCGv tmp;
2508    int cpn = (insn >> 16) & 0xf;
2509    int cpm = insn & 0xf;
2510    int op = ((insn >> 5) & 7) | ((insn >> 18) & 0x38);
2511
2512    if (!arm_feature(env, ARM_FEATURE_V6K))
2513        return 0;
2514
2515    if (!(cpn == 13 && cpm == 0))
2516        return 0;
2517
2518    if (insn & ARM_CP_RW_BIT) {
2519        switch (op) {
2520        case 2:
2521            tmp = load_cpu_field(cp15.c13_tls1);
2522            break;
2523        case 3:
2524            tmp = load_cpu_field(cp15.c13_tls2);
2525            break;
2526        case 4:
2527            tmp = load_cpu_field(cp15.c13_tls3);
2528            break;
2529        default:
2530            return 0;
2531        }
2532        store_reg(s, rd, tmp);
2533
2534    } else {
2535        tmp = load_reg(s, rd);
2536        switch (op) {
2537        case 2:
2538            store_cpu_field(tmp, cp15.c13_tls1);
2539            break;
2540        case 3:
2541            store_cpu_field(tmp, cp15.c13_tls2);
2542            break;
2543        case 4:
2544            store_cpu_field(tmp, cp15.c13_tls3);
2545            break;
2546        default:
2547            tcg_temp_free_i32(tmp);
2548            return 0;
2549        }
2550    }
2551    return 1;
2552}
2553
2554/* Disassemble system coprocessor (cp15) instruction.  Return nonzero if
2555   instruction is not defined.  */
2556static int disas_cp15_insn(CPUARMState *env, DisasContext *s, uint32_t insn)
2557{
2558    uint32_t rd;
2559    TCGv tmp, tmp2;
2560
2561    /* M profile cores use memory mapped registers instead of cp15.  */
2562    if (arm_feature(env, ARM_FEATURE_M))
2563	return 1;
2564
2565    if ((insn & (1 << 25)) == 0) {
2566        if (insn & (1 << 20)) {
2567            /* mrrc */
2568            return 1;
2569        }
2570        /* mcrr.  Used for block cache operations, so implement as no-op.  */
2571        return 0;
2572    }
2573    if ((insn & (1 << 4)) == 0) {
2574        /* cdp */
2575        return 1;
2576    }
2577    /* We special case a number of cp15 instructions which were used
2578     * for things which are real instructions in ARMv7. This allows
2579     * them to work in linux-user mode which doesn't provide functional
2580     * get_cp15/set_cp15 helpers, and is more efficient anyway.
2581     */
2582    switch ((insn & 0x0fff0fff)) {
2583    case 0x0e070f90:
2584        /* 0,c7,c0,4: Standard v6 WFI (also used in some pre-v6 cores).
2585         * In v7, this must NOP.
2586         */
2587        if (IS_USER(s)) {
2588            return 1;
2589        }
2590        if (!arm_feature(env, ARM_FEATURE_V7)) {
2591            /* Wait for interrupt.  */
2592            gen_set_pc_im(s->pc);
2593            s->is_jmp = DISAS_WFI;
2594        }
2595        return 0;
2596    case 0x0e070f58:
2597        /* 0,c7,c8,2: Not all pre-v6 cores implemented this WFI,
2598         * so this is slightly over-broad.
2599         */
2600        if (!IS_USER(s) && !arm_feature(env, ARM_FEATURE_V6)) {
2601            /* Wait for interrupt.  */
2602            gen_set_pc_im(s->pc);
2603            s->is_jmp = DISAS_WFI;
2604            return 0;
2605        }
2606        /* Otherwise continue to handle via helper function.
2607         * In particular, on v7 and some v6 cores this is one of
2608         * the VA-PA registers.
2609         */
2610        break;
2611    case 0x0e070f3d:
2612        /* 0,c7,c13,1: prefetch-by-MVA in v6, NOP in v7 */
2613        if (arm_feature(env, ARM_FEATURE_V6)) {
2614            return IS_USER(s) ? 1 : 0;
2615        }
2616        break;
2617    case 0x0e070f95: /* 0,c7,c5,4 : ISB */
2618    case 0x0e070f9a: /* 0,c7,c10,4: DSB */
2619    case 0x0e070fba: /* 0,c7,c10,5: DMB */
2620        /* Barriers in both v6 and v7 */
2621        if (arm_feature(env, ARM_FEATURE_V6)) {
2622            return 0;
2623        }
2624        break;
2625    default:
2626        break;
2627    }
2628
2629    if (IS_USER(s) && !cp15_user_ok(env, insn)) {
2630        return 1;
2631    }
2632
2633    rd = (insn >> 12) & 0xf;
2634
2635    if (cp15_tls_load_store(env, s, insn, rd))
2636        return 0;
2637
2638    tmp2 = tcg_const_i32(insn);
2639    if (insn & ARM_CP_RW_BIT) {
2640        tmp = tcg_temp_new_i32();
2641        gen_helper_get_cp15(tmp, cpu_env, tmp2);
2642        /* If the destination register is r15 then sets condition codes.  */
2643        if (rd != 15)
2644            store_reg(s, rd, tmp);
2645        else
2646            tcg_temp_free_i32(tmp);
2647    } else {
2648        tmp = load_reg(s, rd);
2649        gen_helper_set_cp15(cpu_env, tmp2, tmp);
2650        tcg_temp_free_i32(tmp);
2651        /* Normally we would always end the TB here, but Linux
2652         * arch/arm/mach-pxa/sleep.S expects two instructions following
2653         * an MMU enable to execute from cache.  Imitate this behaviour.  */
2654        if (!arm_feature(env, ARM_FEATURE_XSCALE) ||
2655                (insn & 0x0fff0fff) != 0x0e010f10)
2656            gen_lookup_tb(s);
2657    }
2658    tcg_temp_free_i32(tmp2);
2659    return 0;
2660}
2661
2662#define VFP_REG_SHR(x, n) (((n) > 0) ? (x) >> (n) : (x) << -(n))
2663#define VFP_SREG(insn, bigbit, smallbit) \
2664  ((VFP_REG_SHR(insn, bigbit - 1) & 0x1e) | (((insn) >> (smallbit)) & 1))
2665#define VFP_DREG(reg, insn, bigbit, smallbit) do { \
2666    if (arm_feature(env, ARM_FEATURE_VFP3)) { \
2667        reg = (((insn) >> (bigbit)) & 0x0f) \
2668              | (((insn) >> ((smallbit) - 4)) & 0x10); \
2669    } else { \
2670        if (insn & (1 << (smallbit))) \
2671            return 1; \
2672        reg = ((insn) >> (bigbit)) & 0x0f; \
2673    }} while (0)
2674
2675#define VFP_SREG_D(insn) VFP_SREG(insn, 12, 22)
2676#define VFP_DREG_D(reg, insn) VFP_DREG(reg, insn, 12, 22)
2677#define VFP_SREG_N(insn) VFP_SREG(insn, 16,  7)
2678#define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16,  7)
2679#define VFP_SREG_M(insn) VFP_SREG(insn,  0,  5)
2680#define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn,  0,  5)
2681
2682/* Move between integer and VFP cores.  */
2683static TCGv gen_vfp_mrs(void)
2684{
2685    TCGv tmp = tcg_temp_new_i32();
2686    tcg_gen_mov_i32(tmp, cpu_F0s);
2687    return tmp;
2688}
2689
2690static void gen_vfp_msr(TCGv tmp)
2691{
2692    tcg_gen_mov_i32(cpu_F0s, tmp);
2693    tcg_temp_free_i32(tmp);
2694}
2695
2696static void gen_neon_dup_u8(TCGv var, int shift)
2697{
2698    TCGv tmp = tcg_temp_new_i32();
2699    if (shift)
2700        tcg_gen_shri_i32(var, var, shift);
2701    tcg_gen_ext8u_i32(var, var);
2702    tcg_gen_shli_i32(tmp, var, 8);
2703    tcg_gen_or_i32(var, var, tmp);
2704    tcg_gen_shli_i32(tmp, var, 16);
2705    tcg_gen_or_i32(var, var, tmp);
2706    tcg_temp_free_i32(tmp);
2707}
2708
2709static void gen_neon_dup_low16(TCGv var)
2710{
2711    TCGv tmp = tcg_temp_new_i32();
2712    tcg_gen_ext16u_i32(var, var);
2713    tcg_gen_shli_i32(tmp, var, 16);
2714    tcg_gen_or_i32(var, var, tmp);
2715    tcg_temp_free_i32(tmp);
2716}
2717
2718static void gen_neon_dup_high16(TCGv var)
2719{
2720    TCGv tmp = tcg_temp_new_i32();
2721    tcg_gen_andi_i32(var, var, 0xffff0000);
2722    tcg_gen_shri_i32(tmp, var, 16);
2723    tcg_gen_or_i32(var, var, tmp);
2724    tcg_temp_free_i32(tmp);
2725}
2726
2727static TCGv gen_load_and_replicate(DisasContext *s, TCGv addr, int size)
2728{
2729    /* Load a single Neon element and replicate into a 32 bit TCG reg */
2730    TCGv tmp;
2731    switch (size) {
2732    case 0:
2733        tmp = gen_ld8u(addr, IS_USER(s));
2734        gen_neon_dup_u8(tmp, 0);
2735        break;
2736    case 1:
2737        tmp = gen_ld16u(addr, IS_USER(s));
2738        gen_neon_dup_low16(tmp);
2739        break;
2740    case 2:
2741        tmp = gen_ld32(addr, IS_USER(s));
2742        break;
2743    default: /* Avoid compiler warnings.  */
2744        abort();
2745    }
2746    return tmp;
2747}
2748
2749/* Disassemble a VFP instruction.  Returns nonzero if an error occurred
2750   (ie. an undefined instruction).  */
2751static int disas_vfp_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
2752{
2753    uint32_t rd, rn, rm, op, i, n, offset, delta_d, delta_m, bank_mask;
2754    int dp, veclen;
2755    TCGv addr;
2756    TCGv tmp;
2757    TCGv tmp2;
2758
2759    if (!arm_feature(env, ARM_FEATURE_VFP))
2760        return 1;
2761
2762    if (!s->vfp_enabled) {
2763        /* VFP disabled.  Only allow fmxr/fmrx to/from some control regs.  */
2764        if ((insn & 0x0fe00fff) != 0x0ee00a10)
2765            return 1;
2766        rn = (insn >> 16) & 0xf;
2767        if (rn != ARM_VFP_FPSID && rn != ARM_VFP_FPEXC
2768            && rn != ARM_VFP_MVFR1 && rn != ARM_VFP_MVFR0)
2769            return 1;
2770    }
2771    dp = ((insn & 0xf00) == 0xb00);
2772    switch ((insn >> 24) & 0xf) {
2773    case 0xe:
2774        if (insn & (1 << 4)) {
2775            /* single register transfer */
2776            rd = (insn >> 12) & 0xf;
2777            if (dp) {
2778                int size;
2779                int pass;
2780
2781                VFP_DREG_N(rn, insn);
2782                if (insn & 0xf)
2783                    return 1;
2784                if (insn & 0x00c00060
2785                    && !arm_feature(env, ARM_FEATURE_NEON))
2786                    return 1;
2787
2788                pass = (insn >> 21) & 1;
2789                if (insn & (1 << 22)) {
2790                    size = 0;
2791                    offset = ((insn >> 5) & 3) * 8;
2792                } else if (insn & (1 << 5)) {
2793                    size = 1;
2794                    offset = (insn & (1 << 6)) ? 16 : 0;
2795                } else {
2796                    size = 2;
2797                    offset = 0;
2798                }
2799                if (insn & ARM_CP_RW_BIT) {
2800                    /* vfp->arm */
2801                    tmp = neon_load_reg(rn, pass);
2802                    switch (size) {
2803                    case 0:
2804                        if (offset)
2805                            tcg_gen_shri_i32(tmp, tmp, offset);
2806                        if (insn & (1 << 23))
2807                            gen_uxtb(tmp);
2808                        else
2809                            gen_sxtb(tmp);
2810                        break;
2811                    case 1:
2812                        if (insn & (1 << 23)) {
2813                            if (offset) {
2814                                tcg_gen_shri_i32(tmp, tmp, 16);
2815                            } else {
2816                                gen_uxth(tmp);
2817                            }
2818                        } else {
2819                            if (offset) {
2820                                tcg_gen_sari_i32(tmp, tmp, 16);
2821                            } else {
2822                                gen_sxth(tmp);
2823                            }
2824                        }
2825                        break;
2826                    case 2:
2827                        break;
2828                    }
2829                    store_reg(s, rd, tmp);
2830                } else {
2831                    /* arm->vfp */
2832                    tmp = load_reg(s, rd);
2833                    if (insn & (1 << 23)) {
2834                        /* VDUP */
2835                        if (size == 0) {
2836                            gen_neon_dup_u8(tmp, 0);
2837                        } else if (size == 1) {
2838                            gen_neon_dup_low16(tmp);
2839                        }
2840                        for (n = 0; n <= pass * 2; n++) {
2841                            tmp2 = tcg_temp_new_i32();
2842                            tcg_gen_mov_i32(tmp2, tmp);
2843                            neon_store_reg(rn, n, tmp2);
2844                        }
2845                        neon_store_reg(rn, n, tmp);
2846                    } else {
2847                        /* VMOV */
2848                        switch (size) {
2849                        case 0:
2850                            tmp2 = neon_load_reg(rn, pass);
2851                            gen_bfi(tmp, tmp2, tmp, offset, 0xff);
2852                            tcg_temp_free_i32(tmp2);
2853                            break;
2854                        case 1:
2855                            tmp2 = neon_load_reg(rn, pass);
2856                            gen_bfi(tmp, tmp2, tmp, offset, 0xffff);
2857                            tcg_temp_free_i32(tmp2);
2858                            break;
2859                        case 2:
2860                            break;
2861                        }
2862                        neon_store_reg(rn, pass, tmp);
2863                    }
2864                }
2865            } else { /* !dp */
2866                if ((insn & 0x6f) != 0x00)
2867                    return 1;
2868                rn = VFP_SREG_N(insn);
2869                if (insn & ARM_CP_RW_BIT) {
2870                    /* vfp->arm */
2871                    if (insn & (1 << 21)) {
2872                        /* system register */
2873                        rn >>= 1;
2874
2875                        switch (rn) {
2876                        case ARM_VFP_FPSID:
2877                            /* VFP2 allows access to FSID from userspace.
2878                               VFP3 restricts all id registers to privileged
2879                               accesses.  */
2880                            if (IS_USER(s)
2881                                && arm_feature(env, ARM_FEATURE_VFP3))
2882                                return 1;
2883                            tmp = load_cpu_field(vfp.xregs[rn]);
2884                            break;
2885                        case ARM_VFP_FPEXC:
2886                            if (IS_USER(s))
2887                                return 1;
2888                            tmp = load_cpu_field(vfp.xregs[rn]);
2889                            break;
2890                        case ARM_VFP_FPINST:
2891                        case ARM_VFP_FPINST2:
2892                            /* Not present in VFP3.  */
2893                            if (IS_USER(s)
2894                                || arm_feature(env, ARM_FEATURE_VFP3))
2895                                return 1;
2896                            tmp = load_cpu_field(vfp.xregs[rn]);
2897                            break;
2898                        case ARM_VFP_FPSCR:
2899                            if (rd == 15) {
2900                                tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
2901                                tcg_gen_andi_i32(tmp, tmp, 0xf0000000);
2902                            } else {
2903                                tmp = tcg_temp_new_i32();
2904                                gen_helper_vfp_get_fpscr(tmp, cpu_env);
2905                            }
2906                            break;
2907                        case ARM_VFP_MVFR0:
2908                        case ARM_VFP_MVFR1:
2909                            if (IS_USER(s)
2910                                || !arm_feature(env, ARM_FEATURE_VFP3))
2911                                return 1;
2912                            tmp = load_cpu_field(vfp.xregs[rn]);
2913                            break;
2914                        default:
2915                            return 1;
2916                        }
2917                    } else {
2918                        gen_mov_F0_vreg(0, rn);
2919                        tmp = gen_vfp_mrs();
2920                    }
2921                    if (rd == 15) {
2922                        /* Set the 4 flag bits in the CPSR.  */
2923                        gen_set_nzcv(tmp);
2924                        tcg_temp_free_i32(tmp);
2925                    } else {
2926                        store_reg(s, rd, tmp);
2927                    }
2928                } else {
2929                    /* arm->vfp */
2930                    tmp = load_reg(s, rd);
2931                    if (insn & (1 << 21)) {
2932                        rn >>= 1;
2933                        /* system register */
2934                        switch (rn) {
2935                        case ARM_VFP_FPSID:
2936                        case ARM_VFP_MVFR0:
2937                        case ARM_VFP_MVFR1:
2938                            /* Writes are ignored.  */
2939                            break;
2940                        case ARM_VFP_FPSCR:
2941                            gen_helper_vfp_set_fpscr(cpu_env, tmp);
2942                            tcg_temp_free_i32(tmp);
2943                            gen_lookup_tb(s);
2944                            break;
2945                        case ARM_VFP_FPEXC:
2946                            if (IS_USER(s))
2947                                return 1;
2948                            /* TODO: VFP subarchitecture support.
2949                             * For now, keep the EN bit only */
2950                            tcg_gen_andi_i32(tmp, tmp, 1 << 30);
2951                            store_cpu_field(tmp, vfp.xregs[rn]);
2952                            gen_lookup_tb(s);
2953                            break;
2954                        case ARM_VFP_FPINST:
2955                        case ARM_VFP_FPINST2:
2956                            store_cpu_field(tmp, vfp.xregs[rn]);
2957                            break;
2958                        default:
2959                            return 1;
2960                        }
2961                    } else {
2962                        gen_vfp_msr(tmp);
2963                        gen_mov_vreg_F0(0, rn);
2964                    }
2965                }
2966            }
2967        } else {
2968            /* data processing */
2969            /* The opcode is in bits 23, 21, 20 and 6.  */
2970            op = ((insn >> 20) & 8) | ((insn >> 19) & 6) | ((insn >> 6) & 1);
2971            if (dp) {
2972                if (op == 15) {
2973                    /* rn is opcode */
2974                    rn = ((insn >> 15) & 0x1e) | ((insn >> 7) & 1);
2975                } else {
2976                    /* rn is register number */
2977                    VFP_DREG_N(rn, insn);
2978                }
2979
2980                if (op == 15 && (rn == 15 || ((rn & 0x1c) == 0x18))) {
2981                    /* Integer or single precision destination.  */
2982                    rd = VFP_SREG_D(insn);
2983                } else {
2984                    VFP_DREG_D(rd, insn);
2985                }
2986                if (op == 15 &&
2987                    (((rn & 0x1c) == 0x10) || ((rn & 0x14) == 0x14))) {
2988                    /* VCVT from int is always from S reg regardless of dp bit.
2989                     * VCVT with immediate frac_bits has same format as SREG_M
2990                     */
2991                    rm = VFP_SREG_M(insn);
2992                } else {
2993                    VFP_DREG_M(rm, insn);
2994                }
2995            } else {
2996                rn = VFP_SREG_N(insn);
2997                if (op == 15 && rn == 15) {
2998                    /* Double precision destination.  */
2999                    VFP_DREG_D(rd, insn);
3000                } else {
3001                    rd = VFP_SREG_D(insn);
3002                }
3003                /* NB that we implicitly rely on the encoding for the frac_bits
3004                 * in VCVT of fixed to float being the same as that of an SREG_M
3005                 */
3006                rm = VFP_SREG_M(insn);
3007            }
3008
3009            veclen = s->vec_len;
3010            if (op == 15 && rn > 3)
3011                veclen = 0;
3012
3013            /* Shut up compiler warnings.  */
3014            delta_m = 0;
3015            delta_d = 0;
3016            bank_mask = 0;
3017
3018            if (veclen > 0) {
3019                if (dp)
3020                    bank_mask = 0xc;
3021                else
3022                    bank_mask = 0x18;
3023
3024                /* Figure out what type of vector operation this is.  */
3025                if ((rd & bank_mask) == 0) {
3026                    /* scalar */
3027                    veclen = 0;
3028                } else {
3029                    if (dp)
3030                        delta_d = (s->vec_stride >> 1) + 1;
3031                    else
3032                        delta_d = s->vec_stride + 1;
3033
3034                    if ((rm & bank_mask) == 0) {
3035                        /* mixed scalar/vector */
3036                        delta_m = 0;
3037                    } else {
3038                        /* vector */
3039                        delta_m = delta_d;
3040                    }
3041                }
3042            }
3043
3044            /* Load the initial operands.  */
3045            if (op == 15) {
3046                switch (rn) {
3047                case 16:
3048                case 17:
3049                    /* Integer source */
3050                    gen_mov_F0_vreg(0, rm);
3051                    break;
3052                case 8:
3053                case 9:
3054                    /* Compare */
3055                    gen_mov_F0_vreg(dp, rd);
3056                    gen_mov_F1_vreg(dp, rm);
3057                    break;
3058                case 10:
3059                case 11:
3060                    /* Compare with zero */
3061                    gen_mov_F0_vreg(dp, rd);
3062                    gen_vfp_F1_ld0(dp);
3063                    break;
3064                case 20:
3065                case 21:
3066                case 22:
3067                case 23:
3068                case 28:
3069                case 29:
3070                case 30:
3071                case 31:
3072                    /* Source and destination the same.  */
3073                    gen_mov_F0_vreg(dp, rd);
3074                    break;
3075                case 4:
3076                case 5:
3077                case 6:
3078                case 7:
3079                    /* VCVTB, VCVTT: only present with the halfprec extension,
3080                     * UNPREDICTABLE if bit 8 is set (we choose to UNDEF)
3081                     */
3082                    if (dp || !arm_feature(env, ARM_FEATURE_VFP_FP16)) {
3083                        return 1;
3084                    }
3085                    /* Otherwise fall through */
3086                default:
3087                    /* One source operand.  */
3088                    gen_mov_F0_vreg(dp, rm);
3089                    break;
3090                }
3091            } else {
3092                /* Two source operands.  */
3093                gen_mov_F0_vreg(dp, rn);
3094                gen_mov_F1_vreg(dp, rm);
3095            }
3096
3097            for (;;) {
3098                /* Perform the calculation.  */
3099                switch (op) {
3100                case 0: /* VMLA: fd + (fn * fm) */
3101                    /* Note that order of inputs to the add matters for NaNs */
3102                    gen_vfp_F1_mul(dp);
3103                    gen_mov_F0_vreg(dp, rd);
3104                    gen_vfp_add(dp);
3105                    break;
3106                case 1: /* VMLS: fd + -(fn * fm) */
3107                    gen_vfp_mul(dp);
3108                    gen_vfp_F1_neg(dp);
3109                    gen_mov_F0_vreg(dp, rd);
3110                    gen_vfp_add(dp);
3111                    break;
3112                case 2: /* VNMLS: -fd + (fn * fm) */
3113                    /* Note that it isn't valid to replace (-A + B) with (B - A)
3114                     * or similar plausible looking simplifications
3115                     * because this will give wrong results for NaNs.
3116                     */
3117                    gen_vfp_F1_mul(dp);
3118                    gen_mov_F0_vreg(dp, rd);
3119                    gen_vfp_neg(dp);
3120                    gen_vfp_add(dp);
3121                    break;
3122                case 3: /* VNMLA: -fd + -(fn * fm) */
3123                    gen_vfp_mul(dp);
3124                    gen_vfp_F1_neg(dp);
3125                    gen_mov_F0_vreg(dp, rd);
3126                    gen_vfp_neg(dp);
3127                    gen_vfp_add(dp);
3128                    break;
3129                case 4: /* mul: fn * fm */
3130                    gen_vfp_mul(dp);
3131                    break;
3132                case 5: /* nmul: -(fn * fm) */
3133                    gen_vfp_mul(dp);
3134                    gen_vfp_neg(dp);
3135                    break;
3136                case 6: /* add: fn + fm */
3137                    gen_vfp_add(dp);
3138                    break;
3139                case 7: /* sub: fn - fm */
3140                    gen_vfp_sub(dp);
3141                    break;
3142                case 8: /* div: fn / fm */
3143                    gen_vfp_div(dp);
3144                    break;
3145                case 14: /* fconst */
3146                    if (!arm_feature(env, ARM_FEATURE_VFP3))
3147                      return 1;
3148
3149                    n = (insn << 12) & 0x80000000;
3150                    i = ((insn >> 12) & 0x70) | (insn & 0xf);
3151                    if (dp) {
3152                        if (i & 0x40)
3153                            i |= 0x3f80;
3154                        else
3155                            i |= 0x4000;
3156                        n |= i << 16;
3157                        tcg_gen_movi_i64(cpu_F0d, ((uint64_t)n) << 32);
3158                    } else {
3159                        if (i & 0x40)
3160                            i |= 0x780;
3161                        else
3162                            i |= 0x800;
3163                        n |= i << 19;
3164                        tcg_gen_movi_i32(cpu_F0s, n);
3165                    }
3166                    break;
3167                case 15: /* extension space */
3168                    switch (rn) {
3169                    case 0: /* cpy */
3170                        /* no-op */
3171                        break;
3172                    case 1: /* abs */
3173                        gen_vfp_abs(dp);
3174                        break;
3175                    case 2: /* neg */
3176                        gen_vfp_neg(dp);
3177                        break;
3178                    case 3: /* sqrt */
3179                        gen_vfp_sqrt(dp);
3180                        break;
3181                    case 4: /* vcvtb.f32.f16 */
3182                        tmp = gen_vfp_mrs();
3183                        tcg_gen_ext16u_i32(tmp, tmp);
3184                        gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp, cpu_env);
3185                        tcg_temp_free_i32(tmp);
3186                        break;
3187                    case 5: /* vcvtt.f32.f16 */
3188                        tmp = gen_vfp_mrs();
3189                        tcg_gen_shri_i32(tmp, tmp, 16);
3190                        gen_helper_vfp_fcvt_f16_to_f32(cpu_F0s, tmp, cpu_env);
3191                        tcg_temp_free_i32(tmp);
3192                        break;
3193                    case 6: /* vcvtb.f16.f32 */
3194                        tmp = tcg_temp_new_i32();
3195                        gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
3196                        gen_mov_F0_vreg(0, rd);
3197                        tmp2 = gen_vfp_mrs();
3198                        tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000);
3199                        tcg_gen_or_i32(tmp, tmp, tmp2);
3200                        tcg_temp_free_i32(tmp2);
3201                        gen_vfp_msr(tmp);
3202                        break;
3203                    case 7: /* vcvtt.f16.f32 */
3204                        tmp = tcg_temp_new_i32();
3205                        gen_helper_vfp_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
3206                        tcg_gen_shli_i32(tmp, tmp, 16);
3207                        gen_mov_F0_vreg(0, rd);
3208                        tmp2 = gen_vfp_mrs();
3209                        tcg_gen_ext16u_i32(tmp2, tmp2);
3210                        tcg_gen_or_i32(tmp, tmp, tmp2);
3211                        tcg_temp_free_i32(tmp2);
3212                        gen_vfp_msr(tmp);
3213                        break;
3214                    case 8: /* cmp */
3215                        gen_vfp_cmp(dp);
3216                        break;
3217                    case 9: /* cmpe */
3218                        gen_vfp_cmpe(dp);
3219                        break;
3220                    case 10: /* cmpz */
3221                        gen_vfp_cmp(dp);
3222                        break;
3223                    case 11: /* cmpez */
3224                        gen_vfp_F1_ld0(dp);
3225                        gen_vfp_cmpe(dp);
3226                        break;
3227                    case 15: /* single<->double conversion */
3228                        if (dp)
3229                            gen_helper_vfp_fcvtsd(cpu_F0s, cpu_F0d, cpu_env);
3230                        else
3231                            gen_helper_vfp_fcvtds(cpu_F0d, cpu_F0s, cpu_env);
3232                        break;
3233                    case 16: /* fuito */
3234                        gen_vfp_uito(dp, 0);
3235                        break;
3236                    case 17: /* fsito */
3237                        gen_vfp_sito(dp, 0);
3238                        break;
3239                    case 20: /* fshto */
3240                        if (!arm_feature(env, ARM_FEATURE_VFP3))
3241                          return 1;
3242                        gen_vfp_shto(dp, 16 - rm, 0);
3243                        break;
3244                    case 21: /* fslto */
3245                        if (!arm_feature(env, ARM_FEATURE_VFP3))
3246                          return 1;
3247                        gen_vfp_slto(dp, 32 - rm, 0);
3248                        break;
3249                    case 22: /* fuhto */
3250                        if (!arm_feature(env, ARM_FEATURE_VFP3))
3251                          return 1;
3252                        gen_vfp_uhto(dp, 16 - rm, 0);
3253                        break;
3254                    case 23: /* fulto */
3255                        if (!arm_feature(env, ARM_FEATURE_VFP3))
3256                          return 1;
3257                        gen_vfp_ulto(dp, 32 - rm, 0);
3258                        break;
3259                    case 24: /* ftoui */
3260                        gen_vfp_toui(dp, 0);
3261                        break;
3262                    case 25: /* ftouiz */
3263                        gen_vfp_touiz(dp, 0);
3264                        break;
3265                    case 26: /* ftosi */
3266                        gen_vfp_tosi(dp, 0);
3267                        break;
3268                    case 27: /* ftosiz */
3269                        gen_vfp_tosiz(dp, 0);
3270                        break;
3271                    case 28: /* ftosh */
3272                        if (!arm_feature(env, ARM_FEATURE_VFP3))
3273                          return 1;
3274                        gen_vfp_tosh(dp, 16 - rm, 0);
3275                        break;
3276                    case 29: /* ftosl */
3277                        if (!arm_feature(env, ARM_FEATURE_VFP3))
3278                          return 1;
3279                        gen_vfp_tosl(dp, 32 - rm, 0);
3280                        break;
3281                    case 30: /* ftouh */
3282                        if (!arm_feature(env, ARM_FEATURE_VFP3))
3283                          return 1;
3284                        gen_vfp_touh(dp, 16 - rm, 0);
3285                        break;
3286                    case 31: /* ftoul */
3287                        if (!arm_feature(env, ARM_FEATURE_VFP3))
3288                          return 1;
3289                        gen_vfp_toul(dp, 32 - rm, 0);
3290                        break;
3291                    default: /* undefined */
3292                        printf ("rn:%d\n", rn);
3293                        return 1;
3294                    }
3295                    break;
3296                default: /* undefined */
3297                    printf ("op:%d\n", op);
3298                    return 1;
3299                }
3300
3301                /* Write back the result.  */
3302                if (op == 15 && (rn >= 8 && rn <= 11))
3303                    ; /* Comparison, do nothing.  */
3304                else if (op == 15 && dp && ((rn & 0x1c) == 0x18))
3305                    /* VCVT double to int: always integer result. */
3306                    gen_mov_vreg_F0(0, rd);
3307                else if (op == 15 && rn == 15)
3308                    /* conversion */
3309                    gen_mov_vreg_F0(!dp, rd);
3310                else
3311                    gen_mov_vreg_F0(dp, rd);
3312
3313                /* break out of the loop if we have finished  */
3314                if (veclen == 0)
3315                    break;
3316
3317                if (op == 15 && delta_m == 0) {
3318                    /* single source one-many */
3319                    while (veclen--) {
3320                        rd = ((rd + delta_d) & (bank_mask - 1))
3321                             | (rd & bank_mask);
3322                        gen_mov_vreg_F0(dp, rd);
3323                    }
3324                    break;
3325                }
3326                /* Setup the next operands.  */
3327                veclen--;
3328                rd = ((rd + delta_d) & (bank_mask - 1))
3329                     | (rd & bank_mask);
3330
3331                if (op == 15) {
3332                    /* One source operand.  */
3333                    rm = ((rm + delta_m) & (bank_mask - 1))
3334                         | (rm & bank_mask);
3335                    gen_mov_F0_vreg(dp, rm);
3336                } else {
3337                    /* Two source operands.  */
3338                    rn = ((rn + delta_d) & (bank_mask - 1))
3339                         | (rn & bank_mask);
3340                    gen_mov_F0_vreg(dp, rn);
3341                    if (delta_m) {
3342                        rm = ((rm + delta_m) & (bank_mask - 1))
3343                             | (rm & bank_mask);
3344                        gen_mov_F1_vreg(dp, rm);
3345                    }
3346                }
3347            }
3348        }
3349        break;
3350    case 0xc:
3351    case 0xd:
3352        if ((insn & 0x03e00000) == 0x00400000) {
3353            /* two-register transfer */
3354            rn = (insn >> 16) & 0xf;
3355            rd = (insn >> 12) & 0xf;
3356            if (dp) {
3357                VFP_DREG_M(rm, insn);
3358            } else {
3359                rm = VFP_SREG_M(insn);
3360            }
3361
3362            if (insn & ARM_CP_RW_BIT) {
3363                /* vfp->arm */
3364                if (dp) {
3365                    gen_mov_F0_vreg(0, rm * 2);
3366                    tmp = gen_vfp_mrs();
3367                    store_reg(s, rd, tmp);
3368                    gen_mov_F0_vreg(0, rm * 2 + 1);
3369                    tmp = gen_vfp_mrs();
3370                    store_reg(s, rn, tmp);
3371                } else {
3372                    gen_mov_F0_vreg(0, rm);
3373                    tmp = gen_vfp_mrs();
3374                    store_reg(s, rd, tmp);
3375                    gen_mov_F0_vreg(0, rm + 1);
3376                    tmp = gen_vfp_mrs();
3377                    store_reg(s, rn, tmp);
3378                }
3379            } else {
3380                /* arm->vfp */
3381                if (dp) {
3382                    tmp = load_reg(s, rd);
3383                    gen_vfp_msr(tmp);
3384                    gen_mov_vreg_F0(0, rm * 2);
3385                    tmp = load_reg(s, rn);
3386                    gen_vfp_msr(tmp);
3387                    gen_mov_vreg_F0(0, rm * 2 + 1);
3388                } else {
3389                    tmp = load_reg(s, rd);
3390                    gen_vfp_msr(tmp);
3391                    gen_mov_vreg_F0(0, rm);
3392                    tmp = load_reg(s, rn);
3393                    gen_vfp_msr(tmp);
3394                    gen_mov_vreg_F0(0, rm + 1);
3395                }
3396            }
3397        } else {
3398            /* Load/store */
3399            rn = (insn >> 16) & 0xf;
3400            if (dp)
3401                VFP_DREG_D(rd, insn);
3402            else
3403                rd = VFP_SREG_D(insn);
3404            if ((insn & 0x01200000) == 0x01000000) {
3405                /* Single load/store */
3406                offset = (insn & 0xff) << 2;
3407                if ((insn & (1 << 23)) == 0)
3408                    offset = -offset;
3409                if (s->thumb && rn == 15) {
3410                    /* This is actually UNPREDICTABLE */
3411                    addr = tcg_temp_new_i32();
3412                    tcg_gen_movi_i32(addr, s->pc & ~2);
3413                } else {
3414                    addr = load_reg(s, rn);
3415                }
3416                tcg_gen_addi_i32(addr, addr, offset);
3417                if (insn & (1 << 20)) {
3418                    gen_vfp_ld(s, dp, addr);
3419                    gen_mov_vreg_F0(dp, rd);
3420                } else {
3421                    gen_mov_F0_vreg(dp, rd);
3422                    gen_vfp_st(s, dp, addr);
3423                }
3424                tcg_temp_free_i32(addr);
3425            } else {
3426                /* load/store multiple */
3427                int w = insn & (1 << 21);
3428                if (dp)
3429                    n = (insn >> 1) & 0x7f;
3430                else
3431                    n = insn & 0xff;
3432
3433                if (w && !(((insn >> 23) ^ (insn >> 24)) & 1)) {
3434                    /* P == U , W == 1  => UNDEF */
3435                    return 1;
3436                }
3437                if (n == 0 || (rd + n) > 32 || (dp && n > 16)) {
3438                    /* UNPREDICTABLE cases for bad immediates: we choose to
3439                     * UNDEF to avoid generating huge numbers of TCG ops
3440                     */
3441                    return 1;
3442                }
3443                if (rn == 15 && w) {
3444                    /* writeback to PC is UNPREDICTABLE, we choose to UNDEF */
3445                    return 1;
3446                }
3447
3448                if (s->thumb && rn == 15) {
3449                    /* This is actually UNPREDICTABLE */
3450                    addr = tcg_temp_new_i32();
3451                    tcg_gen_movi_i32(addr, s->pc & ~2);
3452                } else {
3453                    addr = load_reg(s, rn);
3454                }
3455                if (insn & (1 << 24)) /* pre-decrement */
3456                    tcg_gen_addi_i32(addr, addr, -((insn & 0xff) << 2));
3457
3458                if (dp)
3459                    offset = 8;
3460                else
3461                    offset = 4;
3462                tmp = tcg_const_i32(offset);
3463                for (i = 0; i < n; i++) {
3464                    if (insn & ARM_CP_RW_BIT) {
3465                        /* load */
3466                        gen_vfp_ld(s, dp, addr);
3467                        gen_mov_vreg_F0(dp, rd + i);
3468                    } else {
3469                        /* store */
3470                        gen_mov_F0_vreg(dp, rd + i);
3471                        gen_vfp_st(s, dp, addr);
3472                    }
3473                    tcg_gen_add_i32(addr, addr, tmp);
3474                }
3475                tcg_temp_free_i32(tmp);
3476                if (w) {
3477                    /* writeback */
3478                    if (insn & (1 << 24))
3479                        offset = -offset * n;
3480                    else if (dp && (insn & 1))
3481                        offset = 4;
3482                    else
3483                        offset = 0;
3484
3485                    if (offset != 0)
3486                        tcg_gen_addi_i32(addr, addr, offset);
3487                    store_reg(s, rn, addr);
3488                } else {
3489                    tcg_temp_free_i32(addr);
3490                }
3491            }
3492        }
3493        break;
3494    default:
3495        /* Should never happen.  */
3496        return 1;
3497    }
3498    return 0;
3499}
3500
3501static inline void gen_goto_tb(DisasContext *s, int n, uint32_t dest)
3502{
3503    TranslationBlock *tb;
3504
3505    tb = s->tb;
3506    if ((tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK)) {
3507        tcg_gen_goto_tb(n);
3508        gen_set_pc_im(dest);
3509        tcg_gen_exit_tb((tcg_target_long)tb + n);
3510    } else {
3511        gen_set_pc_im(dest);
3512        tcg_gen_exit_tb(0);
3513    }
3514}
3515
3516static inline void gen_jmp (DisasContext *s, uint32_t dest)
3517{
3518    if (unlikely(s->singlestep_enabled)) {
3519        /* An indirect jump so that we still trigger the debug exception.  */
3520        if (s->thumb)
3521            dest |= 1;
3522        gen_bx_im(s, dest);
3523    } else {
3524        gen_goto_tb(s, 0, dest);
3525        s->is_jmp = DISAS_TB_JUMP;
3526    }
3527}
3528
3529static inline void gen_mulxy(TCGv t0, TCGv t1, int x, int y)
3530{
3531    if (x)
3532        tcg_gen_sari_i32(t0, t0, 16);
3533    else
3534        gen_sxth(t0);
3535    if (y)
3536        tcg_gen_sari_i32(t1, t1, 16);
3537    else
3538        gen_sxth(t1);
3539    tcg_gen_mul_i32(t0, t0, t1);
3540}
3541
3542/* Return the mask of PSR bits set by a MSR instruction.  */
3543static uint32_t msr_mask(CPUARMState *env, DisasContext *s, int flags, int spsr) {
3544    uint32_t mask;
3545
3546    mask = 0;
3547    if (flags & (1 << 0))
3548        mask |= 0xff;
3549    if (flags & (1 << 1))
3550        mask |= 0xff00;
3551    if (flags & (1 << 2))
3552        mask |= 0xff0000;
3553    if (flags & (1 << 3))
3554        mask |= 0xff000000;
3555
3556    /* Mask out undefined bits.  */
3557    mask &= ~CPSR_RESERVED;
3558    if (!arm_feature(env, ARM_FEATURE_V4T))
3559        mask &= ~CPSR_T;
3560    if (!arm_feature(env, ARM_FEATURE_V5))
3561        mask &= ~CPSR_Q; /* V5TE in reality*/
3562    if (!arm_feature(env, ARM_FEATURE_V6))
3563        mask &= ~(CPSR_E | CPSR_GE);
3564    if (!arm_feature(env, ARM_FEATURE_THUMB2))
3565        mask &= ~CPSR_IT;
3566    /* Mask out execution state bits.  */
3567    if (!spsr)
3568        mask &= ~CPSR_EXEC;
3569    /* Mask out privileged bits.  */
3570    if (IS_USER(s))
3571        mask &= CPSR_USER;
3572    return mask;
3573}
3574
3575/* Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. */
3576static int gen_set_psr(DisasContext *s, uint32_t mask, int spsr, TCGv t0)
3577{
3578    TCGv tmp;
3579    if (spsr) {
3580        /* ??? This is also undefined in system mode.  */
3581        if (IS_USER(s))
3582            return 1;
3583
3584        tmp = load_cpu_field(spsr);
3585        tcg_gen_andi_i32(tmp, tmp, ~mask);
3586        tcg_gen_andi_i32(t0, t0, mask);
3587        tcg_gen_or_i32(tmp, tmp, t0);
3588        store_cpu_field(tmp, spsr);
3589    } else {
3590        gen_set_cpsr(t0, mask);
3591    }
3592    tcg_temp_free_i32(t0);
3593    gen_lookup_tb(s);
3594    return 0;
3595}
3596
3597/* Returns nonzero if access to the PSR is not permitted.  */
3598static int gen_set_psr_im(DisasContext *s, uint32_t mask, int spsr, uint32_t val)
3599{
3600    TCGv tmp;
3601    tmp = tcg_temp_new_i32();
3602    tcg_gen_movi_i32(tmp, val);
3603    return gen_set_psr(s, mask, spsr, tmp);
3604}
3605
3606/* Generate an old-style exception return. Marks pc as dead. */
3607static void gen_exception_return(DisasContext *s, TCGv pc)
3608{
3609    TCGv tmp;
3610    store_reg(s, 15, pc);
3611    tmp = load_cpu_field(spsr);
3612    gen_set_cpsr(tmp, 0xffffffff);
3613    tcg_temp_free_i32(tmp);
3614    s->is_jmp = DISAS_UPDATE;
3615}
3616
3617/* Generate a v6 exception return.  Marks both values as dead.  */
3618static void gen_rfe(DisasContext *s, TCGv pc, TCGv cpsr)
3619{
3620    gen_set_cpsr(cpsr, 0xffffffff);
3621    tcg_temp_free_i32(cpsr);
3622    store_reg(s, 15, pc);
3623    s->is_jmp = DISAS_UPDATE;
3624}
3625
3626static inline void
3627gen_set_condexec (DisasContext *s)
3628{
3629    if (s->condexec_mask) {
3630        uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
3631        TCGv tmp = tcg_temp_new_i32();
3632        tcg_gen_movi_i32(tmp, val);
3633        store_cpu_field(tmp, condexec_bits);
3634    }
3635}
3636
3637static void gen_exception_insn(DisasContext *s, int offset, int excp)
3638{
3639    gen_set_condexec(s);
3640    gen_set_pc_im(s->pc - offset);
3641    gen_exception(excp);
3642    s->is_jmp = DISAS_JUMP;
3643}
3644
3645static void gen_nop_hint(DisasContext *s, int val)
3646{
3647    switch (val) {
3648    case 3: /* wfi */
3649        gen_set_pc_im(s->pc);
3650        s->is_jmp = DISAS_WFI;
3651        break;
3652    case 2: /* wfe */
3653    case 4: /* sev */
3654        /* TODO: Implement SEV and WFE.  May help SMP performance.  */
3655    default: /* nop */
3656        break;
3657    }
3658}
3659
3660#define CPU_V001 cpu_V0, cpu_V0, cpu_V1
3661
3662static inline void gen_neon_add(int size, TCGv t0, TCGv t1)
3663{
3664    switch (size) {
3665    case 0: gen_helper_neon_add_u8(t0, t0, t1); break;
3666    case 1: gen_helper_neon_add_u16(t0, t0, t1); break;
3667    case 2: tcg_gen_add_i32(t0, t0, t1); break;
3668    default: abort();
3669    }
3670}
3671
3672static inline void gen_neon_rsb(int size, TCGv t0, TCGv t1)
3673{
3674    switch (size) {
3675    case 0: gen_helper_neon_sub_u8(t0, t1, t0); break;
3676    case 1: gen_helper_neon_sub_u16(t0, t1, t0); break;
3677    case 2: tcg_gen_sub_i32(t0, t1, t0); break;
3678    default: return;
3679    }
3680}
3681
3682/* 32-bit pairwise ops end up the same as the elementwise versions.  */
3683#define gen_helper_neon_pmax_s32  gen_helper_neon_max_s32
3684#define gen_helper_neon_pmax_u32  gen_helper_neon_max_u32
3685#define gen_helper_neon_pmin_s32  gen_helper_neon_min_s32
3686#define gen_helper_neon_pmin_u32  gen_helper_neon_min_u32
3687
3688#define GEN_NEON_INTEGER_OP_ENV(name) do { \
3689    switch ((size << 1) | u) { \
3690    case 0: \
3691        gen_helper_neon_##name##_s8(tmp, cpu_env, tmp, tmp2); \
3692        break; \
3693    case 1: \
3694        gen_helper_neon_##name##_u8(tmp, cpu_env, tmp, tmp2); \
3695        break; \
3696    case 2: \
3697        gen_helper_neon_##name##_s16(tmp, cpu_env, tmp, tmp2); \
3698        break; \
3699    case 3: \
3700        gen_helper_neon_##name##_u16(tmp, cpu_env, tmp, tmp2); \
3701        break; \
3702    case 4: \
3703        gen_helper_neon_##name##_s32(tmp, cpu_env, tmp, tmp2); \
3704        break; \
3705    case 5: \
3706        gen_helper_neon_##name##_u32(tmp, cpu_env, tmp, tmp2); \
3707        break; \
3708    default: return 1; \
3709    }} while (0)
3710
3711#define GEN_NEON_INTEGER_OP(name) do { \
3712    switch ((size << 1) | u) { \
3713    case 0: \
3714        gen_helper_neon_##name##_s8(tmp, tmp, tmp2); \
3715        break; \
3716    case 1: \
3717        gen_helper_neon_##name##_u8(tmp, tmp, tmp2); \
3718        break; \
3719    case 2: \
3720        gen_helper_neon_##name##_s16(tmp, tmp, tmp2); \
3721        break; \
3722    case 3: \
3723        gen_helper_neon_##name##_u16(tmp, tmp, tmp2); \
3724        break; \
3725    case 4: \
3726        gen_helper_neon_##name##_s32(tmp, tmp, tmp2); \
3727        break; \
3728    case 5: \
3729        gen_helper_neon_##name##_u32(tmp, tmp, tmp2); \
3730        break; \
3731    default: return 1; \
3732    }} while (0)
3733
3734static TCGv neon_load_scratch(int scratch)
3735{
3736    TCGv tmp = tcg_temp_new_i32();
3737    tcg_gen_ld_i32(tmp, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
3738    return tmp;
3739}
3740
3741static void neon_store_scratch(int scratch, TCGv var)
3742{
3743    tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
3744    tcg_temp_free_i32(var);
3745}
3746
3747static inline TCGv neon_get_scalar(int size, int reg)
3748{
3749    TCGv tmp;
3750    if (size == 1) {
3751        tmp = neon_load_reg(reg & 7, reg >> 4);
3752        if (reg & 8) {
3753            gen_neon_dup_high16(tmp);
3754        } else {
3755            gen_neon_dup_low16(tmp);
3756        }
3757    } else {
3758        tmp = neon_load_reg(reg & 15, reg >> 4);
3759    }
3760    return tmp;
3761}
3762
3763static int gen_neon_unzip(int rd, int rm, int size, int q)
3764{
3765    TCGv tmp, tmp2;
3766    if (!q && size == 2) {
3767        return 1;
3768    }
3769    tmp = tcg_const_i32(rd);
3770    tmp2 = tcg_const_i32(rm);
3771    if (q) {
3772        switch (size) {
3773        case 0:
3774            gen_helper_neon_qunzip8(cpu_env, tmp, tmp2);
3775            break;
3776        case 1:
3777            gen_helper_neon_qunzip16(cpu_env, tmp, tmp2);
3778            break;
3779        case 2:
3780            gen_helper_neon_qunzip32(cpu_env, tmp, tmp2);
3781            break;
3782        default:
3783            abort();
3784        }
3785    } else {
3786        switch (size) {
3787        case 0:
3788            gen_helper_neon_unzip8(cpu_env, tmp, tmp2);
3789            break;
3790        case 1:
3791            gen_helper_neon_unzip16(cpu_env, tmp, tmp2);
3792            break;
3793        default:
3794            abort();
3795        }
3796    }
3797    tcg_temp_free_i32(tmp);
3798    tcg_temp_free_i32(tmp2);
3799    return 0;
3800}
3801
3802static int gen_neon_zip(int rd, int rm, int size, int q)
3803{
3804    TCGv tmp, tmp2;
3805    if (!q && size == 2) {
3806        return 1;
3807    }
3808    tmp = tcg_const_i32(rd);
3809    tmp2 = tcg_const_i32(rm);
3810    if (q) {
3811        switch (size) {
3812        case 0:
3813            gen_helper_neon_qzip8(cpu_env, tmp, tmp2);
3814            break;
3815        case 1:
3816            gen_helper_neon_qzip16(cpu_env, tmp, tmp2);
3817            break;
3818        case 2:
3819            gen_helper_neon_qzip32(cpu_env, tmp, tmp2);
3820            break;
3821        default:
3822            abort();
3823        }
3824    } else {
3825        switch (size) {
3826        case 0:
3827            gen_helper_neon_zip8(cpu_env, tmp, tmp2);
3828            break;
3829        case 1:
3830            gen_helper_neon_zip16(cpu_env, tmp, tmp2);
3831            break;
3832        default:
3833            abort();
3834        }
3835    }
3836    tcg_temp_free_i32(tmp);
3837    tcg_temp_free_i32(tmp2);
3838    return 0;
3839}
3840
3841static void gen_neon_trn_u8(TCGv t0, TCGv t1)
3842{
3843    TCGv rd, tmp;
3844
3845    rd = tcg_temp_new_i32();
3846    tmp = tcg_temp_new_i32();
3847
3848    tcg_gen_shli_i32(rd, t0, 8);
3849    tcg_gen_andi_i32(rd, rd, 0xff00ff00);
3850    tcg_gen_andi_i32(tmp, t1, 0x00ff00ff);
3851    tcg_gen_or_i32(rd, rd, tmp);
3852
3853    tcg_gen_shri_i32(t1, t1, 8);
3854    tcg_gen_andi_i32(t1, t1, 0x00ff00ff);
3855    tcg_gen_andi_i32(tmp, t0, 0xff00ff00);
3856    tcg_gen_or_i32(t1, t1, tmp);
3857    tcg_gen_mov_i32(t0, rd);
3858
3859    tcg_temp_free_i32(tmp);
3860    tcg_temp_free_i32(rd);
3861}
3862
3863static void gen_neon_trn_u16(TCGv t0, TCGv t1)
3864{
3865    TCGv rd, tmp;
3866
3867    rd = tcg_temp_new_i32();
3868    tmp = tcg_temp_new_i32();
3869
3870    tcg_gen_shli_i32(rd, t0, 16);
3871    tcg_gen_andi_i32(tmp, t1, 0xffff);
3872    tcg_gen_or_i32(rd, rd, tmp);
3873    tcg_gen_shri_i32(t1, t1, 16);
3874    tcg_gen_andi_i32(tmp, t0, 0xffff0000);
3875    tcg_gen_or_i32(t1, t1, tmp);
3876    tcg_gen_mov_i32(t0, rd);
3877
3878    tcg_temp_free_i32(tmp);
3879    tcg_temp_free_i32(rd);
3880}
3881
3882
3883static struct {
3884    int nregs;
3885    int interleave;
3886    int spacing;
3887} neon_ls_element_type[11] = {
3888    {4, 4, 1},
3889    {4, 4, 2},
3890    {4, 1, 1},
3891    {4, 2, 1},
3892    {3, 3, 1},
3893    {3, 3, 2},
3894    {3, 1, 1},
3895    {1, 1, 1},
3896    {2, 2, 1},
3897    {2, 2, 2},
3898    {2, 1, 1}
3899};
3900
3901/* Translate a NEON load/store element instruction.  Return nonzero if the
3902   instruction is invalid.  */
3903static int disas_neon_ls_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
3904{
3905    int rd, rn, rm;
3906    int op;
3907    int nregs;
3908    int interleave;
3909    int spacing;
3910    int stride;
3911    int size;
3912    int reg;
3913    int pass;
3914    int load;
3915    int shift;
3916    TCGv addr;
3917    TCGv tmp;
3918    TCGv tmp2;
3919
3920    if (!s->vfp_enabled)
3921      return 1;
3922    VFP_DREG_D(rd, insn);
3923    rn = (insn >> 16) & 0xf;
3924    rm = insn & 0xf;
3925    load = (insn & (1 << 21)) != 0;
3926    if ((insn & (1 << 23)) == 0) {
3927        /* Load store all elements.  */
3928        op = (insn >> 8) & 0xf;
3929        size = (insn >> 6) & 3;
3930        if (op > 10)
3931            return 1;
3932        /* Catch UNDEF cases for bad values of align field */
3933        switch (op & 0xc) {
3934        case 4:
3935            if (((insn >> 5) & 1) == 1) {
3936                return 1;
3937            }
3938            break;
3939        case 8:
3940            if (((insn >> 4) & 3) == 3) {
3941                return 1;
3942            }
3943            break;
3944        default:
3945            break;
3946        }
3947        nregs = neon_ls_element_type[op].nregs;
3948        interleave = neon_ls_element_type[op].interleave;
3949        spacing = neon_ls_element_type[op].spacing;
3950        if (size == 3 && (interleave | spacing) != 1) {
3951            return 1;
3952        }
3953        addr = tcg_const_i32(insn);
3954        gen_helper_neon_vldst_all(cpu_env, addr);
3955        tcg_temp_free_i32(addr);
3956        stride = nregs * 8;
3957    } else {
3958        size = (insn >> 10) & 3;
3959        if (size == 3) {
3960            /* Load single element to all lanes.  */
3961            int a = (insn >> 4) & 1;
3962            if (!load) {
3963                return 1;
3964            }
3965            size = (insn >> 6) & 3;
3966            nregs = ((insn >> 8) & 3) + 1;
3967
3968            if (size == 3) {
3969                if (nregs != 4 || a == 0) {
3970                    return 1;
3971                }
3972                /* For VLD4 size==3 a == 1 means 32 bits at 16 byte alignment */
3973                size = 2;
3974            }
3975            if (nregs == 1 && a == 1 && size == 0) {
3976                return 1;
3977            }
3978            if (nregs == 3 && a == 1) {
3979                return 1;
3980            }
3981            addr = tcg_temp_new_i32();
3982            load_reg_var(s, addr, rn);
3983            if (nregs == 1) {
3984                /* VLD1 to all lanes: bit 5 indicates how many Dregs to write */
3985                tmp = gen_load_and_replicate(s, addr, size);
3986                tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 0));
3987                tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 1));
3988                if (insn & (1 << 5)) {
3989                    tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd + 1, 0));
3990                    tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd + 1, 1));
3991                }
3992                tcg_temp_free_i32(tmp);
3993            } else {
3994                /* VLD2/3/4 to all lanes: bit 5 indicates register stride */
3995                stride = (insn & (1 << 5)) ? 2 : 1;
3996                for (reg = 0; reg < nregs; reg++) {
3997                    tmp = gen_load_and_replicate(s, addr, size);
3998                    tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 0));
3999                    tcg_gen_st_i32(tmp, cpu_env, neon_reg_offset(rd, 1));
4000                    tcg_temp_free_i32(tmp);
4001                    tcg_gen_addi_i32(addr, addr, 1 << size);
4002                    rd += stride;
4003                }
4004            }
4005            tcg_temp_free_i32(addr);
4006            stride = (1 << size) * nregs;
4007        } else {
4008            /* Single element.  */
4009            int idx = (insn >> 4) & 0xf;
4010            pass = (insn >> 7) & 1;
4011            switch (size) {
4012            case 0:
4013                shift = ((insn >> 5) & 3) * 8;
4014                stride = 1;
4015                break;
4016            case 1:
4017                shift = ((insn >> 6) & 1) * 16;
4018                stride = (insn & (1 << 5)) ? 2 : 1;
4019                break;
4020            case 2:
4021                shift = 0;
4022                stride = (insn & (1 << 6)) ? 2 : 1;
4023                break;
4024            default:
4025                abort();
4026            }
4027            nregs = ((insn >> 8) & 3) + 1;
4028            /* Catch the UNDEF cases. This is unavoidably a bit messy. */
4029            switch (nregs) {
4030            case 1:
4031                if (((idx & (1 << size)) != 0) ||
4032                    (size == 2 && ((idx & 3) == 1 || (idx & 3) == 2))) {
4033                    return 1;
4034                }
4035                break;
4036            case 3:
4037                if ((idx & 1) != 0) {
4038                    return 1;
4039                }
4040                /* fall through */
4041            case 2:
4042                if (size == 2 && (idx & 2) != 0) {
4043                    return 1;
4044                }
4045                break;
4046            case 4:
4047                if ((size == 2) && ((idx & 3) == 3)) {
4048                    return 1;
4049                }
4050                break;
4051            default:
4052                abort();
4053            }
4054            if ((rd + stride * (nregs - 1)) > 31) {
4055                /* Attempts to write off the end of the register file
4056                 * are UNPREDICTABLE; we choose to UNDEF because otherwise
4057                 * the neon_load_reg() would write off the end of the array.
4058                 */
4059                return 1;
4060            }
4061            addr = tcg_temp_new_i32();
4062            load_reg_var(s, addr, rn);
4063            for (reg = 0; reg < nregs; reg++) {
4064                if (load) {
4065                    switch (size) {
4066                    case 0:
4067                        tmp = gen_ld8u(addr, IS_USER(s));
4068                        break;
4069                    case 1:
4070                        tmp = gen_ld16u(addr, IS_USER(s));
4071                        break;
4072                    case 2:
4073                        tmp = gen_ld32(addr, IS_USER(s));
4074                        break;
4075                    default: /* Avoid compiler warnings.  */
4076                        abort();
4077                    }
4078                    if (size != 2) {
4079                        tmp2 = neon_load_reg(rd, pass);
4080                        gen_bfi(tmp, tmp2, tmp, shift, size ? 0xffff : 0xff);
4081                        tcg_temp_free_i32(tmp2);
4082                    }
4083                    neon_store_reg(rd, pass, tmp);
4084                } else { /* Store */
4085                    tmp = neon_load_reg(rd, pass);
4086                    if (shift)
4087                        tcg_gen_shri_i32(tmp, tmp, shift);
4088                    switch (size) {
4089                    case 0:
4090                        gen_st8(tmp, addr, IS_USER(s));
4091                        break;
4092                    case 1:
4093                        gen_st16(tmp, addr, IS_USER(s));
4094                        break;
4095                    case 2:
4096                        gen_st32(tmp, addr, IS_USER(s));
4097                        break;
4098                    }
4099                }
4100                rd += stride;
4101                tcg_gen_addi_i32(addr, addr, 1 << size);
4102            }
4103            tcg_temp_free_i32(addr);
4104            stride = nregs * (1 << size);
4105        }
4106    }
4107    if (rm != 15) {
4108        TCGv base;
4109
4110        base = load_reg(s, rn);
4111        if (rm == 13) {
4112            tcg_gen_addi_i32(base, base, stride);
4113        } else {
4114            TCGv index;
4115            index = load_reg(s, rm);
4116            tcg_gen_add_i32(base, base, index);
4117            tcg_temp_free_i32(index);
4118        }
4119        store_reg(s, rn, base);
4120    }
4121    return 0;
4122}
4123
4124/* Bitwise select.  dest = c ? t : f.  Clobbers T and F.  */
4125static void gen_neon_bsl(TCGv dest, TCGv t, TCGv f, TCGv c)
4126{
4127    tcg_gen_and_i32(t, t, c);
4128    tcg_gen_andc_i32(f, f, c);
4129    tcg_gen_or_i32(dest, t, f);
4130}
4131
4132static inline void gen_neon_narrow(int size, TCGv dest, TCGv_i64 src)
4133{
4134    switch (size) {
4135    case 0: gen_helper_neon_narrow_u8(dest, src); break;
4136    case 1: gen_helper_neon_narrow_u16(dest, src); break;
4137    case 2: tcg_gen_trunc_i64_i32(dest, src); break;
4138    default: abort();
4139    }
4140}
4141
4142static inline void gen_neon_narrow_sats(int size, TCGv dest, TCGv_i64 src)
4143{
4144    switch (size) {
4145    case 0: gen_helper_neon_narrow_sat_s8(dest, cpu_env, src); break;
4146    case 1: gen_helper_neon_narrow_sat_s16(dest, cpu_env, src); break;
4147    case 2: gen_helper_neon_narrow_sat_s32(dest, cpu_env, src); break;
4148    default: abort();
4149    }
4150}
4151
4152static inline void gen_neon_narrow_satu(int size, TCGv dest, TCGv_i64 src)
4153{
4154    switch (size) {
4155    case 0: gen_helper_neon_narrow_sat_u8(dest, cpu_env, src); break;
4156    case 1: gen_helper_neon_narrow_sat_u16(dest, cpu_env, src); break;
4157    case 2: gen_helper_neon_narrow_sat_u32(dest, cpu_env, src); break;
4158    default: abort();
4159    }
4160}
4161
4162static inline void gen_neon_unarrow_sats(int size, TCGv dest, TCGv_i64 src)
4163{
4164    switch (size) {
4165    case 0: gen_helper_neon_unarrow_sat8(dest, cpu_env, src); break;
4166    case 1: gen_helper_neon_unarrow_sat16(dest, cpu_env, src); break;
4167    case 2: gen_helper_neon_unarrow_sat32(dest, cpu_env, src); break;
4168    default: abort();
4169    }
4170}
4171
4172static inline void gen_neon_shift_narrow(int size, TCGv var, TCGv shift,
4173                                         int q, int u)
4174{
4175    if (q) {
4176        if (u) {
4177            switch (size) {
4178            case 1: gen_helper_neon_rshl_u16(var, var, shift); break;
4179            case 2: gen_helper_neon_rshl_u32(var, var, shift); break;
4180            default: abort();
4181            }
4182        } else {
4183            switch (size) {
4184            case 1: gen_helper_neon_rshl_s16(var, var, shift); break;
4185            case 2: gen_helper_neon_rshl_s32(var, var, shift); break;
4186            default: abort();
4187            }
4188        }
4189    } else {
4190        if (u) {
4191            switch (size) {
4192            case 1: gen_helper_neon_shl_u16(var, var, shift); break;
4193            case 2: gen_helper_neon_shl_u32(var, var, shift); break;
4194            default: abort();
4195            }
4196        } else {
4197            switch (size) {
4198            case 1: gen_helper_neon_shl_s16(var, var, shift); break;
4199            case 2: gen_helper_neon_shl_s32(var, var, shift); break;
4200            default: abort();
4201            }
4202        }
4203    }
4204}
4205
4206static inline void gen_neon_widen(TCGv_i64 dest, TCGv src, int size, int u)
4207{
4208    if (u) {
4209        switch (size) {
4210        case 0: gen_helper_neon_widen_u8(dest, src); break;
4211        case 1: gen_helper_neon_widen_u16(dest, src); break;
4212        case 2: tcg_gen_extu_i32_i64(dest, src); break;
4213        default: abort();
4214        }
4215    } else {
4216        switch (size) {
4217        case 0: gen_helper_neon_widen_s8(dest, src); break;
4218        case 1: gen_helper_neon_widen_s16(dest, src); break;
4219        case 2: tcg_gen_ext_i32_i64(dest, src); break;
4220        default: abort();
4221        }
4222    }
4223    tcg_temp_free_i32(src);
4224}
4225
4226static inline void gen_neon_addl(int size)
4227{
4228    switch (size) {
4229    case 0: gen_helper_neon_addl_u16(CPU_V001); break;
4230    case 1: gen_helper_neon_addl_u32(CPU_V001); break;
4231    case 2: tcg_gen_add_i64(CPU_V001); break;
4232    default: abort();
4233    }
4234}
4235
4236static inline void gen_neon_subl(int size)
4237{
4238    switch (size) {
4239    case 0: gen_helper_neon_subl_u16(CPU_V001); break;
4240    case 1: gen_helper_neon_subl_u32(CPU_V001); break;
4241    case 2: tcg_gen_sub_i64(CPU_V001); break;
4242    default: abort();
4243    }
4244}
4245
4246static inline void gen_neon_negl(TCGv_i64 var, int size)
4247{
4248    switch (size) {
4249    case 0: gen_helper_neon_negl_u16(var, var); break;
4250    case 1: gen_helper_neon_negl_u32(var, var); break;
4251    case 2: gen_helper_neon_negl_u64(var, var); break;
4252    default: abort();
4253    }
4254}
4255
4256static inline void gen_neon_addl_saturate(TCGv_i64 op0, TCGv_i64 op1, int size)
4257{
4258    switch (size) {
4259    case 1: gen_helper_neon_addl_saturate_s32(op0, cpu_env, op0, op1); break;
4260    case 2: gen_helper_neon_addl_saturate_s64(op0, cpu_env, op0, op1); break;
4261    default: abort();
4262    }
4263}
4264
4265static inline void gen_neon_mull(TCGv_i64 dest, TCGv a, TCGv b, int size, int u)
4266{
4267    TCGv_i64 tmp;
4268
4269    switch ((size << 1) | u) {
4270    case 0: gen_helper_neon_mull_s8(dest, a, b); break;
4271    case 1: gen_helper_neon_mull_u8(dest, a, b); break;
4272    case 2: gen_helper_neon_mull_s16(dest, a, b); break;
4273    case 3: gen_helper_neon_mull_u16(dest, a, b); break;
4274    case 4:
4275        tmp = gen_muls_i64_i32(a, b);
4276        tcg_gen_mov_i64(dest, tmp);
4277        tcg_temp_free_i64(tmp);
4278        break;
4279    case 5:
4280        tmp = gen_mulu_i64_i32(a, b);
4281        tcg_gen_mov_i64(dest, tmp);
4282        tcg_temp_free_i64(tmp);
4283        break;
4284    default: abort();
4285    }
4286
4287    /* gen_helper_neon_mull_[su]{8|16} do not free their parameters.
4288       Don't forget to clean them now.  */
4289    if (size < 2) {
4290        tcg_temp_free_i32(a);
4291        tcg_temp_free_i32(b);
4292    }
4293}
4294
4295static void gen_neon_narrow_op(int op, int u, int size, TCGv dest, TCGv_i64 src)
4296{
4297    if (op) {
4298        if (u) {
4299            gen_neon_unarrow_sats(size, dest, src);
4300        } else {
4301            gen_neon_narrow(size, dest, src);
4302        }
4303    } else {
4304        if (u) {
4305            gen_neon_narrow_satu(size, dest, src);
4306        } else {
4307            gen_neon_narrow_sats(size, dest, src);
4308        }
4309    }
4310}
4311
4312/* Symbolic constants for op fields for Neon 3-register same-length.
4313 * The values correspond to bits [11:8,4]; see the ARM ARM DDI0406B
4314 * table A7-9.
4315 */
4316#define NEON_3R_VHADD 0
4317#define NEON_3R_VQADD 1
4318#define NEON_3R_VRHADD 2
4319#define NEON_3R_LOGIC 3 /* VAND,VBIC,VORR,VMOV,VORN,VEOR,VBIF,VBIT,VBSL */
4320#define NEON_3R_VHSUB 4
4321#define NEON_3R_VQSUB 5
4322#define NEON_3R_VCGT 6
4323#define NEON_3R_VCGE 7
4324#define NEON_3R_VSHL 8
4325#define NEON_3R_VQSHL 9
4326#define NEON_3R_VRSHL 10
4327#define NEON_3R_VQRSHL 11
4328#define NEON_3R_VMAX 12
4329#define NEON_3R_VMIN 13
4330#define NEON_3R_VABD 14
4331#define NEON_3R_VABA 15
4332#define NEON_3R_VADD_VSUB 16
4333#define NEON_3R_VTST_VCEQ 17
4334#define NEON_3R_VML 18 /* VMLA, VMLAL, VMLS, VMLSL */
4335#define NEON_3R_VMUL 19
4336#define NEON_3R_VPMAX 20
4337#define NEON_3R_VPMIN 21
4338#define NEON_3R_VQDMULH_VQRDMULH 22
4339#define NEON_3R_VPADD 23
4340#define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */
4341#define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */
4342#define NEON_3R_FLOAT_CMP 28 /* float VCEQ, VCGE, VCGT */
4343#define NEON_3R_FLOAT_ACMP 29 /* float VACGE, VACGT, VACLE, VACLT */
4344#define NEON_3R_FLOAT_MINMAX 30 /* float VMIN, VMAX */
4345#define NEON_3R_VRECPS_VRSQRTS 31 /* float VRECPS, VRSQRTS */
4346
4347static const uint8_t neon_3r_sizes[] = {
4348    [NEON_3R_VHADD] = 0x7,
4349    [NEON_3R_VQADD] = 0xf,
4350    [NEON_3R_VRHADD] = 0x7,
4351    [NEON_3R_LOGIC] = 0xf, /* size field encodes op type */
4352    [NEON_3R_VHSUB] = 0x7,
4353    [NEON_3R_VQSUB] = 0xf,
4354    [NEON_3R_VCGT] = 0x7,
4355    [NEON_3R_VCGE] = 0x7,
4356    [NEON_3R_VSHL] = 0xf,
4357    [NEON_3R_VQSHL] = 0xf,
4358    [NEON_3R_VRSHL] = 0xf,
4359    [NEON_3R_VQRSHL] = 0xf,
4360    [NEON_3R_VMAX] = 0x7,
4361    [NEON_3R_VMIN] = 0x7,
4362    [NEON_3R_VABD] = 0x7,
4363    [NEON_3R_VABA] = 0x7,
4364    [NEON_3R_VADD_VSUB] = 0xf,
4365    [NEON_3R_VTST_VCEQ] = 0x7,
4366    [NEON_3R_VML] = 0x7,
4367    [NEON_3R_VMUL] = 0x7,
4368    [NEON_3R_VPMAX] = 0x7,
4369    [NEON_3R_VPMIN] = 0x7,
4370    [NEON_3R_VQDMULH_VQRDMULH] = 0x6,
4371    [NEON_3R_VPADD] = 0x7,
4372    [NEON_3R_FLOAT_ARITH] = 0x5, /* size bit 1 encodes op */
4373    [NEON_3R_FLOAT_MULTIPLY] = 0x5, /* size bit 1 encodes op */
4374    [NEON_3R_FLOAT_CMP] = 0x5, /* size bit 1 encodes op */
4375    [NEON_3R_FLOAT_ACMP] = 0x5, /* size bit 1 encodes op */
4376    [NEON_3R_FLOAT_MINMAX] = 0x5, /* size bit 1 encodes op */
4377    [NEON_3R_VRECPS_VRSQRTS] = 0x5, /* size bit 1 encodes op */
4378};
4379
4380/* Symbolic constants for op fields for Neon 2-register miscellaneous.
4381 * The values correspond to bits [17:16,10:7]; see the ARM ARM DDI0406B
4382 * table A7-13.
4383 */
4384#define NEON_2RM_VREV64 0
4385#define NEON_2RM_VREV32 1
4386#define NEON_2RM_VREV16 2
4387#define NEON_2RM_VPADDL 4
4388#define NEON_2RM_VPADDL_U 5
4389#define NEON_2RM_VCLS 8
4390#define NEON_2RM_VCLZ 9
4391#define NEON_2RM_VCNT 10
4392#define NEON_2RM_VMVN 11
4393#define NEON_2RM_VPADAL 12
4394#define NEON_2RM_VPADAL_U 13
4395#define NEON_2RM_VQABS 14
4396#define NEON_2RM_VQNEG 15
4397#define NEON_2RM_VCGT0 16
4398#define NEON_2RM_VCGE0 17
4399#define NEON_2RM_VCEQ0 18
4400#define NEON_2RM_VCLE0 19
4401#define NEON_2RM_VCLT0 20
4402#define NEON_2RM_VABS 22
4403#define NEON_2RM_VNEG 23
4404#define NEON_2RM_VCGT0_F 24
4405#define NEON_2RM_VCGE0_F 25
4406#define NEON_2RM_VCEQ0_F 26
4407#define NEON_2RM_VCLE0_F 27
4408#define NEON_2RM_VCLT0_F 28
4409#define NEON_2RM_VABS_F 30
4410#define NEON_2RM_VNEG_F 31
4411#define NEON_2RM_VSWP 32
4412#define NEON_2RM_VTRN 33
4413#define NEON_2RM_VUZP 34
4414#define NEON_2RM_VZIP 35
4415#define NEON_2RM_VMOVN 36 /* Includes VQMOVN, VQMOVUN */
4416#define NEON_2RM_VQMOVN 37 /* Includes VQMOVUN */
4417#define NEON_2RM_VSHLL 38
4418#define NEON_2RM_VCVT_F16_F32 44
4419#define NEON_2RM_VCVT_F32_F16 46
4420#define NEON_2RM_VRECPE 56
4421#define NEON_2RM_VRSQRTE 57
4422#define NEON_2RM_VRECPE_F 58
4423#define NEON_2RM_VRSQRTE_F 59
4424#define NEON_2RM_VCVT_FS 60
4425#define NEON_2RM_VCVT_FU 61
4426#define NEON_2RM_VCVT_SF 62
4427#define NEON_2RM_VCVT_UF 63
4428
4429static int neon_2rm_is_float_op(int op)
4430{
4431    /* Return true if this neon 2reg-misc op is float-to-float */
4432    return (op == NEON_2RM_VABS_F || op == NEON_2RM_VNEG_F ||
4433            op >= NEON_2RM_VRECPE_F);
4434}
4435
4436/* Each entry in this array has bit n set if the insn allows
4437 * size value n (otherwise it will UNDEF). Since unallocated
4438 * op values will have no bits set they always UNDEF.
4439 */
4440static const uint8_t neon_2rm_sizes[] = {
4441    [NEON_2RM_VREV64] = 0x7,
4442    [NEON_2RM_VREV32] = 0x3,
4443    [NEON_2RM_VREV16] = 0x1,
4444    [NEON_2RM_VPADDL] = 0x7,
4445    [NEON_2RM_VPADDL_U] = 0x7,
4446    [NEON_2RM_VCLS] = 0x7,
4447    [NEON_2RM_VCLZ] = 0x7,
4448    [NEON_2RM_VCNT] = 0x1,
4449    [NEON_2RM_VMVN] = 0x1,
4450    [NEON_2RM_VPADAL] = 0x7,
4451    [NEON_2RM_VPADAL_U] = 0x7,
4452    [NEON_2RM_VQABS] = 0x7,
4453    [NEON_2RM_VQNEG] = 0x7,
4454    [NEON_2RM_VCGT0] = 0x7,
4455    [NEON_2RM_VCGE0] = 0x7,
4456    [NEON_2RM_VCEQ0] = 0x7,
4457    [NEON_2RM_VCLE0] = 0x7,
4458    [NEON_2RM_VCLT0] = 0x7,
4459    [NEON_2RM_VABS] = 0x7,
4460    [NEON_2RM_VNEG] = 0x7,
4461    [NEON_2RM_VCGT0_F] = 0x4,
4462    [NEON_2RM_VCGE0_F] = 0x4,
4463    [NEON_2RM_VCEQ0_F] = 0x4,
4464    [NEON_2RM_VCLE0_F] = 0x4,
4465    [NEON_2RM_VCLT0_F] = 0x4,
4466    [NEON_2RM_VABS_F] = 0x4,
4467    [NEON_2RM_VNEG_F] = 0x4,
4468    [NEON_2RM_VSWP] = 0x1,
4469    [NEON_2RM_VTRN] = 0x7,
4470    [NEON_2RM_VUZP] = 0x7,
4471    [NEON_2RM_VZIP] = 0x7,
4472    [NEON_2RM_VMOVN] = 0x7,
4473    [NEON_2RM_VQMOVN] = 0x7,
4474    [NEON_2RM_VSHLL] = 0x7,
4475    [NEON_2RM_VCVT_F16_F32] = 0x2,
4476    [NEON_2RM_VCVT_F32_F16] = 0x2,
4477    [NEON_2RM_VRECPE] = 0x4,
4478    [NEON_2RM_VRSQRTE] = 0x4,
4479    [NEON_2RM_VRECPE_F] = 0x4,
4480    [NEON_2RM_VRSQRTE_F] = 0x4,
4481    [NEON_2RM_VCVT_FS] = 0x4,
4482    [NEON_2RM_VCVT_FU] = 0x4,
4483    [NEON_2RM_VCVT_SF] = 0x4,
4484    [NEON_2RM_VCVT_UF] = 0x4,
4485};
4486
4487/* Translate a NEON data processing instruction.  Return nonzero if the
4488   instruction is invalid.
4489   We process data in a mixture of 32-bit and 64-bit chunks.
4490   Mostly we use 32-bit chunks so we can use normal scalar instructions.  */
4491
4492static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t insn)
4493{
4494    int op;
4495    int q;
4496    int rd, rn, rm;
4497    int size;
4498    int shift;
4499    int pass;
4500    int count;
4501    int pairwise;
4502    int u;
4503    uint32_t imm, mask;
4504    TCGv tmp, tmp2, tmp3, tmp4, tmp5;
4505    TCGv_i64 tmp64;
4506
4507    if (!s->vfp_enabled)
4508      return 1;
4509    q = (insn & (1 << 6)) != 0;
4510    u = (insn >> 24) & 1;
4511    VFP_DREG_D(rd, insn);
4512    VFP_DREG_N(rn, insn);
4513    VFP_DREG_M(rm, insn);
4514    size = (insn >> 20) & 3;
4515    if ((insn & (1 << 23)) == 0) {
4516        /* Three register same length.  */
4517        op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1);
4518        /* Catch invalid op and bad size combinations: UNDEF */
4519        if ((neon_3r_sizes[op] & (1 << size)) == 0) {
4520            return 1;
4521        }
4522        /* All insns of this form UNDEF for either this condition or the
4523         * superset of cases "Q==1"; we catch the latter later.
4524         */
4525        if (q && ((rd | rn | rm) & 1)) {
4526            return 1;
4527        }
4528        if (size == 3 && op != NEON_3R_LOGIC) {
4529            /* 64-bit element instructions. */
4530            for (pass = 0; pass < (q ? 2 : 1); pass++) {
4531                neon_load_reg64(cpu_V0, rn + pass);
4532                neon_load_reg64(cpu_V1, rm + pass);
4533                switch (op) {
4534                case NEON_3R_VQADD:
4535                    if (u) {
4536                        gen_helper_neon_qadd_u64(cpu_V0, cpu_env,
4537                                                 cpu_V0, cpu_V1);
4538                    } else {
4539                        gen_helper_neon_qadd_s64(cpu_V0, cpu_env,
4540                                                 cpu_V0, cpu_V1);
4541                    }
4542                    break;
4543                case NEON_3R_VQSUB:
4544                    if (u) {
4545                        gen_helper_neon_qsub_u64(cpu_V0, cpu_env,
4546                                                 cpu_V0, cpu_V1);
4547                    } else {
4548                        gen_helper_neon_qsub_s64(cpu_V0, cpu_env,
4549                                                 cpu_V0, cpu_V1);
4550                    }
4551                    break;
4552                case NEON_3R_VSHL:
4553                    if (u) {
4554                        gen_helper_neon_shl_u64(cpu_V0, cpu_V1, cpu_V0);
4555                    } else {
4556                        gen_helper_neon_shl_s64(cpu_V0, cpu_V1, cpu_V0);
4557                    }
4558                    break;
4559                case NEON_3R_VQSHL:
4560                    if (u) {
4561                        gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
4562                                                 cpu_V1, cpu_V0);
4563                    } else {
4564                        gen_helper_neon_qshl_s64(cpu_V0, cpu_env,
4565                                                 cpu_V1, cpu_V0);
4566                    }
4567                    break;
4568                case NEON_3R_VRSHL:
4569                    if (u) {
4570                        gen_helper_neon_rshl_u64(cpu_V0, cpu_V1, cpu_V0);
4571                    } else {
4572                        gen_helper_neon_rshl_s64(cpu_V0, cpu_V1, cpu_V0);
4573                    }
4574                    break;
4575                case NEON_3R_VQRSHL:
4576                    if (u) {
4577                        gen_helper_neon_qrshl_u64(cpu_V0, cpu_env,
4578                                                  cpu_V1, cpu_V0);
4579                    } else {
4580                        gen_helper_neon_qrshl_s64(cpu_V0, cpu_env,
4581                                                  cpu_V1, cpu_V0);
4582                    }
4583                    break;
4584                case NEON_3R_VADD_VSUB:
4585                    if (u) {
4586                        tcg_gen_sub_i64(CPU_V001);
4587                    } else {
4588                        tcg_gen_add_i64(CPU_V001);
4589                    }
4590                    break;
4591                default:
4592                    abort();
4593                }
4594                neon_store_reg64(cpu_V0, rd + pass);
4595            }
4596            return 0;
4597        }
4598        pairwise = 0;
4599        switch (op) {
4600        case NEON_3R_VSHL:
4601        case NEON_3R_VQSHL:
4602        case NEON_3R_VRSHL:
4603        case NEON_3R_VQRSHL:
4604            {
4605                int rtmp;
4606                /* Shift instruction operands are reversed.  */
4607                rtmp = rn;
4608                rn = rm;
4609                rm = rtmp;
4610            }
4611            break;
4612        case NEON_3R_VPADD:
4613            if (u) {
4614                return 1;
4615            }
4616            /* Fall through */
4617        case NEON_3R_VPMAX:
4618        case NEON_3R_VPMIN:
4619            pairwise = 1;
4620            break;
4621        case NEON_3R_FLOAT_ARITH:
4622            pairwise = (u && size < 2); /* if VPADD (float) */
4623            break;
4624        case NEON_3R_FLOAT_MINMAX:
4625            pairwise = u; /* if VPMIN/VPMAX (float) */
4626            break;
4627        case NEON_3R_FLOAT_CMP:
4628            if (!u && size) {
4629                /* no encoding for U=0 C=1x */
4630                return 1;
4631            }
4632            break;
4633        case NEON_3R_FLOAT_ACMP:
4634            if (!u) {
4635                return 1;
4636            }
4637            break;
4638        case NEON_3R_VRECPS_VRSQRTS:
4639            if (u) {
4640                return 1;
4641            }
4642            break;
4643        case NEON_3R_VMUL:
4644            if (u && (size != 0)) {
4645                /* UNDEF on invalid size for polynomial subcase */
4646                return 1;
4647            }
4648            break;
4649        default:
4650            break;
4651        }
4652
4653        if (pairwise && q) {
4654            /* All the pairwise insns UNDEF if Q is set */
4655            return 1;
4656        }
4657
4658        for (pass = 0; pass < (q ? 4 : 2); pass++) {
4659
4660        if (pairwise) {
4661            /* Pairwise.  */
4662            if (pass < 1) {
4663                tmp = neon_load_reg(rn, 0);
4664                tmp2 = neon_load_reg(rn, 1);
4665            } else {
4666                tmp = neon_load_reg(rm, 0);
4667                tmp2 = neon_load_reg(rm, 1);
4668            }
4669        } else {
4670            /* Elementwise.  */
4671            tmp = neon_load_reg(rn, pass);
4672            tmp2 = neon_load_reg(rm, pass);
4673        }
4674        switch (op) {
4675        case NEON_3R_VHADD:
4676            GEN_NEON_INTEGER_OP(hadd);
4677            break;
4678        case NEON_3R_VQADD:
4679            GEN_NEON_INTEGER_OP_ENV(qadd);
4680            break;
4681        case NEON_3R_VRHADD:
4682            GEN_NEON_INTEGER_OP(rhadd);
4683            break;
4684        case NEON_3R_LOGIC: /* Logic ops.  */
4685            switch ((u << 2) | size) {
4686            case 0: /* VAND */
4687                tcg_gen_and_i32(tmp, tmp, tmp2);
4688                break;
4689            case 1: /* BIC */
4690                tcg_gen_andc_i32(tmp, tmp, tmp2);
4691                break;
4692            case 2: /* VORR */
4693                tcg_gen_or_i32(tmp, tmp, tmp2);
4694                break;
4695            case 3: /* VORN */
4696                tcg_gen_orc_i32(tmp, tmp, tmp2);
4697                break;
4698            case 4: /* VEOR */
4699                tcg_gen_xor_i32(tmp, tmp, tmp2);
4700                break;
4701            case 5: /* VBSL */
4702                tmp3 = neon_load_reg(rd, pass);
4703                gen_neon_bsl(tmp, tmp, tmp2, tmp3);
4704                tcg_temp_free_i32(tmp3);
4705                break;
4706            case 6: /* VBIT */
4707                tmp3 = neon_load_reg(rd, pass);
4708                gen_neon_bsl(tmp, tmp, tmp3, tmp2);
4709                tcg_temp_free_i32(tmp3);
4710                break;
4711            case 7: /* VBIF */
4712                tmp3 = neon_load_reg(rd, pass);
4713                gen_neon_bsl(tmp, tmp3, tmp, tmp2);
4714                tcg_temp_free_i32(tmp3);
4715                break;
4716            }
4717            break;
4718        case NEON_3R_VHSUB:
4719            GEN_NEON_INTEGER_OP(hsub);
4720            break;
4721        case NEON_3R_VQSUB:
4722            GEN_NEON_INTEGER_OP_ENV(qsub);
4723            break;
4724        case NEON_3R_VCGT:
4725            GEN_NEON_INTEGER_OP(cgt);
4726            break;
4727        case NEON_3R_VCGE:
4728            GEN_NEON_INTEGER_OP(cge);
4729            break;
4730        case NEON_3R_VSHL:
4731            GEN_NEON_INTEGER_OP(shl);
4732            break;
4733        case NEON_3R_VQSHL:
4734            GEN_NEON_INTEGER_OP_ENV(qshl);
4735            break;
4736        case NEON_3R_VRSHL:
4737            GEN_NEON_INTEGER_OP(rshl);
4738            break;
4739        case NEON_3R_VQRSHL:
4740            GEN_NEON_INTEGER_OP_ENV(qrshl);
4741            break;
4742        case NEON_3R_VMAX:
4743            GEN_NEON_INTEGER_OP(max);
4744            break;
4745        case NEON_3R_VMIN:
4746            GEN_NEON_INTEGER_OP(min);
4747            break;
4748        case NEON_3R_VABD:
4749            GEN_NEON_INTEGER_OP(abd);
4750            break;
4751        case NEON_3R_VABA:
4752            GEN_NEON_INTEGER_OP(abd);
4753            tcg_temp_free_i32(tmp2);
4754            tmp2 = neon_load_reg(rd, pass);
4755            gen_neon_add(size, tmp, tmp2);
4756            break;
4757        case NEON_3R_VADD_VSUB:
4758            if (!u) { /* VADD */
4759                gen_neon_add(size, tmp, tmp2);
4760            } else { /* VSUB */
4761                switch (size) {
4762                case 0: gen_helper_neon_sub_u8(tmp, tmp, tmp2); break;
4763                case 1: gen_helper_neon_sub_u16(tmp, tmp, tmp2); break;
4764                case 2: tcg_gen_sub_i32(tmp, tmp, tmp2); break;
4765                default: abort();
4766                }
4767            }
4768            break;
4769        case NEON_3R_VTST_VCEQ:
4770            if (!u) { /* VTST */
4771                switch (size) {
4772                case 0: gen_helper_neon_tst_u8(tmp, tmp, tmp2); break;
4773                case 1: gen_helper_neon_tst_u16(tmp, tmp, tmp2); break;
4774                case 2: gen_helper_neon_tst_u32(tmp, tmp, tmp2); break;
4775                default: abort();
4776                }
4777            } else { /* VCEQ */
4778                switch (size) {
4779                case 0: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break;
4780                case 1: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break;
4781                case 2: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break;
4782                default: abort();
4783                }
4784            }
4785            break;
4786        case NEON_3R_VML: /* VMLA, VMLAL, VMLS,VMLSL */
4787            switch (size) {
4788            case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
4789            case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
4790            case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
4791            default: abort();
4792            }
4793            tcg_temp_free_i32(tmp2);
4794            tmp2 = neon_load_reg(rd, pass);
4795            if (u) { /* VMLS */
4796                gen_neon_rsb(size, tmp, tmp2);
4797            } else { /* VMLA */
4798                gen_neon_add(size, tmp, tmp2);
4799            }
4800            break;
4801        case NEON_3R_VMUL:
4802            if (u) { /* polynomial */
4803                gen_helper_neon_mul_p8(tmp, tmp, tmp2);
4804            } else { /* Integer */
4805                switch (size) {
4806                case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
4807                case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
4808                case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
4809                default: abort();
4810                }
4811            }
4812            break;
4813        case NEON_3R_VPMAX:
4814            GEN_NEON_INTEGER_OP(pmax);
4815            break;
4816        case NEON_3R_VPMIN:
4817            GEN_NEON_INTEGER_OP(pmin);
4818            break;
4819        case NEON_3R_VQDMULH_VQRDMULH: /* Multiply high.  */
4820            if (!u) { /* VQDMULH */
4821                switch (size) {
4822                case 1:
4823                    gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
4824                    break;
4825                case 2:
4826                    gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
4827                    break;
4828                default: abort();
4829                }
4830            } else { /* VQRDMULH */
4831                switch (size) {
4832                case 1:
4833                    gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
4834                    break;
4835                case 2:
4836                    gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
4837                    break;
4838                default: abort();
4839                }
4840            }
4841            break;
4842        case NEON_3R_VPADD:
4843            switch (size) {
4844            case 0: gen_helper_neon_padd_u8(tmp, tmp, tmp2); break;
4845            case 1: gen_helper_neon_padd_u16(tmp, tmp, tmp2); break;
4846            case 2: tcg_gen_add_i32(tmp, tmp, tmp2); break;
4847            default: abort();
4848            }
4849            break;
4850        case NEON_3R_FLOAT_ARITH: /* Floating point arithmetic. */
4851        {
4852            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
4853            switch ((u << 2) | size) {
4854            case 0: /* VADD */
4855            case 4: /* VPADD */
4856                gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
4857                break;
4858            case 2: /* VSUB */
4859                gen_helper_vfp_subs(tmp, tmp, tmp2, fpstatus);
4860                break;
4861            case 6: /* VABD */
4862                gen_helper_neon_abd_f32(tmp, tmp, tmp2, fpstatus);
4863                break;
4864            default:
4865                abort();
4866            }
4867            tcg_temp_free_ptr(fpstatus);
4868            break;
4869        }
4870        case NEON_3R_FLOAT_MULTIPLY:
4871        {
4872            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
4873            gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
4874            if (!u) {
4875                tcg_temp_free_i32(tmp2);
4876                tmp2 = neon_load_reg(rd, pass);
4877                if (size == 0) {
4878                    gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
4879                } else {
4880                    gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
4881                }
4882            }
4883            tcg_temp_free_ptr(fpstatus);
4884            break;
4885        }
4886        case NEON_3R_FLOAT_CMP:
4887        {
4888            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
4889            if (!u) {
4890                gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
4891            } else {
4892                if (size == 0) {
4893                    gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
4894                } else {
4895                    gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
4896                }
4897            }
4898            tcg_temp_free_ptr(fpstatus);
4899            break;
4900        }
4901        case NEON_3R_FLOAT_ACMP:
4902        {
4903            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
4904            if (size == 0) {
4905                gen_helper_neon_acge_f32(tmp, tmp, tmp2, fpstatus);
4906            } else {
4907                gen_helper_neon_acgt_f32(tmp, tmp, tmp2, fpstatus);
4908            }
4909            tcg_temp_free_ptr(fpstatus);
4910            break;
4911        }
4912        case NEON_3R_FLOAT_MINMAX:
4913        {
4914            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
4915            if (size == 0) {
4916                gen_helper_neon_max_f32(tmp, tmp, tmp2, fpstatus);
4917            } else {
4918                gen_helper_neon_min_f32(tmp, tmp, tmp2, fpstatus);
4919            }
4920            tcg_temp_free_ptr(fpstatus);
4921            break;
4922        }
4923        case NEON_3R_VRECPS_VRSQRTS:
4924            if (size == 0)
4925                gen_helper_recps_f32(tmp, tmp, tmp2, cpu_env);
4926            else
4927                gen_helper_rsqrts_f32(tmp, tmp, tmp2, cpu_env);
4928            break;
4929        default:
4930            abort();
4931        }
4932        tcg_temp_free_i32(tmp2);
4933
4934        /* Save the result.  For elementwise operations we can put it
4935           straight into the destination register.  For pairwise operations
4936           we have to be careful to avoid clobbering the source operands.  */
4937        if (pairwise && rd == rm) {
4938            neon_store_scratch(pass, tmp);
4939        } else {
4940            neon_store_reg(rd, pass, tmp);
4941        }
4942
4943        } /* for pass */
4944        if (pairwise && rd == rm) {
4945            for (pass = 0; pass < (q ? 4 : 2); pass++) {
4946                tmp = neon_load_scratch(pass);
4947                neon_store_reg(rd, pass, tmp);
4948            }
4949        }
4950        /* End of 3 register same size operations.  */
4951    } else if (insn & (1 << 4)) {
4952        if ((insn & 0x00380080) != 0) {
4953            /* Two registers and shift.  */
4954            op = (insn >> 8) & 0xf;
4955            if (insn & (1 << 7)) {
4956                /* 64-bit shift. */
4957                if (op > 7) {
4958                    return 1;
4959                }
4960                size = 3;
4961            } else {
4962                size = 2;
4963                while ((insn & (1 << (size + 19))) == 0)
4964                    size--;
4965            }
4966            shift = (insn >> 16) & ((1 << (3 + size)) - 1);
4967            /* To avoid excessive dumplication of ops we implement shift
4968               by immediate using the variable shift operations.  */
4969            if (op < 8) {
4970                /* Shift by immediate:
4971                   VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU.  */
4972                if (q && ((rd | rm) & 1)) {
4973                    return 1;
4974                }
4975                if (!u && (op == 4 || op == 6)) {
4976                    return 1;
4977                }
4978                /* Right shifts are encoded as N - shift, where N is the
4979                   element size in bits.  */
4980                if (op <= 4)
4981                    shift = shift - (1 << (size + 3));
4982                if (size == 3) {
4983                    count = q + 1;
4984                } else {
4985                    count = q ? 4: 2;
4986                }
4987                switch (size) {
4988                case 0:
4989                    imm = (uint8_t) shift;
4990                    imm |= imm << 8;
4991                    imm |= imm << 16;
4992                    break;
4993                case 1:
4994                    imm = (uint16_t) shift;
4995                    imm |= imm << 16;
4996                    break;
4997                case 2:
4998                case 3:
4999                    imm = shift;
5000                    break;
5001                default:
5002                    abort();
5003                }
5004
5005                for (pass = 0; pass < count; pass++) {
5006                    if (size == 3) {
5007                        neon_load_reg64(cpu_V0, rm + pass);
5008                        tcg_gen_movi_i64(cpu_V1, imm);
5009                        switch (op) {
5010                        case 0:  /* VSHR */
5011                        case 1:  /* VSRA */
5012                            if (u)
5013                                gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1);
5014                            else
5015                                gen_helper_neon_shl_s64(cpu_V0, cpu_V0, cpu_V1);
5016                            break;
5017                        case 2: /* VRSHR */
5018                        case 3: /* VRSRA */
5019                            if (u)
5020                                gen_helper_neon_rshl_u64(cpu_V0, cpu_V0, cpu_V1);
5021                            else
5022                                gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, cpu_V1);
5023                            break;
5024                        case 4: /* VSRI */
5025                        case 5: /* VSHL, VSLI */
5026                            gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1);
5027                            break;
5028                        case 6: /* VQSHLU */
5029                            gen_helper_neon_qshlu_s64(cpu_V0, cpu_env,
5030                                                      cpu_V0, cpu_V1);
5031                            break;
5032                        case 7: /* VQSHL */
5033                            if (u) {
5034                                gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
5035                                                         cpu_V0, cpu_V1);
5036                            } else {
5037                                gen_helper_neon_qshl_s64(cpu_V0, cpu_env,
5038                                                         cpu_V0, cpu_V1);
5039                            }
5040                            break;
5041                        }
5042                        if (op == 1 || op == 3) {
5043                            /* Accumulate.  */
5044                            neon_load_reg64(cpu_V1, rd + pass);
5045                            tcg_gen_add_i64(cpu_V0, cpu_V0, cpu_V1);
5046                        } else if (op == 4 || (op == 5 && u)) {
5047                            /* Insert */
5048                            neon_load_reg64(cpu_V1, rd + pass);
5049                            uint64_t mask;
5050                            if (shift < -63 || shift > 63) {
5051                                mask = 0;
5052                            } else {
5053                                if (op == 4) {
5054                                    mask = 0xffffffffffffffffull >> -shift;
5055                                } else {
5056                                    mask = 0xffffffffffffffffull << shift;
5057                                }
5058                            }
5059                            tcg_gen_andi_i64(cpu_V1, cpu_V1, ~mask);
5060                            tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
5061                        }
5062                        neon_store_reg64(cpu_V0, rd + pass);
5063                    } else { /* size < 3 */
5064                        /* Operands in T0 and T1.  */
5065                        tmp = neon_load_reg(rm, pass);
5066                        tmp2 = tcg_const_i32(imm);
5067                        switch (op) {
5068                        case 0:  /* VSHR */
5069                        case 1:  /* VSRA */
5070                            GEN_NEON_INTEGER_OP(shl);
5071                            break;
5072                        case 2: /* VRSHR */
5073                        case 3: /* VRSRA */
5074                            GEN_NEON_INTEGER_OP(rshl);
5075                            break;
5076                        case 4: /* VSRI */
5077                        case 5: /* VSHL, VSLI */
5078                            switch (size) {
5079                            case 0: gen_helper_neon_shl_u8(tmp, tmp, tmp2); break;
5080                            case 1: gen_helper_neon_shl_u16(tmp, tmp, tmp2); break;
5081                            case 2: gen_helper_neon_shl_u32(tmp, tmp, tmp2); break;
5082                            default: abort();
5083                            }
5084                            break;
5085                        case 6: /* VQSHLU */
5086                            switch (size) {
5087                            case 0:
5088                                gen_helper_neon_qshlu_s8(tmp, cpu_env,
5089                                                         tmp, tmp2);
5090                                break;
5091                            case 1:
5092                                gen_helper_neon_qshlu_s16(tmp, cpu_env,
5093                                                          tmp, tmp2);
5094                                break;
5095                            case 2:
5096                                gen_helper_neon_qshlu_s32(tmp, cpu_env,
5097                                                          tmp, tmp2);
5098                                break;
5099                            default:
5100                                abort();
5101                            }
5102                            break;
5103                        case 7: /* VQSHL */
5104                            GEN_NEON_INTEGER_OP_ENV(qshl);
5105                            break;
5106                        }
5107                        tcg_temp_free_i32(tmp2);
5108
5109                        if (op == 1 || op == 3) {
5110                            /* Accumulate.  */
5111                            tmp2 = neon_load_reg(rd, pass);
5112                            gen_neon_add(size, tmp, tmp2);
5113                            tcg_temp_free_i32(tmp2);
5114                        } else if (op == 4 || (op == 5 && u)) {
5115                            /* Insert */
5116                            switch (size) {
5117                            case 0:
5118                                if (op == 4)
5119                                    mask = 0xff >> -shift;
5120                                else
5121                                    mask = (uint8_t)(0xff << shift);
5122                                mask |= mask << 8;
5123                                mask |= mask << 16;
5124                                break;
5125                            case 1:
5126                                if (op == 4)
5127                                    mask = 0xffff >> -shift;
5128                                else
5129                                    mask = (uint16_t)(0xffff << shift);
5130                                mask |= mask << 16;
5131                                break;
5132                            case 2:
5133                                if (shift < -31 || shift > 31) {
5134                                    mask = 0;
5135                                } else {
5136                                    if (op == 4)
5137                                        mask = 0xffffffffu >> -shift;
5138                                    else
5139                                        mask = 0xffffffffu << shift;
5140                                }
5141                                break;
5142                            default:
5143                                abort();
5144                            }
5145                            tmp2 = neon_load_reg(rd, pass);
5146                            tcg_gen_andi_i32(tmp, tmp, mask);
5147                            tcg_gen_andi_i32(tmp2, tmp2, ~mask);
5148                            tcg_gen_or_i32(tmp, tmp, tmp2);
5149                            tcg_temp_free_i32(tmp2);
5150                        }
5151                        neon_store_reg(rd, pass, tmp);
5152                    }
5153                } /* for pass */
5154            } else if (op < 10) {
5155                /* Shift by immediate and narrow:
5156                   VSHRN, VRSHRN, VQSHRN, VQRSHRN.  */
5157                int input_unsigned = (op == 8) ? !u : u;
5158                if (rm & 1) {
5159                    return 1;
5160                }
5161                shift = shift - (1 << (size + 3));
5162                size++;
5163                if (size == 3) {
5164                    tmp64 = tcg_const_i64(shift);
5165                    neon_load_reg64(cpu_V0, rm);
5166                    neon_load_reg64(cpu_V1, rm + 1);
5167                    for (pass = 0; pass < 2; pass++) {
5168                        TCGv_i64 in;
5169                        if (pass == 0) {
5170                            in = cpu_V0;
5171                        } else {
5172                            in = cpu_V1;
5173                        }
5174                        if (q) {
5175                            if (input_unsigned) {
5176                                gen_helper_neon_rshl_u64(cpu_V0, in, tmp64);
5177                            } else {
5178                                gen_helper_neon_rshl_s64(cpu_V0, in, tmp64);
5179                            }
5180                        } else {
5181                            if (input_unsigned) {
5182                                gen_helper_neon_shl_u64(cpu_V0, in, tmp64);
5183                            } else {
5184                                gen_helper_neon_shl_s64(cpu_V0, in, tmp64);
5185                            }
5186                        }
5187                        tmp = tcg_temp_new_i32();
5188                        gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0);
5189                        neon_store_reg(rd, pass, tmp);
5190                    } /* for pass */
5191                    tcg_temp_free_i64(tmp64);
5192                } else {
5193                    if (size == 1) {
5194                        imm = (uint16_t)shift;
5195                        imm |= imm << 16;
5196                    } else {
5197                        /* size == 2 */
5198                        imm = (uint32_t)shift;
5199                    }
5200                    tmp2 = tcg_const_i32(imm);
5201                    tmp4 = neon_load_reg(rm + 1, 0);
5202                    tmp5 = neon_load_reg(rm + 1, 1);
5203                    for (pass = 0; pass < 2; pass++) {
5204                        if (pass == 0) {
5205                            tmp = neon_load_reg(rm, 0);
5206                        } else {
5207                            tmp = tmp4;
5208                        }
5209                        gen_neon_shift_narrow(size, tmp, tmp2, q,
5210                                              input_unsigned);
5211                        if (pass == 0) {
5212                            tmp3 = neon_load_reg(rm, 1);
5213                        } else {
5214                            tmp3 = tmp5;
5215                        }
5216                        gen_neon_shift_narrow(size, tmp3, tmp2, q,
5217                                              input_unsigned);
5218                        tcg_gen_concat_i32_i64(cpu_V0, tmp, tmp3);
5219                        tcg_temp_free_i32(tmp);
5220                        tcg_temp_free_i32(tmp3);
5221                        tmp = tcg_temp_new_i32();
5222                        gen_neon_narrow_op(op == 8, u, size - 1, tmp, cpu_V0);
5223                        neon_store_reg(rd, pass, tmp);
5224                    } /* for pass */
5225                    tcg_temp_free_i32(tmp2);
5226                }
5227            } else if (op == 10) {
5228                /* VSHLL, VMOVL */
5229                if (q || (rd & 1)) {
5230                    return 1;
5231                }
5232                tmp = neon_load_reg(rm, 0);
5233                tmp2 = neon_load_reg(rm, 1);
5234                for (pass = 0; pass < 2; pass++) {
5235                    if (pass == 1)
5236                        tmp = tmp2;
5237
5238                    gen_neon_widen(cpu_V0, tmp, size, u);
5239
5240                    if (shift != 0) {
5241                        /* The shift is less than the width of the source
5242                           type, so we can just shift the whole register.  */
5243                        tcg_gen_shli_i64(cpu_V0, cpu_V0, shift);
5244                        /* Widen the result of shift: we need to clear
5245                         * the potential overflow bits resulting from
5246                         * left bits of the narrow input appearing as
5247                         * right bits of left the neighbour narrow
5248                         * input.  */
5249                        if (size < 2 || !u) {
5250                            uint64_t imm64;
5251                            if (size == 0) {
5252                                imm = (0xffu >> (8 - shift));
5253                                imm |= imm << 16;
5254                            } else if (size == 1) {
5255                                imm = 0xffff >> (16 - shift);
5256                            } else {
5257                                /* size == 2 */
5258                                imm = 0xffffffff >> (32 - shift);
5259                            }
5260                            if (size < 2) {
5261                                imm64 = imm | (((uint64_t)imm) << 32);
5262                            } else {
5263                                imm64 = imm;
5264                            }
5265                            tcg_gen_andi_i64(cpu_V0, cpu_V0, ~imm64);
5266                        }
5267                    }
5268                    neon_store_reg64(cpu_V0, rd + pass);
5269                }
5270            } else if (op >= 14) {
5271                /* VCVT fixed-point.  */
5272                if (!(insn & (1 << 21)) || (q && ((rd | rm) & 1))) {
5273                    return 1;
5274                }
5275                /* We have already masked out the must-be-1 top bit of imm6,
5276                 * hence this 32-shift where the ARM ARM has 64-imm6.
5277                 */
5278                shift = 32 - shift;
5279                for (pass = 0; pass < (q ? 4 : 2); pass++) {
5280                    tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, pass));
5281                    if (!(op & 1)) {
5282                        if (u)
5283                            gen_vfp_ulto(0, shift, 1);
5284                        else
5285                            gen_vfp_slto(0, shift, 1);
5286                    } else {
5287                        if (u)
5288                            gen_vfp_toul(0, shift, 1);
5289                        else
5290                            gen_vfp_tosl(0, shift, 1);
5291                    }
5292                    tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, pass));
5293                }
5294            } else {
5295                return 1;
5296            }
5297        } else { /* (insn & 0x00380080) == 0 */
5298            int invert;
5299            if (q && (rd & 1)) {
5300                return 1;
5301            }
5302
5303            op = (insn >> 8) & 0xf;
5304            /* One register and immediate.  */
5305            imm = (u << 7) | ((insn >> 12) & 0x70) | (insn & 0xf);
5306            invert = (insn & (1 << 5)) != 0;
5307            /* Note that op = 2,3,4,5,6,7,10,11,12,13 imm=0 is UNPREDICTABLE.
5308             * We choose to not special-case this and will behave as if a
5309             * valid constant encoding of 0 had been given.
5310             */
5311            switch (op) {
5312            case 0: case 1:
5313                /* no-op */
5314                break;
5315            case 2: case 3:
5316                imm <<= 8;
5317                break;
5318            case 4: case 5:
5319                imm <<= 16;
5320                break;
5321            case 6: case 7:
5322                imm <<= 24;
5323                break;
5324            case 8: case 9:
5325                imm |= imm << 16;
5326                break;
5327            case 10: case 11:
5328                imm = (imm << 8) | (imm << 24);
5329                break;
5330            case 12:
5331                imm = (imm << 8) | 0xff;
5332                break;
5333            case 13:
5334                imm = (imm << 16) | 0xffff;
5335                break;
5336            case 14:
5337                imm |= (imm << 8) | (imm << 16) | (imm << 24);
5338                if (invert)
5339                    imm = ~imm;
5340                break;
5341            case 15:
5342                if (invert) {
5343                    return 1;
5344                }
5345                imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
5346                      | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
5347                break;
5348            }
5349            if (invert)
5350                imm = ~imm;
5351
5352            for (pass = 0; pass < (q ? 4 : 2); pass++) {
5353                if (op & 1 && op < 12) {
5354                    tmp = neon_load_reg(rd, pass);
5355                    if (invert) {
5356                        /* The immediate value has already been inverted, so
5357                           BIC becomes AND.  */
5358                        tcg_gen_andi_i32(tmp, tmp, imm);
5359                    } else {
5360                        tcg_gen_ori_i32(tmp, tmp, imm);
5361                    }
5362                } else {
5363                    /* VMOV, VMVN.  */
5364                    tmp = tcg_temp_new_i32();
5365                    if (op == 14 && invert) {
5366                        int n;
5367                        uint32_t val;
5368                        val = 0;
5369                        for (n = 0; n < 4; n++) {
5370                            if (imm & (1 << (n + (pass & 1) * 4)))
5371                                val |= 0xff << (n * 8);
5372                        }
5373                        tcg_gen_movi_i32(tmp, val);
5374                    } else {
5375                        tcg_gen_movi_i32(tmp, imm);
5376                    }
5377                }
5378                neon_store_reg(rd, pass, tmp);
5379            }
5380        }
5381    } else { /* (insn & 0x00800010 == 0x00800000) */
5382        if (size != 3) {
5383            op = (insn >> 8) & 0xf;
5384            if ((insn & (1 << 6)) == 0) {
5385                /* Three registers of different lengths.  */
5386                int src1_wide;
5387                int src2_wide;
5388                int prewiden;
5389                /* undefreq: bit 0 : UNDEF if size != 0
5390                 *           bit 1 : UNDEF if size == 0
5391                 *           bit 2 : UNDEF if U == 1
5392                 * Note that [1:0] set implies 'always UNDEF'
5393                 */
5394                int undefreq;
5395                /* prewiden, src1_wide, src2_wide, undefreq */
5396                static const int neon_3reg_wide[16][4] = {
5397                    {1, 0, 0, 0}, /* VADDL */
5398                    {1, 1, 0, 0}, /* VADDW */
5399                    {1, 0, 0, 0}, /* VSUBL */
5400                    {1, 1, 0, 0}, /* VSUBW */
5401                    {0, 1, 1, 0}, /* VADDHN */
5402                    {0, 0, 0, 0}, /* VABAL */
5403                    {0, 1, 1, 0}, /* VSUBHN */
5404                    {0, 0, 0, 0}, /* VABDL */
5405                    {0, 0, 0, 0}, /* VMLAL */
5406                    {0, 0, 0, 6}, /* VQDMLAL */
5407                    {0, 0, 0, 0}, /* VMLSL */
5408                    {0, 0, 0, 6}, /* VQDMLSL */
5409                    {0, 0, 0, 0}, /* Integer VMULL */
5410                    {0, 0, 0, 2}, /* VQDMULL */
5411                    {0, 0, 0, 5}, /* Polynomial VMULL */
5412                    {0, 0, 0, 3}, /* Reserved: always UNDEF */
5413                };
5414
5415                prewiden = neon_3reg_wide[op][0];
5416                src1_wide = neon_3reg_wide[op][1];
5417                src2_wide = neon_3reg_wide[op][2];
5418                undefreq = neon_3reg_wide[op][3];
5419
5420                if (((undefreq & 1) && (size != 0)) ||
5421                    ((undefreq & 2) && (size == 0)) ||
5422                    ((undefreq & 4) && u)) {
5423                    return 1;
5424                }
5425                if ((src1_wide && (rn & 1)) ||
5426                    (src2_wide && (rm & 1)) ||
5427                    (!src2_wide && (rd & 1))) {
5428                    return 1;
5429                }
5430
5431                /* Avoid overlapping operands.  Wide source operands are
5432                   always aligned so will never overlap with wide
5433                   destinations in problematic ways.  */
5434                if (rd == rm && !src2_wide) {
5435                    tmp = neon_load_reg(rm, 1);
5436                    neon_store_scratch(2, tmp);
5437                } else if (rd == rn && !src1_wide) {
5438                    tmp = neon_load_reg(rn, 1);
5439                    neon_store_scratch(2, tmp);
5440                }
5441                TCGV_UNUSED(tmp3);
5442                for (pass = 0; pass < 2; pass++) {
5443                    if (src1_wide) {
5444                        neon_load_reg64(cpu_V0, rn + pass);
5445                        TCGV_UNUSED(tmp);
5446                    } else {
5447                        if (pass == 1 && rd == rn) {
5448                            tmp = neon_load_scratch(2);
5449                        } else {
5450                            tmp = neon_load_reg(rn, pass);
5451                        }
5452                        if (prewiden) {
5453                            gen_neon_widen(cpu_V0, tmp, size, u);
5454                        }
5455                    }
5456                    if (src2_wide) {
5457                        neon_load_reg64(cpu_V1, rm + pass);
5458                        TCGV_UNUSED(tmp2);
5459                    } else {
5460                        if (pass == 1 && rd == rm) {
5461                            tmp2 = neon_load_scratch(2);
5462                        } else {
5463                            tmp2 = neon_load_reg(rm, pass);
5464                        }
5465                        if (prewiden) {
5466                            gen_neon_widen(cpu_V1, tmp2, size, u);
5467                        }
5468                    }
5469                    switch (op) {
5470                    case 0: case 1: case 4: /* VADDL, VADDW, VADDHN, VRADDHN */
5471                        gen_neon_addl(size);
5472                        break;
5473                    case 2: case 3: case 6: /* VSUBL, VSUBW, VSUBHN, VRSUBHN */
5474                        gen_neon_subl(size);
5475                        break;
5476                    case 5: case 7: /* VABAL, VABDL */
5477                        switch ((size << 1) | u) {
5478                        case 0:
5479                            gen_helper_neon_abdl_s16(cpu_V0, tmp, tmp2);
5480                            break;
5481                        case 1:
5482                            gen_helper_neon_abdl_u16(cpu_V0, tmp, tmp2);
5483                            break;
5484                        case 2:
5485                            gen_helper_neon_abdl_s32(cpu_V0, tmp, tmp2);
5486                            break;
5487                        case 3:
5488                            gen_helper_neon_abdl_u32(cpu_V0, tmp, tmp2);
5489                            break;
5490                        case 4:
5491                            gen_helper_neon_abdl_s64(cpu_V0, tmp, tmp2);
5492                            break;
5493                        case 5:
5494                            gen_helper_neon_abdl_u64(cpu_V0, tmp, tmp2);
5495                            break;
5496                        default: abort();
5497                        }
5498                        tcg_temp_free_i32(tmp2);
5499                        tcg_temp_free_i32(tmp);
5500                        break;
5501                    case 8: case 9: case 10: case 11: case 12: case 13:
5502                        /* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */
5503                        gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
5504                        break;
5505                    case 14: /* Polynomial VMULL */
5506                        gen_helper_neon_mull_p8(cpu_V0, tmp, tmp2);
5507                        tcg_temp_free_i32(tmp2);
5508                        tcg_temp_free_i32(tmp);
5509                        break;
5510                    default: /* 15 is RESERVED: caught earlier  */
5511                        abort();
5512                    }
5513                    if (op == 13) {
5514                        /* VQDMULL */
5515                        gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
5516                        neon_store_reg64(cpu_V0, rd + pass);
5517                    } else if (op == 5 || (op >= 8 && op <= 11)) {
5518                        /* Accumulate.  */
5519                        neon_load_reg64(cpu_V1, rd + pass);
5520                        switch (op) {
5521                        case 10: /* VMLSL */
5522                            gen_neon_negl(cpu_V0, size);
5523                            /* Fall through */
5524                        case 5: case 8: /* VABAL, VMLAL */
5525                            gen_neon_addl(size);
5526                            break;
5527                        case 9: case 11: /* VQDMLAL, VQDMLSL */
5528                            gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
5529                            if (op == 11) {
5530                                gen_neon_negl(cpu_V0, size);
5531                            }
5532                            gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
5533                            break;
5534                        default:
5535                            abort();
5536                        }
5537                        neon_store_reg64(cpu_V0, rd + pass);
5538                    } else if (op == 4 || op == 6) {
5539                        /* Narrowing operation.  */
5540                        tmp = tcg_temp_new_i32();
5541                        if (!u) {
5542                            switch (size) {
5543                            case 0:
5544                                gen_helper_neon_narrow_high_u8(tmp, cpu_V0);
5545                                break;
5546                            case 1:
5547                                gen_helper_neon_narrow_high_u16(tmp, cpu_V0);
5548                                break;
5549                            case 2:
5550                                tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
5551                                tcg_gen_trunc_i64_i32(tmp, cpu_V0);
5552                                break;
5553                            default: abort();
5554                            }
5555                        } else {
5556                            switch (size) {
5557                            case 0:
5558                                gen_helper_neon_narrow_round_high_u8(tmp, cpu_V0);
5559                                break;
5560                            case 1:
5561                                gen_helper_neon_narrow_round_high_u16(tmp, cpu_V0);
5562                                break;
5563                            case 2:
5564                                tcg_gen_addi_i64(cpu_V0, cpu_V0, 1u << 31);
5565                                tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
5566                                tcg_gen_trunc_i64_i32(tmp, cpu_V0);
5567                                break;
5568                            default: abort();
5569                            }
5570                        }
5571                        if (pass == 0) {
5572                            tmp3 = tmp;
5573                        } else {
5574                            neon_store_reg(rd, 0, tmp3);
5575                            neon_store_reg(rd, 1, tmp);
5576                        }
5577                    } else {
5578                        /* Write back the result.  */
5579                        neon_store_reg64(cpu_V0, rd + pass);
5580                    }
5581                }
5582            } else {
5583                /* Two registers and a scalar. NB that for ops of this form
5584                 * the ARM ARM labels bit 24 as Q, but it is in our variable
5585                 * 'u', not 'q'.
5586                 */
5587                if (size == 0) {
5588                    return 1;
5589                }
5590                switch (op) {
5591                case 1: /* Float VMLA scalar */
5592                case 5: /* Floating point VMLS scalar */
5593                case 9: /* Floating point VMUL scalar */
5594                    if (size == 1) {
5595                        return 1;
5596                    }
5597                    /* fall through */
5598                case 0: /* Integer VMLA scalar */
5599                case 4: /* Integer VMLS scalar */
5600                case 8: /* Integer VMUL scalar */
5601                case 12: /* VQDMULH scalar */
5602                case 13: /* VQRDMULH scalar */
5603                    if (u && ((rd | rn) & 1)) {
5604                        return 1;
5605                    }
5606                    tmp = neon_get_scalar(size, rm);
5607                    neon_store_scratch(0, tmp);
5608                    for (pass = 0; pass < (u ? 4 : 2); pass++) {
5609                        tmp = neon_load_scratch(0);
5610                        tmp2 = neon_load_reg(rn, pass);
5611                        if (op == 12) {
5612                            if (size == 1) {
5613                                gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
5614                            } else {
5615                                gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
5616                            }
5617                        } else if (op == 13) {
5618                            if (size == 1) {
5619                                gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
5620                            } else {
5621                                gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
5622                            }
5623                        } else if (op & 1) {
5624                            TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5625                            gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
5626                            tcg_temp_free_ptr(fpstatus);
5627                        } else {
5628                            switch (size) {
5629                            case 0: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
5630                            case 1: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
5631                            case 2: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
5632                            default: abort();
5633                            }
5634                        }
5635                        tcg_temp_free_i32(tmp2);
5636                        if (op < 8) {
5637                            /* Accumulate.  */
5638                            tmp2 = neon_load_reg(rd, pass);
5639                            switch (op) {
5640                            case 0:
5641                                gen_neon_add(size, tmp, tmp2);
5642                                break;
5643                            case 1:
5644                            {
5645                                TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5646                                gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
5647                                tcg_temp_free_ptr(fpstatus);
5648                                break;
5649                            }
5650                            case 4:
5651                                gen_neon_rsb(size, tmp, tmp2);
5652                                break;
5653                            case 5:
5654                            {
5655                                TCGv_ptr fpstatus = get_fpstatus_ptr(1);
5656                                gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
5657                                tcg_temp_free_ptr(fpstatus);
5658                                break;
5659                            }
5660                            default:
5661                                abort();
5662                            }
5663                            tcg_temp_free_i32(tmp2);
5664                        }
5665                        neon_store_reg(rd, pass, tmp);
5666                    }
5667                    break;
5668                case 3: /* VQDMLAL scalar */
5669                case 7: /* VQDMLSL scalar */
5670                case 11: /* VQDMULL scalar */
5671                    if (u == 1) {
5672                        return 1;
5673                    }
5674                    /* fall through */
5675                case 2: /* VMLAL sclar */
5676                case 6: /* VMLSL scalar */
5677                case 10: /* VMULL scalar */
5678                    if (rd & 1) {
5679                        return 1;
5680                    }
5681                    tmp2 = neon_get_scalar(size, rm);
5682                    /* We need a copy of tmp2 because gen_neon_mull
5683                     * deletes it during pass 0.  */
5684                    tmp4 = tcg_temp_new_i32();
5685                    tcg_gen_mov_i32(tmp4, tmp2);
5686                    tmp3 = neon_load_reg(rn, 1);
5687
5688                    for (pass = 0; pass < 2; pass++) {
5689                        if (pass == 0) {
5690                            tmp = neon_load_reg(rn, 0);
5691                        } else {
5692                            tmp = tmp3;
5693                            tmp2 = tmp4;
5694                        }
5695                        gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
5696                        if (op != 11) {
5697                            neon_load_reg64(cpu_V1, rd + pass);
5698                        }
5699                        switch (op) {
5700                        case 6:
5701                            gen_neon_negl(cpu_V0, size);
5702                            /* Fall through */
5703                        case 2:
5704                            gen_neon_addl(size);
5705                            break;
5706                        case 3: case 7:
5707                            gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
5708                            if (op == 7) {
5709                                gen_neon_negl(cpu_V0, size);
5710                            }
5711                            gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
5712                            break;
5713                        case 10:
5714                            /* no-op */
5715                            break;
5716                        case 11:
5717                            gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
5718                            break;
5719                        default:
5720                            abort();
5721                        }
5722                        neon_store_reg64(cpu_V0, rd + pass);
5723                    }
5724
5725
5726                    break;
5727                default: /* 14 and 15 are RESERVED */
5728                    return 1;
5729                }
5730            }
5731        } else { /* size == 3 */
5732            if (!u) {
5733                /* Extract.  */
5734                imm = (insn >> 8) & 0xf;
5735
5736                if (imm > 7 && !q)
5737                    return 1;
5738
5739                if (q && ((rd | rn | rm) & 1)) {
5740                    return 1;
5741                }
5742
5743                if (imm == 0) {
5744                    neon_load_reg64(cpu_V0, rn);
5745                    if (q) {
5746                        neon_load_reg64(cpu_V1, rn + 1);
5747                    }
5748                } else if (imm == 8) {
5749                    neon_load_reg64(cpu_V0, rn + 1);
5750                    if (q) {
5751                        neon_load_reg64(cpu_V1, rm);
5752                    }
5753                } else if (q) {
5754                    tmp64 = tcg_temp_new_i64();
5755                    if (imm < 8) {
5756                        neon_load_reg64(cpu_V0, rn);
5757                        neon_load_reg64(tmp64, rn + 1);
5758                    } else {
5759                        neon_load_reg64(cpu_V0, rn + 1);
5760                        neon_load_reg64(tmp64, rm);
5761                    }
5762                    tcg_gen_shri_i64(cpu_V0, cpu_V0, (imm & 7) * 8);
5763                    tcg_gen_shli_i64(cpu_V1, tmp64, 64 - ((imm & 7) * 8));
5764                    tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
5765                    if (imm < 8) {
5766                        neon_load_reg64(cpu_V1, rm);
5767                    } else {
5768                        neon_load_reg64(cpu_V1, rm + 1);
5769                        imm -= 8;
5770                    }
5771                    tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
5772                    tcg_gen_shri_i64(tmp64, tmp64, imm * 8);
5773                    tcg_gen_or_i64(cpu_V1, cpu_V1, tmp64);
5774                    tcg_temp_free_i64(tmp64);
5775                } else {
5776                    /* BUGFIX */
5777                    neon_load_reg64(cpu_V0, rn);
5778                    tcg_gen_shri_i64(cpu_V0, cpu_V0, imm * 8);
5779                    neon_load_reg64(cpu_V1, rm);
5780                    tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
5781                    tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
5782                }
5783                neon_store_reg64(cpu_V0, rd);
5784                if (q) {
5785                    neon_store_reg64(cpu_V1, rd + 1);
5786                }
5787            } else if ((insn & (1 << 11)) == 0) {
5788                /* Two register misc.  */
5789                op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf);
5790                size = (insn >> 18) & 3;
5791                /* UNDEF for unknown op values and bad op-size combinations */
5792                if ((neon_2rm_sizes[op] & (1 << size)) == 0) {
5793                    return 1;
5794                }
5795                if ((op != NEON_2RM_VMOVN && op != NEON_2RM_VQMOVN) &&
5796                    q && ((rm | rd) & 1)) {
5797                    return 1;
5798                }
5799                switch (op) {
5800                case NEON_2RM_VREV64:
5801                    for (pass = 0; pass < (q ? 2 : 1); pass++) {
5802                        tmp = neon_load_reg(rm, pass * 2);
5803                        tmp2 = neon_load_reg(rm, pass * 2 + 1);
5804                        switch (size) {
5805                        case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
5806                        case 1: gen_swap_half(tmp); break;
5807                        case 2: /* no-op */ break;
5808                        default: abort();
5809                        }
5810                        neon_store_reg(rd, pass * 2 + 1, tmp);
5811                        if (size == 2) {
5812                            neon_store_reg(rd, pass * 2, tmp2);
5813                        } else {
5814                            switch (size) {
5815                            case 0: tcg_gen_bswap32_i32(tmp2, tmp2); break;
5816                            case 1: gen_swap_half(tmp2); break;
5817                            default: abort();
5818                            }
5819                            neon_store_reg(rd, pass * 2, tmp2);
5820                        }
5821                    }
5822                    break;
5823                case NEON_2RM_VPADDL: case NEON_2RM_VPADDL_U:
5824                case NEON_2RM_VPADAL: case NEON_2RM_VPADAL_U:
5825                    for (pass = 0; pass < q + 1; pass++) {
5826                        tmp = neon_load_reg(rm, pass * 2);
5827                        gen_neon_widen(cpu_V0, tmp, size, op & 1);
5828                        tmp = neon_load_reg(rm, pass * 2 + 1);
5829                        gen_neon_widen(cpu_V1, tmp, size, op & 1);
5830                        switch (size) {
5831                        case 0: gen_helper_neon_paddl_u16(CPU_V001); break;
5832                        case 1: gen_helper_neon_paddl_u32(CPU_V001); break;
5833                        case 2: tcg_gen_add_i64(CPU_V001); break;
5834                        default: abort();
5835                        }
5836                        if (op >= NEON_2RM_VPADAL) {
5837                            /* Accumulate.  */
5838                            neon_load_reg64(cpu_V1, rd + pass);
5839                            gen_neon_addl(size);
5840                        }
5841                        neon_store_reg64(cpu_V0, rd + pass);
5842                    }
5843                    break;
5844                case NEON_2RM_VTRN:
5845                    if (size == 2) {
5846                        int n;
5847                        for (n = 0; n < (q ? 4 : 2); n += 2) {
5848                            tmp = neon_load_reg(rm, n);
5849                            tmp2 = neon_load_reg(rd, n + 1);
5850                            neon_store_reg(rm, n, tmp2);
5851                            neon_store_reg(rd, n + 1, tmp);
5852                        }
5853                    } else {
5854                        goto elementwise;
5855                    }
5856                    break;
5857                case NEON_2RM_VUZP:
5858                    if (gen_neon_unzip(rd, rm, size, q)) {
5859                        return 1;
5860                    }
5861                    break;
5862                case NEON_2RM_VZIP:
5863                    if (gen_neon_zip(rd, rm, size, q)) {
5864                        return 1;
5865                    }
5866                    break;
5867                case NEON_2RM_VMOVN: case NEON_2RM_VQMOVN:
5868                    /* also VQMOVUN; op field and mnemonics don't line up */
5869                    if (rm & 1) {
5870                        return 1;
5871                    }
5872                    TCGV_UNUSED(tmp2);
5873                    for (pass = 0; pass < 2; pass++) {
5874                        neon_load_reg64(cpu_V0, rm + pass);
5875                        tmp = tcg_temp_new_i32();
5876                        gen_neon_narrow_op(op == NEON_2RM_VMOVN, q, size,
5877                                           tmp, cpu_V0);
5878                        if (pass == 0) {
5879                            tmp2 = tmp;
5880                        } else {
5881                            neon_store_reg(rd, 0, tmp2);
5882                            neon_store_reg(rd, 1, tmp);
5883                        }
5884                    }
5885                    break;
5886                case NEON_2RM_VSHLL:
5887                    if (q || (rd & 1)) {
5888                        return 1;
5889                    }
5890                    tmp = neon_load_reg(rm, 0);
5891                    tmp2 = neon_load_reg(rm, 1);
5892                    for (pass = 0; pass < 2; pass++) {
5893                        if (pass == 1)
5894                            tmp = tmp2;
5895                        gen_neon_widen(cpu_V0, tmp, size, 1);
5896                        tcg_gen_shli_i64(cpu_V0, cpu_V0, 8 << size);
5897                        neon_store_reg64(cpu_V0, rd + pass);
5898                    }
5899                    break;
5900                case NEON_2RM_VCVT_F16_F32:
5901                    if (!arm_feature(env, ARM_FEATURE_VFP_FP16) ||
5902                        q || (rm & 1)) {
5903                        return 1;
5904                    }
5905                    tmp = tcg_temp_new_i32();
5906                    tmp2 = tcg_temp_new_i32();
5907                    tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 0));
5908                    gen_helper_neon_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
5909                    tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 1));
5910                    gen_helper_neon_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env);
5911                    tcg_gen_shli_i32(tmp2, tmp2, 16);
5912                    tcg_gen_or_i32(tmp2, tmp2, tmp);
5913                    tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 2));
5914                    gen_helper_neon_fcvt_f32_to_f16(tmp, cpu_F0s, cpu_env);
5915                    tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, 3));
5916                    neon_store_reg(rd, 0, tmp2);
5917                    tmp2 = tcg_temp_new_i32();
5918                    gen_helper_neon_fcvt_f32_to_f16(tmp2, cpu_F0s, cpu_env);
5919                    tcg_gen_shli_i32(tmp2, tmp2, 16);
5920                    tcg_gen_or_i32(tmp2, tmp2, tmp);
5921                    neon_store_reg(rd, 1, tmp2);
5922                    tcg_temp_free_i32(tmp);
5923                    break;
5924                case NEON_2RM_VCVT_F32_F16:
5925                    if (!arm_feature(env, ARM_FEATURE_VFP_FP16) ||
5926                        q || (rd & 1)) {
5927                        return 1;
5928                    }
5929                    tmp3 = tcg_temp_new_i32();
5930                    tmp = neon_load_reg(rm, 0);
5931                    tmp2 = neon_load_reg(rm, 1);
5932                    tcg_gen_ext16u_i32(tmp3, tmp);
5933                    gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
5934                    tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 0));
5935                    tcg_gen_shri_i32(tmp3, tmp, 16);
5936                    gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
5937                    tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 1));
5938                    tcg_temp_free_i32(tmp);
5939                    tcg_gen_ext16u_i32(tmp3, tmp2);
5940                    gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
5941                    tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 2));
5942                    tcg_gen_shri_i32(tmp3, tmp2, 16);
5943                    gen_helper_neon_fcvt_f16_to_f32(cpu_F0s, tmp3, cpu_env);
5944                    tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, 3));
5945                    tcg_temp_free_i32(tmp2);
5946                    tcg_temp_free_i32(tmp3);
5947                    break;
5948                default:
5949                elementwise:
5950                    for (pass = 0; pass < (q ? 4 : 2); pass++) {
5951                        if (neon_2rm_is_float_op(op)) {
5952                            tcg_gen_ld_f32(cpu_F0s, cpu_env,
5953                                           neon_reg_offset(rm, pass));
5954                            TCGV_UNUSED(tmp);
5955                        } else {
5956                            tmp = neon_load_reg(rm, pass);
5957                        }
5958                        switch (op) {
5959                        case NEON_2RM_VREV32:
5960                            switch (size) {
5961                            case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
5962                            case 1: gen_swap_half(tmp); break;
5963                            default: abort();
5964                            }
5965                            break;
5966                        case NEON_2RM_VREV16:
5967                            gen_rev16(tmp);
5968                            break;
5969                        case NEON_2RM_VCLS:
5970                            switch (size) {
5971                            case 0: gen_helper_neon_cls_s8(tmp, tmp); break;
5972                            case 1: gen_helper_neon_cls_s16(tmp, tmp); break;
5973                            case 2: gen_helper_neon_cls_s32(tmp, tmp); break;
5974                            default: abort();
5975                            }
5976                            break;
5977                        case NEON_2RM_VCLZ:
5978                            switch (size) {
5979                            case 0: gen_helper_neon_clz_u8(tmp, tmp); break;
5980                            case 1: gen_helper_neon_clz_u16(tmp, tmp); break;
5981                            case 2: gen_helper_clz(tmp, tmp); break;
5982                            default: abort();
5983                            }
5984                            break;
5985                        case NEON_2RM_VCNT:
5986                            gen_helper_neon_cnt_u8(tmp, tmp);
5987                            break