translate.c revision a577fcadc0b365ee629aec313f57a65d54fe5d89
1/*
2 *  ARM translation
3 *
4 *  Copyright (c) 2003 Fabrice Bellard
5 *  Copyright (c) 2005-2007 CodeSourcery
6 *  Copyright (c) 2007 OpenedHand, Ltd.
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16 * Lesser General Public License for more details.
17 *
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with this library; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA  02110-1301 USA
21 */
22#include <stdarg.h>
23#include <stdlib.h>
24#include <stdio.h>
25#include <string.h>
26#include <inttypes.h>
27
28#include "cpu.h"
29#include "exec-all.h"
30#include "disas.h"
31#include "tcg-op.h"
32#include "qemu-log.h"
33
34#ifdef CONFIG_TRACE
35#include "trace.h"
36#endif
37
38#include "helpers.h"
39#define GEN_HELPER 1
40#include "helpers.h"
41
42#define ENABLE_ARCH_5J    0
43#define ENABLE_ARCH_6     arm_feature(env, ARM_FEATURE_V6)
44#define ENABLE_ARCH_6K   arm_feature(env, ARM_FEATURE_V6K)
45#define ENABLE_ARCH_6T2   arm_feature(env, ARM_FEATURE_THUMB2)
46#define ENABLE_ARCH_7     arm_feature(env, ARM_FEATURE_V7)
47
48#define ARCH(x) do { if (!ENABLE_ARCH_##x) goto illegal_op; } while(0)
49
/* internal defines */
/* Per-translation-block decoder state, threaded through all of the
   disas_* routines below.  */
typedef struct DisasContext {
    target_ulong pc;         /* address of the next instruction to decode */
    int is_jmp;              /* DISAS_* code describing how the TB ends */
    /* Nonzero if this instruction has been conditionally skipped.  */
    int condjmp;
    /* The label that will be jumped to when the instruction is skipped.  */
    int condlabel;
    /* Thumb-2 conditional execution bits.  */
    int condexec_mask;
    int condexec_cond;
    int condexec_mask_prev;  /* mask at start of instruction/block */
    struct TranslationBlock *tb;
    int singlestep_enabled;
    int thumb;               /* nonzero when decoding Thumb instructions */
#if !defined(CONFIG_USER_ONLY)
    int user;                /* nonzero when translating user-mode code */
#endif
} DisasContext;
69
70#if defined(CONFIG_USER_ONLY)
71#define IS_USER(s) 1
72#else
73#define IS_USER(s) (s->user)
74#endif
75
76#ifdef CONFIG_TRACE
77#include "helpers.h"
78#endif /* CONFIG_TRACE */
79
80/* These instructions trap after executing, so defer them until after the
81   conditional executions state has been updated.  */
82#define DISAS_WFI 4
83#define DISAS_SWI 5
84
static TCGv_ptr cpu_env;
/* We reuse the same 64-bit temporaries for efficiency.  */
static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;

/* FIXME:  These should be removed.  */
/* Scratch values T0/T1, bound to fixed host registers in
   arm_translate_init().
   NOTE(review): gen_movl_T2_reg() further down indexes cpu_T[2], which is
   out of bounds for this two-element array -- confirm the intended size.  */
static TCGv cpu_T[2];
static TCGv cpu_F0s, cpu_F1s;
static TCGv_i64 cpu_F0d, cpu_F1d;

/* gen-icount.h needs a scratch register for instruction-count bookkeeping.  */
#define ICOUNT_TEMP cpu_T[0]
#include "gen-icount.h"
96
/* initialize TCG globals.  */
void arm_translate_init(void)
{
    /* Bind the CPU state pointer and the two scratch values to fixed
       host registers AREG0..AREG2.  */
    cpu_env = tcg_global_reg_new_ptr(TCG_AREG0, "env");

    cpu_T[0] = tcg_global_reg_new_i32(TCG_AREG1, "T0");
    cpu_T[1] = tcg_global_reg_new_i32(TCG_AREG2, "T1");

    /* Expand helpers.h a second time with GEN_HELPER == 2 to register
       every helper with the TCG runtime (see the GEN_HELPER trick at the
       top of the file).  */
#define GEN_HELPER 2
#include "helpers.h"
}
108
109/* The code generator doesn't like lots of temporaries, so maintain our own
110   cache for reuse within a function.  */
111#define MAX_TEMPS 8
112static int num_temps;
113static TCGv temps[MAX_TEMPS];
114
115/* Allocate a temporary variable.  */
116static TCGv_i32 new_tmp(void)
117{
118    TCGv tmp;
119    if (num_temps == MAX_TEMPS)
120        abort();
121
122    if (GET_TCGV_I32(temps[num_temps]))
123      return temps[num_temps++];
124
125    tmp = tcg_temp_new_i32();
126    temps[num_temps++] = tmp;
127    return tmp;
128}
129
/* Release a temporary variable.  */
static void dead_tmp(TCGv tmp)
{
    int i;
    num_temps--;
    i = num_temps;
    /* Fast path: the released temp is the most recently allocated one.  */
    if (TCGV_EQUAL(temps[i], tmp))
        return;

    /* Shuffle this temp to the last slot.  */
    while (!TCGV_EQUAL(temps[i], tmp))
        i--;
    /* Close the gap, then park the released temp in the freed slot so a
       later new_tmp() can reuse it.  */
    while (i < num_temps) {
        temps[i] = temps[i + 1];
        i++;
    }
    temps[i] = tmp;
}
148
149static inline TCGv load_cpu_offset(int offset)
150{
151    TCGv tmp = new_tmp();
152    tcg_gen_ld_i32(tmp, cpu_env, offset);
153    return tmp;
154}
155
156#define load_cpu_field(name) load_cpu_offset(offsetof(CPUState, name))
157
/* Store var into the 32-bit CPUState field at "offset" and release the
   temporary.  */
static inline void store_cpu_offset(TCGv var, int offset)
{
    tcg_gen_st_i32(var, cpu_env, offset);
    dead_tmp(var);
}
163
164#define store_cpu_field(var, name) \
165    store_cpu_offset(var, offsetof(CPUState, name))
166
/* Set a variable to the value of a CPU register.  Reading r15 yields the
   architectural PC (two instructions ahead); since s->pc already points
   at the next instruction, only one instruction length is added.  */
static void load_reg_var(DisasContext *s, TCGv var, int reg)
{
    if (reg == 15) {
        uint32_t addr;
        /* normally, since we updated PC, we need only to add one insn */
        if (s->thumb)
            addr = (long)s->pc + 2;
        else
            addr = (long)s->pc + 4;
        tcg_gen_movi_i32(var, addr);
    } else {
        tcg_gen_ld_i32(var, cpu_env, offsetof(CPUState, regs[reg]));
    }
}
182
183/* Create a new temporary and set it to the value of a CPU register.  */
184static inline TCGv load_reg(DisasContext *s, int reg)
185{
186    TCGv tmp = new_tmp();
187    load_reg_var(s, tmp, reg);
188    return tmp;
189}
190
/* Set a CPU register.  The source must be a temporary and will be
   marked as dead.  */
static void store_reg(DisasContext *s, int reg, TCGv var)
{
    if (reg == 15) {
        /* Writes to the PC clear bit 0 and end the TB.  */
        tcg_gen_andi_i32(var, var, ~1);
        s->is_jmp = DISAS_JUMP;
    }
    tcg_gen_st_i32(var, cpu_env, offsetof(CPUState, regs[reg]));
    dead_tmp(var);
}
202
203
204/* Basic operations.  */
205#define gen_op_movl_T0_T1() tcg_gen_mov_i32(cpu_T[0], cpu_T[1])
206#define gen_op_movl_T0_im(im) tcg_gen_movi_i32(cpu_T[0], im)
207#define gen_op_movl_T1_im(im) tcg_gen_movi_i32(cpu_T[1], im)
208
209#define gen_op_addl_T1_im(im) tcg_gen_addi_i32(cpu_T[1], cpu_T[1], im)
210#define gen_op_addl_T0_T1() tcg_gen_add_i32(cpu_T[0], cpu_T[0], cpu_T[1])
211#define gen_op_subl_T0_T1() tcg_gen_sub_i32(cpu_T[0], cpu_T[0], cpu_T[1])
212#define gen_op_rsbl_T0_T1() tcg_gen_sub_i32(cpu_T[0], cpu_T[1], cpu_T[0])
213
214#define gen_op_addl_T0_T1_cc() gen_helper_add_cc(cpu_T[0], cpu_T[0], cpu_T[1])
215#define gen_op_adcl_T0_T1_cc() gen_helper_adc_cc(cpu_T[0], cpu_T[0], cpu_T[1])
216#define gen_op_subl_T0_T1_cc() gen_helper_sub_cc(cpu_T[0], cpu_T[0], cpu_T[1])
217#define gen_op_sbcl_T0_T1_cc() gen_helper_sbc_cc(cpu_T[0], cpu_T[0], cpu_T[1])
218#define gen_op_rsbl_T0_T1_cc() gen_helper_sub_cc(cpu_T[0], cpu_T[1], cpu_T[0])
219
220#define gen_op_andl_T0_T1() tcg_gen_and_i32(cpu_T[0], cpu_T[0], cpu_T[1])
221#define gen_op_xorl_T0_T1() tcg_gen_xor_i32(cpu_T[0], cpu_T[0], cpu_T[1])
222#define gen_op_orl_T0_T1() tcg_gen_or_i32(cpu_T[0], cpu_T[0], cpu_T[1])
223#define gen_op_notl_T0() tcg_gen_not_i32(cpu_T[0], cpu_T[0])
224#define gen_op_notl_T1() tcg_gen_not_i32(cpu_T[1], cpu_T[1])
225#define gen_op_logic_T0_cc() gen_logic_CC(cpu_T[0]);
226#define gen_op_logic_T1_cc() gen_logic_CC(cpu_T[1]);
227
228#define gen_op_shll_T1_im(im) tcg_gen_shli_i32(cpu_T[1], cpu_T[1], im)
229#define gen_op_shrl_T1_im(im) tcg_gen_shri_i32(cpu_T[1], cpu_T[1], im)
230
231/* Value extensions.  */
232#define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
233#define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
234#define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
235#define gen_sxth(var) tcg_gen_ext16s_i32(var, var)
236
237#define gen_sxtb16(var) gen_helper_sxtb16(var, var)
238#define gen_uxtb16(var) gen_helper_uxtb16(var, var)
239
240#define gen_op_mul_T0_T1() tcg_gen_mul_i32(cpu_T[0], cpu_T[0], cpu_T[1])
241
242#define gen_set_cpsr(var, mask) gen_helper_cpsr_write(var, tcg_const_i32(mask))
243/* Set NZCV flags from the high 4 bits of var.  */
244#define gen_set_nzcv(var) gen_set_cpsr(var, CPSR_NZCV)
245
246static void gen_exception(int excp)
247{
248    TCGv tmp = new_tmp();
249    tcg_gen_movi_i32(tmp, excp);
250    gen_helper_exception(tmp);
251    dead_tmp(tmp);
252}
253
/* Dual 16x16->32 signed multiply: a = low(a) * low(b) and
   b = high(a) * high(b).  Both inputs are clobbered.  */
static void gen_smul_dual(TCGv a, TCGv b)
{
    TCGv tmp1 = new_tmp();
    TCGv tmp2 = new_tmp();
    /* Product of the sign-extended low halves, kept aside while a and b
       are reused for the high halves.  */
    tcg_gen_ext16s_i32(tmp1, a);
    tcg_gen_ext16s_i32(tmp2, b);
    tcg_gen_mul_i32(tmp1, tmp1, tmp2);
    dead_tmp(tmp2);
    /* Arithmetic shifts sign-extend the high halves in place.  */
    tcg_gen_sari_i32(a, a, 16);
    tcg_gen_sari_i32(b, b, 16);
    tcg_gen_mul_i32(b, b, a);
    tcg_gen_mov_i32(a, tmp1);
    dead_tmp(tmp1);
}
268
269/* Byteswap each halfword.  */
270static void gen_rev16(TCGv var)
271{
272    TCGv tmp = new_tmp();
273    tcg_gen_shri_i32(tmp, var, 8);
274    tcg_gen_andi_i32(tmp, tmp, 0x00ff00ff);
275    tcg_gen_shli_i32(var, var, 8);
276    tcg_gen_andi_i32(var, var, 0xff00ff00);
277    tcg_gen_or_i32(var, var, tmp);
278    dead_tmp(tmp);
279}
280
281/* Byteswap low halfword and sign extend.  */
282static void gen_revsh(TCGv var)
283{
284    TCGv tmp = new_tmp();
285    tcg_gen_shri_i32(tmp, var, 8);
286    tcg_gen_andi_i32(tmp, tmp, 0x00ff);
287    tcg_gen_shli_i32(var, var, 8);
288    tcg_gen_ext8s_i32(var, var);
289    tcg_gen_or_i32(var, var, tmp);
290    dead_tmp(tmp);
291}
292
293/* Unsigned bitfield extract.  */
294static void gen_ubfx(TCGv var, int shift, uint32_t mask)
295{
296    if (shift)
297        tcg_gen_shri_i32(var, var, shift);
298    tcg_gen_andi_i32(var, var, mask);
299}
300
301/* Signed bitfield extract.  */
302static void gen_sbfx(TCGv var, int shift, int width)
303{
304    uint32_t signbit;
305
306    if (shift)
307        tcg_gen_sari_i32(var, var, shift);
308    if (shift + width < 32) {
309        signbit = 1u << (width - 1);
310        tcg_gen_andi_i32(var, var, (1u << width) - 1);
311        tcg_gen_xori_i32(var, var, signbit);
312        tcg_gen_subi_i32(var, var, signbit);
313    }
314}
315
/* Bitfield insertion.  Insert val into base.  Clobbers base and val.
   "mask" is the unshifted field mask, e.g. (1 << width) - 1.  */
static void gen_bfi(TCGv dest, TCGv base, TCGv val, int shift, uint32_t mask)
{
    tcg_gen_andi_i32(val, val, mask);
    tcg_gen_shli_i32(val, val, shift);
    tcg_gen_andi_i32(base, base, ~(mask << shift));
    tcg_gen_or_i32(dest, base, val);
}
324
/* Round the top 32 bits of a 64-bit value: a holds the low word and b
   the high word.  Bit 31 of a is the rounding bit; the rounded high
   word is left in a.  */
static void gen_roundqd(TCGv a, TCGv b)
{
    tcg_gen_shri_i32(a, a, 31);
    tcg_gen_add_i32(a, a, b);
}
331
332/* FIXME: Most targets have native widening multiplication.
333   It would be good to use that instead of a full wide multiply.  */
334/* 32x32->64 multiply.  Marks inputs as dead.  */
335static TCGv_i64 gen_mulu_i64_i32(TCGv a, TCGv b)
336{
337    TCGv_i64 tmp1 = tcg_temp_new_i64();
338    TCGv_i64 tmp2 = tcg_temp_new_i64();
339
340    tcg_gen_extu_i32_i64(tmp1, a);
341    dead_tmp(a);
342    tcg_gen_extu_i32_i64(tmp2, b);
343    dead_tmp(b);
344    tcg_gen_mul_i64(tmp1, tmp1, tmp2);
345    return tmp1;
346}
347
348static TCGv_i64 gen_muls_i64_i32(TCGv a, TCGv b)
349{
350    TCGv_i64 tmp1 = tcg_temp_new_i64();
351    TCGv_i64 tmp2 = tcg_temp_new_i64();
352
353    tcg_gen_ext_i32_i64(tmp1, a);
354    dead_tmp(a);
355    tcg_gen_ext_i32_i64(tmp2, b);
356    dead_tmp(b);
357    tcg_gen_mul_i64(tmp1, tmp1, tmp2);
358    return tmp1;
359}
360
/* Unsigned 32x32->64 multiply.  On return T0 holds the low 32 bits of
   the product and T1 the high 32 bits.  */
static void gen_op_mull_T0_T1(void)
{
    TCGv_i64 tmp1 = tcg_temp_new_i64();
    TCGv_i64 tmp2 = tcg_temp_new_i64();

    tcg_gen_extu_i32_i64(tmp1, cpu_T[0]);
    tcg_gen_extu_i32_i64(tmp2, cpu_T[1]);
    tcg_gen_mul_i64(tmp1, tmp1, tmp2);
    tcg_gen_trunc_i64_i32(cpu_T[0], tmp1);
    tcg_gen_shri_i64(tmp1, tmp1, 32);
    tcg_gen_trunc_i64_i32(cpu_T[1], tmp1);
}
374
375/* Signed 32x32->64 multiply.  */
376static void gen_imull(TCGv a, TCGv b)
377{
378    TCGv_i64 tmp1 = tcg_temp_new_i64();
379    TCGv_i64 tmp2 = tcg_temp_new_i64();
380
381    tcg_gen_ext_i32_i64(tmp1, a);
382    tcg_gen_ext_i32_i64(tmp2, b);
383    tcg_gen_mul_i64(tmp1, tmp1, tmp2);
384    tcg_gen_trunc_i64_i32(a, tmp1);
385    tcg_gen_shri_i64(tmp1, tmp1, 32);
386    tcg_gen_trunc_i64_i32(b, tmp1);
387}
388#define gen_op_imull_T0_T1() gen_imull(cpu_T[0], cpu_T[1])
389
390/* Swap low and high halfwords.  */
391static void gen_swap_half(TCGv var)
392{
393    TCGv tmp = new_tmp();
394    tcg_gen_shri_i32(tmp, var, 16);
395    tcg_gen_shli_i32(var, var, 16);
396    tcg_gen_or_i32(var, var, tmp);
397    dead_tmp(tmp);
398}
399
/* Dual 16-bit add.  Result placed in t0 and t1 is marked as dead.
    tmp = (t0 ^ t1) & 0x8000;
    t0 &= ~0x8000;
    t1 &= ~0x8000;
    t0 = (t0 + t1) ^ tmp;
 */

static void gen_add16(TCGv t0, TCGv t1)
{
    /* Clearing bit 15 of both operands stops the low-half carry from
       propagating into the high half; the final xor restores the
       correct bit 15 of each 16-bit sum.  */
    TCGv tmp = new_tmp();
    tcg_gen_xor_i32(tmp, t0, t1);
    tcg_gen_andi_i32(tmp, tmp, 0x8000);
    tcg_gen_andi_i32(t0, t0, ~0x8000);
    tcg_gen_andi_i32(t1, t1, ~0x8000);
    tcg_gen_add_i32(t0, t0, t1);
    tcg_gen_xor_i32(t0, t0, tmp);
    dead_tmp(tmp);
    dead_tmp(t1);
}
419
420#define gen_set_CF(var) tcg_gen_st_i32(var, cpu_env, offsetof(CPUState, CF))
421
422/* Set CF to the top bit of var.  */
423static void gen_set_CF_bit31(TCGv var)
424{
425    TCGv tmp = new_tmp();
426    tcg_gen_shri_i32(tmp, var, 31);
427    gen_set_CF(tmp);
428    dead_tmp(tmp);
429}
430
/* Set N and Z flags from var.  The flags are stored lazily: NF carries
   the sign in its top bit, and ZF is zero exactly when Z is set (see
   gen_test_cc).  Both are just the result value here.  */
static inline void gen_logic_CC(TCGv var)
{
    tcg_gen_st_i32(var, cpu_env, offsetof(CPUState, NF));
    tcg_gen_st_i32(var, cpu_env, offsetof(CPUState, ZF));
}
437
438/* T0 += T1 + CF.  */
439static void gen_adc_T0_T1(void)
440{
441    TCGv tmp;
442    gen_op_addl_T0_T1();
443    tmp = load_cpu_field(CF);
444    tcg_gen_add_i32(cpu_T[0], cpu_T[0], tmp);
445    dead_tmp(tmp);
446}
447
448/* dest = T0 + T1 + CF. */
449static void gen_add_carry(TCGv dest, TCGv t0, TCGv t1)
450{
451    TCGv tmp;
452    tcg_gen_add_i32(dest, t0, t1);
453    tmp = load_cpu_field(CF);
454    tcg_gen_add_i32(dest, dest, tmp);
455    dead_tmp(tmp);
456}
457
458/* dest = T0 - T1 + CF - 1.  */
459static void gen_sub_carry(TCGv dest, TCGv t0, TCGv t1)
460{
461    TCGv tmp;
462    tcg_gen_sub_i32(dest, t0, t1);
463    tmp = load_cpu_field(CF);
464    tcg_gen_add_i32(dest, dest, tmp);
465    tcg_gen_subi_i32(dest, dest, 1);
466    dead_tmp(tmp);
467}
468
469#define gen_sbc_T0_T1() gen_sub_carry(cpu_T[0], cpu_T[0], cpu_T[1])
470#define gen_rsc_T0_T1() gen_sub_carry(cpu_T[0], cpu_T[1], cpu_T[0])
471
472/* T0 &= ~T1.  Clobbers T1.  */
473/* FIXME: Implement bic natively.  */
474static inline void tcg_gen_bic_i32(TCGv dest, TCGv t0, TCGv t1)
475{
476    TCGv tmp = new_tmp();
477    tcg_gen_not_i32(tmp, t1);
478    tcg_gen_and_i32(dest, t0, tmp);
479    dead_tmp(tmp);
480}
/* T0 &= ~T1.  Clobbers T1 (it is inverted in place).  */
static inline void gen_op_bicl_T0_T1(void)
{
    gen_op_notl_T1();
    gen_op_andl_T0_T1();
}
486
487/* FIXME:  Implement this natively.  */
488#define tcg_gen_abs_i32(t0, t1) gen_helper_abs(t0, t1)
489
/* FIXME:  Implement this natively.  */
/* t0 = t1 rotated right by i (0 <= i < 32).  Clobbers t1.  Note that
   with i == 0 nothing is written to t0 (callers pass t0 == t1 or a
   nonzero rotate).  */
static void tcg_gen_rori_i32(TCGv t0, TCGv t1, int i)
{
    TCGv tmp;

    if (i == 0)
        return;

    tmp = new_tmp();
    tcg_gen_shri_i32(tmp, t1, i);
    tcg_gen_shli_i32(t1, t1, 32 - i);
    tcg_gen_or_i32(t0, t1, tmp);
    dead_tmp(tmp);
}
504
/* Set CF to bit "shift" of var -- the last bit shifted out by an
   immediate shift.  */
static void shifter_out_im(TCGv var, int shift)
{
    TCGv tmp = new_tmp();
    if (shift == 0) {
        tcg_gen_andi_i32(tmp, var, 1);
    } else {
        tcg_gen_shri_i32(tmp, var, shift);
        /* After shifting by 31 only bit 0 can be set, so no mask needed.  */
        if (shift != 31)
            tcg_gen_andi_i32(tmp, tmp, 1);
    }
    gen_set_CF(tmp);
    dead_tmp(tmp);
}
518
519/* Shift by immediate.  Includes special handling for shift == 0.  */
520static inline void gen_arm_shift_im(TCGv var, int shiftop, int shift, int flags)
521{
522    switch (shiftop) {
523    case 0: /* LSL */
524        if (shift != 0) {
525            if (flags)
526                shifter_out_im(var, 32 - shift);
527            tcg_gen_shli_i32(var, var, shift);
528        }
529        break;
530    case 1: /* LSR */
531        if (shift == 0) {
532            if (flags) {
533                tcg_gen_shri_i32(var, var, 31);
534                gen_set_CF(var);
535            }
536            tcg_gen_movi_i32(var, 0);
537        } else {
538            if (flags)
539                shifter_out_im(var, shift - 1);
540            tcg_gen_shri_i32(var, var, shift);
541        }
542        break;
543    case 2: /* ASR */
544        if (shift == 0)
545            shift = 32;
546        if (flags)
547            shifter_out_im(var, shift - 1);
548        if (shift == 32)
549          shift = 31;
550        tcg_gen_sari_i32(var, var, shift);
551        break;
552    case 3: /* ROR/RRX */
553        if (shift != 0) {
554            if (flags)
555                shifter_out_im(var, shift - 1);
556            tcg_gen_rori_i32(var, var, shift); break;
557        } else {
558            TCGv tmp = load_cpu_field(CF);
559            if (flags)
560                shifter_out_im(var, 0);
561            tcg_gen_shri_i32(var, var, 1);
562            tcg_gen_shli_i32(tmp, tmp, 31);
563            tcg_gen_or_i32(var, var, tmp);
564            dead_tmp(tmp);
565        }
566    }
567};
568
569static inline void gen_arm_shift_reg(TCGv var, int shiftop,
570                                     TCGv shift, int flags)
571{
572    if (flags) {
573        switch (shiftop) {
574        case 0: gen_helper_shl_cc(var, var, shift); break;
575        case 1: gen_helper_shr_cc(var, var, shift); break;
576        case 2: gen_helper_sar_cc(var, var, shift); break;
577        case 3: gen_helper_ror_cc(var, var, shift); break;
578        }
579    } else {
580        switch (shiftop) {
581        case 0: gen_helper_shl(var, var, shift); break;
582        case 1: gen_helper_shr(var, var, shift); break;
583        case 2: gen_helper_sar(var, var, shift); break;
584        case 3: gen_helper_ror(var, var, shift); break;
585        }
586    }
587    dead_tmp(shift);
588}
589
590#define PAS_OP(pfx) \
591    switch (op2) {  \
592    case 0: gen_pas_helper(glue(pfx,add16)); break; \
593    case 1: gen_pas_helper(glue(pfx,addsubx)); break; \
594    case 2: gen_pas_helper(glue(pfx,subaddx)); break; \
595    case 3: gen_pas_helper(glue(pfx,sub16)); break; \
596    case 4: gen_pas_helper(glue(pfx,add8)); break; \
597    case 7: gen_pas_helper(glue(pfx,sub8)); break; \
598    }
/* Dispatch an ARM parallel add/subtract to the matching helper.  op1
   selects the prefix (signed/saturating/halving/unsigned variants),
   op2 the operation via the PAS_OP table above.  The plain signed and
   unsigned forms also set the GE flags, so they receive a pointer to
   env->GE as an extra helper argument.  */
static void gen_arm_parallel_addsub(int op1, int op2, TCGv a, TCGv b)
{
    TCGv_ptr tmp;

    switch (op1) {
#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b, tmp)
    case 1:
        tmp = tcg_temp_new_ptr();
        tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE));
        PAS_OP(s)
        break;
    case 5:
        tmp = tcg_temp_new_ptr();
        tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE));
        PAS_OP(u)
        break;
#undef gen_pas_helper
#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b)
    case 2:
        PAS_OP(q);
        break;
    case 3:
        PAS_OP(sh);
        break;
    case 6:
        PAS_OP(uq);
        break;
    case 7:
        PAS_OP(uh);
        break;
#undef gen_pas_helper
    }
}
632#undef PAS_OP
633
634/* For unknown reasons Arm and Thumb-2 use arbitrarily different encodings.  */
635#define PAS_OP(pfx) \
636    switch (op2) {  \
637    case 0: gen_pas_helper(glue(pfx,add8)); break; \
638    case 1: gen_pas_helper(glue(pfx,add16)); break; \
639    case 2: gen_pas_helper(glue(pfx,addsubx)); break; \
640    case 4: gen_pas_helper(glue(pfx,sub8)); break; \
641    case 5: gen_pas_helper(glue(pfx,sub16)); break; \
642    case 6: gen_pas_helper(glue(pfx,subaddx)); break; \
643    }
/* Same dispatch as gen_arm_parallel_addsub, but for the Thumb-2
   encoding of op1/op2 (see the second PAS_OP table above).  The signed
   and unsigned forms pass a pointer to env->GE so the helper can set
   the GE flags.  */
static void gen_thumb2_parallel_addsub(int op1, int op2, TCGv a, TCGv b)
{
    TCGv_ptr tmp;

    switch (op1) {
#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b, tmp)
    case 0:
        tmp = tcg_temp_new_ptr();
        tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE));
        PAS_OP(s)
        break;
    case 4:
        tmp = tcg_temp_new_ptr();
        tcg_gen_addi_ptr(tmp, cpu_env, offsetof(CPUState, GE));
        PAS_OP(u)
        break;
#undef gen_pas_helper
#define gen_pas_helper(name) glue(gen_helper_,name)(a, a, b)
    case 1:
        PAS_OP(q);
        break;
    case 2:
        PAS_OP(sh);
        break;
    case 5:
        PAS_OP(uq);
        break;
    case 6:
        PAS_OP(uh);
        break;
#undef gen_pas_helper
    }
}
677#undef PAS_OP
678
/* Emit a branch to "label" taken when ARM condition code "cc" (0..13)
   holds, evaluating the lazily-stored NZCV fields of CPUState.  AL/NV
   (14/15) are never tested and fall into the aborting default case.  */
static void gen_test_cc(int cc, int label)
{
    TCGv tmp;
    TCGv tmp2;
    int inv;

    switch (cc) {
    case 0: /* eq: Z */
        tmp = load_cpu_field(ZF);
        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label);
        break;
    case 1: /* ne: !Z */
        tmp = load_cpu_field(ZF);
        tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, label);
        break;
    case 2: /* cs: C */
        tmp = load_cpu_field(CF);
        tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, label);
        break;
    case 3: /* cc: !C */
        tmp = load_cpu_field(CF);
        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label);
        break;
    case 4: /* mi: N */
        tmp = load_cpu_field(NF);
        tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label);
        break;
    case 5: /* pl: !N */
        tmp = load_cpu_field(NF);
        tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label);
        break;
    case 6: /* vs: V */
        tmp = load_cpu_field(VF);
        tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label);
        break;
    case 7: /* vc: !V */
        tmp = load_cpu_field(VF);
        tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label);
        break;
    case 8: /* hi: C && !Z */
        /* Invert the test: skip over the Z check when C is clear.  */
        inv = gen_new_label();
        tmp = load_cpu_field(CF);
        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, inv);
        dead_tmp(tmp);
        tmp = load_cpu_field(ZF);
        tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, label);
        gen_set_label(inv);
        break;
    case 9: /* ls: !C || Z */
        tmp = load_cpu_field(CF);
        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label);
        dead_tmp(tmp);
        tmp = load_cpu_field(ZF);
        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label);
        break;
    case 10: /* ge: N == V -> N ^ V == 0 */
        tmp = load_cpu_field(VF);
        tmp2 = load_cpu_field(NF);
        tcg_gen_xor_i32(tmp, tmp, tmp2);
        dead_tmp(tmp2);
        tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label);
        break;
    case 11: /* lt: N != V -> N ^ V != 0 */
        tmp = load_cpu_field(VF);
        tmp2 = load_cpu_field(NF);
        tcg_gen_xor_i32(tmp, tmp, tmp2);
        dead_tmp(tmp2);
        tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label);
        break;
    case 12: /* gt: !Z && N == V */
        inv = gen_new_label();
        tmp = load_cpu_field(ZF);
        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, inv);
        dead_tmp(tmp);
        tmp = load_cpu_field(VF);
        tmp2 = load_cpu_field(NF);
        tcg_gen_xor_i32(tmp, tmp, tmp2);
        dead_tmp(tmp2);
        tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label);
        gen_set_label(inv);
        break;
    case 13: /* le: Z || N != V */
        tmp = load_cpu_field(ZF);
        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label);
        dead_tmp(tmp);
        tmp = load_cpu_field(VF);
        tmp2 = load_cpu_field(NF);
        tcg_gen_xor_i32(tmp, tmp, tmp2);
        dead_tmp(tmp2);
        tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label);
        break;
    default:
        fprintf(stderr, "Bad condition code 0x%x\n", cc);
        abort();
    }
    dead_tmp(tmp);
}
776
/* Nonzero for the data-processing opcodes whose S-bit form sets the
   flags from the logical result (via gen_logic_CC) rather than from the
   add/subtract helpers.  Indexed by bits 21-24 of the instruction.  */
static const uint8_t table_logic_cc[16] = {
    1, /* and */
    1, /* xor */
    0, /* sub */
    0, /* rsb */
    0, /* add */
    0, /* adc */
    0, /* sbc */
    0, /* rsc */
    1, /* andl */
    1, /* xorl */
    0, /* cmp */
    0, /* cmn */
    1, /* orr */
    1, /* mov */
    1, /* bic */
    1, /* mvn */
};
795
/* Set PC and Thumb state from an immediate address.  Bit 0 of addr
   selects the Thumb state.  */
static inline void gen_bx_im(DisasContext *s, uint32_t addr)
{
    TCGv tmp;

    s->is_jmp = DISAS_UPDATE;
    tmp = new_tmp();
    /* Only write env->thumb when the state actually changes.  */
    if (s->thumb != (addr & 1)) {
        tcg_gen_movi_i32(tmp, addr & 1);
        tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUState, thumb));
    }
    tcg_gen_movi_i32(tmp, addr & ~1);
    tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUState, regs[15]));
    dead_tmp(tmp);
}
811
812/* Set PC and Thumb state from var.  var is marked as dead.  */
813static inline void gen_bx(DisasContext *s, TCGv var)
814{
815    TCGv tmp;
816
817    s->is_jmp = DISAS_UPDATE;
818    tmp = new_tmp();
819    tcg_gen_andi_i32(tmp, var, 1);
820    store_cpu_field(tmp, thumb);
821    tcg_gen_andi_i32(var, var, ~1);
822    store_cpu_field(var, regs[15]);
823}
824
825/* TODO: This should be removed.  Use gen_bx instead.  */
826static inline void gen_bx_T0(DisasContext *s)
827{
828    TCGv tmp = new_tmp();
829    tcg_gen_mov_i32(tmp, cpu_T[0]);
830    gen_bx(s, tmp);
831}
832
833/* Variant of store_reg which uses branch&exchange logic when storing
834   to r15 in ARM architecture v7 and above. The source must be a temporary
835   and will be marked as dead. */
836static inline void store_reg_bx(CPUState *env, DisasContext *s,
837                                int reg, TCGv var)
838{
839    if (reg == 15 && ENABLE_ARCH_7) {
840        gen_bx(s, var);
841    } else {
842        store_reg(s, reg, var);
843    }
844}
845
846static inline TCGv gen_ld8s(TCGv addr, int index)
847{
848    TCGv tmp = new_tmp();
849    tcg_gen_qemu_ld8s(tmp, addr, index);
850    return tmp;
851}
852static inline TCGv gen_ld8u(TCGv addr, int index)
853{
854    TCGv tmp = new_tmp();
855    tcg_gen_qemu_ld8u(tmp, addr, index);
856    return tmp;
857}
858static inline TCGv gen_ld16s(TCGv addr, int index)
859{
860    TCGv tmp = new_tmp();
861    tcg_gen_qemu_ld16s(tmp, addr, index);
862    return tmp;
863}
864static inline TCGv gen_ld16u(TCGv addr, int index)
865{
866    TCGv tmp = new_tmp();
867    tcg_gen_qemu_ld16u(tmp, addr, index);
868    return tmp;
869}
870static inline TCGv gen_ld32(TCGv addr, int index)
871{
872    TCGv tmp = new_tmp();
873    tcg_gen_qemu_ld32u(tmp, addr, index);
874    return tmp;
875}
/* Store the low byte of val at addr; val is consumed.  */
static inline void gen_st8(TCGv val, TCGv addr, int index)
{
    tcg_gen_qemu_st8(val, addr, index);
    dead_tmp(val);
}
/* Store the low halfword of val at addr; val is consumed.  */
static inline void gen_st16(TCGv val, TCGv addr, int index)
{
    tcg_gen_qemu_st16(val, addr, index);
    dead_tmp(val);
}
/* Store the 32-bit val at addr; val is consumed.  */
static inline void gen_st32(TCGv val, TCGv addr, int index)
{
    tcg_gen_qemu_st32(val, addr, index);
    dead_tmp(val);
}
891
/* Load CPU register "reg" into the T0 scratch value.  */
static inline void gen_movl_T0_reg(DisasContext *s, int reg)
{
    load_reg_var(s, cpu_T[0], reg);
}
896
/* Load CPU register "reg" into the T1 scratch value.  */
static inline void gen_movl_T1_reg(DisasContext *s, int reg)
{
    load_reg_var(s, cpu_T[1], reg);
}
901
/* Load CPU register "reg" into cpu_T[2].
   FIXME(review): cpu_T is declared with only two elements above, so
   cpu_T[2] is an out-of-bounds access and is never initialized by
   arm_translate_init() -- confirm the intended array size before use.  */
static inline void gen_movl_T2_reg(DisasContext *s, int reg)
{
    load_reg_var(s, cpu_T[2], reg);
}
906
907static inline void gen_set_pc_im(uint32_t val)
908{
909    TCGv tmp = new_tmp();
910    tcg_gen_movi_i32(tmp, val);
911    store_cpu_field(tmp, regs[15]);
912}
913
/* Copy scratch value cpu_T[t] into CPU register "reg".  Writes to r15
   clear bit 0 and end the TB (the PC changed).  cpu_T[t] itself is not
   consumed.  */
static inline void gen_movl_reg_TN(DisasContext *s, int reg, int t)
{
    TCGv tmp;
    if (reg == 15) {
        tmp = new_tmp();
        tcg_gen_andi_i32(tmp, cpu_T[t], ~1);
    } else {
        tmp = cpu_T[t];
    }
    tcg_gen_st_i32(tmp, cpu_env, offsetof(CPUState, regs[reg]));
    if (reg == 15) {
        dead_tmp(tmp);
        s->is_jmp = DISAS_JUMP;
    }
}
929
/* Copy T0 into CPU register "reg".  */
static inline void gen_movl_reg_T0(DisasContext *s, int reg)
{
    gen_movl_reg_TN(s, reg, 0);
}
934
/* Copy T1 into CPU register "reg".  */
static inline void gen_movl_reg_T1(DisasContext *s, int reg)
{
    gen_movl_reg_TN(s, reg, 1);
}
939
/* Force a TB lookup after an instruction that changes the CPU state.  */
static inline void gen_lookup_tb(DisasContext *s)
{
    /* Store the next PC and end the TB with DISAS_UPDATE so execution
       resumes with the new state taken into account.  */
    gen_op_movl_T0_im(s->pc);
    gen_movl_reg_T0(s, 15);
    s->is_jmp = DISAS_UPDATE;
}
947
/* Apply the addressing-mode offset encoded in a single data transfer
   instruction to var (the base address).  Bit 25 selects immediate vs.
   shifted-register offset, bit 23 is the up/down (add/subtract) bit.  */
static inline void gen_add_data_offset(DisasContext *s, unsigned int insn,
                                       TCGv var)
{
    int val, rm, shift, shiftop;
    TCGv offset;

    if (!(insn & (1 << 25))) {
        /* immediate */
        val = insn & 0xfff;
        if (!(insn & (1 << 23)))
            val = -val;
        if (val != 0)
            tcg_gen_addi_i32(var, var, val);
    } else {
        /* shift/register */
        rm = (insn) & 0xf;
        shift = (insn >> 7) & 0x1f;
        shiftop = (insn >> 5) & 3;
        offset = load_reg(s, rm);
        gen_arm_shift_im(offset, shiftop, shift, 0);
        if (!(insn & (1 << 23)))
            tcg_gen_sub_i32(var, var, offset);
        else
            tcg_gen_add_i32(var, var, offset);
        dead_tmp(offset);
    }
}
975
/* Apply the offset of a halfword/signed-byte style load/store to var,
   plus a fixed "extra" displacement.  Bit 22 selects immediate (split
   across bits 0-3 and 8-11) vs. register offset; bit 23 is the up/down
   bit.  */
static inline void gen_add_datah_offset(DisasContext *s, unsigned int insn,
                                        int extra, TCGv var)
{
    int val, rm;
    TCGv offset;

    if (insn & (1 << 22)) {
        /* immediate */
        val = (insn & 0xf) | ((insn >> 4) & 0xf0);
        if (!(insn & (1 << 23)))
            val = -val;
        val += extra;
        if (val != 0)
            tcg_gen_addi_i32(var, var, val);
    } else {
        /* register */
        if (extra)
            tcg_gen_addi_i32(var, var, extra);
        rm = (insn) & 0xf;
        offset = load_reg(s, rm);
        if (!(insn & (1 << 23)))
            tcg_gen_sub_i32(var, var, offset);
        else
            tcg_gen_add_i32(var, var, offset);
        dead_tmp(offset);
    }
}
1003
1004#define VFP_OP2(name)                                                 \
1005static inline void gen_vfp_##name(int dp)                             \
1006{                                                                     \
1007    if (dp)                                                           \
1008        gen_helper_vfp_##name##d(cpu_F0d, cpu_F0d, cpu_F1d, cpu_env); \
1009    else                                                              \
1010        gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, cpu_F1s, cpu_env); \
1011}
1012
1013VFP_OP2(add)
1014VFP_OP2(sub)
1015VFP_OP2(mul)
1016VFP_OP2(div)
1017
1018#undef VFP_OP2
1019
1020static inline void gen_vfp_abs(int dp)
1021{
1022    if (dp)
1023        gen_helper_vfp_absd(cpu_F0d, cpu_F0d);
1024    else
1025        gen_helper_vfp_abss(cpu_F0s, cpu_F0s);
1026}
1027
1028static inline void gen_vfp_neg(int dp)
1029{
1030    if (dp)
1031        gen_helper_vfp_negd(cpu_F0d, cpu_F0d);
1032    else
1033        gen_helper_vfp_negs(cpu_F0s, cpu_F0s);
1034}
1035
1036static inline void gen_vfp_sqrt(int dp)
1037{
1038    if (dp)
1039        gen_helper_vfp_sqrtd(cpu_F0d, cpu_F0d, cpu_env);
1040    else
1041        gen_helper_vfp_sqrts(cpu_F0s, cpu_F0s, cpu_env);
1042}
1043
1044static inline void gen_vfp_cmp(int dp)
1045{
1046    if (dp)
1047        gen_helper_vfp_cmpd(cpu_F0d, cpu_F1d, cpu_env);
1048    else
1049        gen_helper_vfp_cmps(cpu_F0s, cpu_F1s, cpu_env);
1050}
1051
1052static inline void gen_vfp_cmpe(int dp)
1053{
1054    if (dp)
1055        gen_helper_vfp_cmped(cpu_F0d, cpu_F1d, cpu_env);
1056    else
1057        gen_helper_vfp_cmpes(cpu_F0s, cpu_F1s, cpu_env);
1058}
1059
1060static inline void gen_vfp_F1_ld0(int dp)
1061{
1062    if (dp)
1063        tcg_gen_movi_i64(cpu_F1d, 0);
1064    else
1065        tcg_gen_movi_i32(cpu_F1s, 0);
1066}
1067
1068static inline void gen_vfp_uito(int dp)
1069{
1070    if (dp)
1071        gen_helper_vfp_uitod(cpu_F0d, cpu_F0s, cpu_env);
1072    else
1073        gen_helper_vfp_uitos(cpu_F0s, cpu_F0s, cpu_env);
1074}
1075
1076static inline void gen_vfp_sito(int dp)
1077{
1078    if (dp)
1079        gen_helper_vfp_sitod(cpu_F0d, cpu_F0s, cpu_env);
1080    else
1081        gen_helper_vfp_sitos(cpu_F0s, cpu_F0s, cpu_env);
1082}
1083
1084static inline void gen_vfp_toui(int dp)
1085{
1086    if (dp)
1087        gen_helper_vfp_touid(cpu_F0s, cpu_F0d, cpu_env);
1088    else
1089        gen_helper_vfp_touis(cpu_F0s, cpu_F0s, cpu_env);
1090}
1091
1092static inline void gen_vfp_touiz(int dp)
1093{
1094    if (dp)
1095        gen_helper_vfp_touizd(cpu_F0s, cpu_F0d, cpu_env);
1096    else
1097        gen_helper_vfp_touizs(cpu_F0s, cpu_F0s, cpu_env);
1098}
1099
1100static inline void gen_vfp_tosi(int dp)
1101{
1102    if (dp)
1103        gen_helper_vfp_tosid(cpu_F0s, cpu_F0d, cpu_env);
1104    else
1105        gen_helper_vfp_tosis(cpu_F0s, cpu_F0s, cpu_env);
1106}
1107
1108static inline void gen_vfp_tosiz(int dp)
1109{
1110    if (dp)
1111        gen_helper_vfp_tosizd(cpu_F0s, cpu_F0d, cpu_env);
1112    else
1113        gen_helper_vfp_tosizs(cpu_F0s, cpu_F0s, cpu_env);
1114}
1115
1116#define VFP_GEN_FIX(name) \
1117static inline void gen_vfp_##name(int dp, int shift) \
1118{ \
1119    if (dp) \
1120        gen_helper_vfp_##name##d(cpu_F0d, cpu_F0d, tcg_const_i32(shift), cpu_env);\
1121    else \
1122        gen_helper_vfp_##name##s(cpu_F0s, cpu_F0s, tcg_const_i32(shift), cpu_env);\
1123}
1124VFP_GEN_FIX(tosh)
1125VFP_GEN_FIX(tosl)
1126VFP_GEN_FIX(touh)
1127VFP_GEN_FIX(toul)
1128VFP_GEN_FIX(shto)
1129VFP_GEN_FIX(slto)
1130VFP_GEN_FIX(uhto)
1131VFP_GEN_FIX(ulto)
1132#undef VFP_GEN_FIX
1133
1134static inline void gen_vfp_ld(DisasContext *s, int dp)
1135{
1136    if (dp)
1137        tcg_gen_qemu_ld64(cpu_F0d, cpu_T[1], IS_USER(s));
1138    else
1139        tcg_gen_qemu_ld32u(cpu_F0s, cpu_T[1], IS_USER(s));
1140}
1141
1142static inline void gen_vfp_st(DisasContext *s, int dp)
1143{
1144    if (dp)
1145        tcg_gen_qemu_st64(cpu_F0d, cpu_T[1], IS_USER(s));
1146    else
1147        tcg_gen_qemu_st32(cpu_F0s, cpu_T[1], IS_USER(s));
1148}
1149
1150static inline long
1151vfp_reg_offset (int dp, int reg)
1152{
1153    if (dp)
1154        return offsetof(CPUARMState, vfp.regs[reg]);
1155    else if (reg & 1) {
1156        return offsetof(CPUARMState, vfp.regs[reg >> 1])
1157          + offsetof(CPU_DoubleU, l.upper);
1158    } else {
1159        return offsetof(CPUARMState, vfp.regs[reg >> 1])
1160          + offsetof(CPU_DoubleU, l.lower);
1161    }
1162}
1163
/* Return the offset of a 32-bit piece of a NEON register.
   zero is the least significant end of the register.  Each NEON
   D register maps onto a pair of single-precision slots.  */
static inline long
neon_reg_offset (int reg, int n)
{
    return vfp_reg_offset(0, reg * 2 + n);
}
1173
/* FIXME: Remove these.  */
/* Legacy accessors operating on the fixed cpu_T[0]/cpu_T[1] temps;
   the neon_load_reg/neon_store_reg functions below are the
   replacements.  */
#define neon_T0 cpu_T[0]
#define neon_T1 cpu_T[1]
#define NEON_GET_REG(T, reg, n) \
  tcg_gen_ld_i32(neon_##T, cpu_env, neon_reg_offset(reg, n))
#define NEON_SET_REG(T, reg, n) \
  tcg_gen_st_i32(neon_##T, cpu_env, neon_reg_offset(reg, n))
1181
1182static TCGv neon_load_reg(int reg, int pass)
1183{
1184    TCGv tmp = new_tmp();
1185    tcg_gen_ld_i32(tmp, cpu_env, neon_reg_offset(reg, pass));
1186    return tmp;
1187}
1188
1189static void neon_store_reg(int reg, int pass, TCGv var)
1190{
1191    tcg_gen_st_i32(var, cpu_env, neon_reg_offset(reg, pass));
1192    dead_tmp(var);
1193}
1194
1195static inline void neon_load_reg64(TCGv_i64 var, int reg)
1196{
1197    tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(1, reg));
1198}
1199
1200static inline void neon_store_reg64(TCGv_i64 var, int reg)
1201{
1202    tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(1, reg));
1203}
1204
/* FP register load/store is just an integer load/store of the same
   width; alias the names so the VFP code below reads naturally.  */
#define tcg_gen_ld_f32 tcg_gen_ld_i32
#define tcg_gen_ld_f64 tcg_gen_ld_i64
#define tcg_gen_st_f32 tcg_gen_st_i32
#define tcg_gen_st_f64 tcg_gen_st_i64
1209
1210static inline void gen_mov_F0_vreg(int dp, int reg)
1211{
1212    if (dp)
1213        tcg_gen_ld_f64(cpu_F0d, cpu_env, vfp_reg_offset(dp, reg));
1214    else
1215        tcg_gen_ld_f32(cpu_F0s, cpu_env, vfp_reg_offset(dp, reg));
1216}
1217
1218static inline void gen_mov_F1_vreg(int dp, int reg)
1219{
1220    if (dp)
1221        tcg_gen_ld_f64(cpu_F1d, cpu_env, vfp_reg_offset(dp, reg));
1222    else
1223        tcg_gen_ld_f32(cpu_F1s, cpu_env, vfp_reg_offset(dp, reg));
1224}
1225
1226static inline void gen_mov_vreg_F0(int dp, int reg)
1227{
1228    if (dp)
1229        tcg_gen_st_f64(cpu_F0d, cpu_env, vfp_reg_offset(dp, reg));
1230    else
1231        tcg_gen_st_f32(cpu_F0s, cpu_env, vfp_reg_offset(dp, reg));
1232}
1233
1234#define ARM_CP_RW_BIT	(1 << 20)
1235
1236static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
1237{
1238    tcg_gen_ld_i64(var, cpu_env, offsetof(CPUState, iwmmxt.regs[reg]));
1239}
1240
1241static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
1242{
1243    tcg_gen_st_i64(var, cpu_env, offsetof(CPUState, iwmmxt.regs[reg]));
1244}
1245
1246static inline void gen_op_iwmmxt_movl_wCx_T0(int reg)
1247{
1248    tcg_gen_st_i32(cpu_T[0], cpu_env, offsetof(CPUState, iwmmxt.cregs[reg]));
1249}
1250
1251static inline void gen_op_iwmmxt_movl_T0_wCx(int reg)
1252{
1253    tcg_gen_ld_i32(cpu_T[0], cpu_env, offsetof(CPUState, iwmmxt.cregs[reg]));
1254}
1255
1256static inline void gen_op_iwmmxt_movl_T1_wCx(int reg)
1257{
1258    tcg_gen_ld_i32(cpu_T[1], cpu_env, offsetof(CPUState, iwmmxt.cregs[reg]));
1259}
1260
/* M0 <-> wRn moves, and 64-bit bitwise ops that accumulate into M0.
   The binary ops stage the second operand through cpu_V1 (clobbered).  */
static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
{
    iwmmxt_store_reg(cpu_M0, rn);
}

static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
{
    iwmmxt_load_reg(cpu_M0, rn);
}

/* M0 |= wRn */
static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
{
    iwmmxt_load_reg(cpu_V1, rn);
    tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
}

/* M0 &= wRn */
static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
{
    iwmmxt_load_reg(cpu_V1, rn);
    tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
}

/* M0 ^= wRn */
static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
{
    iwmmxt_load_reg(cpu_V1, rn);
    tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
}
1288
/* Binary op: M0 = helper(M0, wRn).  Clobbers cpu_V1.  */
#define IWMMXT_OP(name) \
static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
{ \
    iwmmxt_load_reg(cpu_V1, rn); \
    gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1); \
}

/* As IWMMXT_OP, but the helper also takes cpu_env (it needs access to
   CPU state).  */
#define IWMMXT_OP_ENV(name) \
static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn) \
{ \
    iwmmxt_load_reg(cpu_V1, rn); \
    gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1); \
}

/* Instantiate the byte/word/long variants of an env helper.  */
#define IWMMXT_OP_ENV_SIZE(name) \
IWMMXT_OP_ENV(name##b) \
IWMMXT_OP_ENV(name##w) \
IWMMXT_OP_ENV(name##l)

/* Unary op on M0 only: M0 = helper(env, M0).  */
#define IWMMXT_OP_ENV1(name) \
static inline void gen_op_iwmmxt_##name##_M0(void) \
{ \
    gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0); \
}
1313
/* Instantiate gen_op_iwmmxt_*_M0_wRn / gen_op_iwmmxt_*_M0 wrappers
   for the iwMMXt helper functions.  */

/* Multiply / multiply-accumulate / sum-of-absolute-differences.  */
IWMMXT_OP(maddsq)
IWMMXT_OP(madduq)
IWMMXT_OP(sadb)
IWMMXT_OP(sadw)
IWMMXT_OP(mulslw)
IWMMXT_OP(mulshw)
IWMMXT_OP(mululw)
IWMMXT_OP(muluhw)
IWMMXT_OP(macsw)
IWMMXT_OP(macuw)

/* Interleave (unpack) with a second register operand.  */
IWMMXT_OP_ENV_SIZE(unpackl)
IWMMXT_OP_ENV_SIZE(unpackh)

/* Single-operand unpack: unsigned (u) / signed (s) extension of the
   low (l) or high (h) half, per element size b/w/l.  */
IWMMXT_OP_ENV1(unpacklub)
IWMMXT_OP_ENV1(unpackluw)
IWMMXT_OP_ENV1(unpacklul)
IWMMXT_OP_ENV1(unpackhub)
IWMMXT_OP_ENV1(unpackhuw)
IWMMXT_OP_ENV1(unpackhul)
IWMMXT_OP_ENV1(unpacklsb)
IWMMXT_OP_ENV1(unpacklsw)
IWMMXT_OP_ENV1(unpacklsl)
IWMMXT_OP_ENV1(unpackhsb)
IWMMXT_OP_ENV1(unpackhsw)
IWMMXT_OP_ENV1(unpackhsl)

/* Element-wise comparisons.  */
IWMMXT_OP_ENV_SIZE(cmpeq)
IWMMXT_OP_ENV_SIZE(cmpgtu)
IWMMXT_OP_ENV_SIZE(cmpgts)

/* Element-wise min/max.  */
IWMMXT_OP_ENV_SIZE(mins)
IWMMXT_OP_ENV_SIZE(minu)
IWMMXT_OP_ENV_SIZE(maxs)
IWMMXT_OP_ENV_SIZE(maxu)

/* Element-wise add/subtract: no saturation (n), unsigned (u),
   signed (s) saturating.  */
IWMMXT_OP_ENV_SIZE(subn)
IWMMXT_OP_ENV_SIZE(addn)
IWMMXT_OP_ENV_SIZE(subu)
IWMMXT_OP_ENV_SIZE(addu)
IWMMXT_OP_ENV_SIZE(subs)
IWMMXT_OP_ENV_SIZE(adds)

/* Averaging (WAVG2 family).  */
IWMMXT_OP_ENV(avgb0)
IWMMXT_OP_ENV(avgb1)
IWMMXT_OP_ENV(avgw0)
IWMMXT_OP_ENV(avgw1)

IWMMXT_OP(msadb)

/* Pack with unsigned/signed saturation, per element size.  */
IWMMXT_OP_ENV(packuw)
IWMMXT_OP_ENV(packul)
IWMMXT_OP_ENV(packuq)
IWMMXT_OP_ENV(packsw)
IWMMXT_OP_ENV(packsl)
IWMMXT_OP_ENV(packsq)
1370
/* M0 = muladd(M0, T0, T1) variants (TMIA family).  */
static inline void gen_op_iwmmxt_muladdsl_M0_T0_T1(void)
{
    gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, cpu_T[0], cpu_T[1]);
}

static inline void gen_op_iwmmxt_muladdsw_M0_T0_T1(void)
{
    gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, cpu_T[0], cpu_T[1]);
}

static inline void gen_op_iwmmxt_muladdswl_M0_T0_T1(void)
{
    gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, cpu_T[0], cpu_T[1]);
}

/* M0 = align helper(M0, wRn, T0) — T0 supplies the alignment amount
   (presumably a byte offset; see the WALIGNR decode which masks T0
   with 7).  Clobbers cpu_V1.  */
static inline void gen_op_iwmmxt_align_M0_T0_wRn(int rn)
{
    iwmmxt_load_reg(cpu_V1, rn);
    gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, cpu_T[0]);
}
1391
1392static inline void gen_op_iwmmxt_insr_M0_T0_T1(int shift)
1393{
1394    TCGv tmp = tcg_const_i32(shift);
1395    gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, cpu_T[0], cpu_T[1], tmp);
1396}
1397
/* T0 = sign-extended 8-bit element of M0 at bit offset 'shift'.
   NB: shifts M0 in place, so M0 is clobbered.  */
static inline void gen_op_iwmmxt_extrsb_T0_M0(int shift)
{
    tcg_gen_shri_i64(cpu_M0, cpu_M0, shift);
    tcg_gen_trunc_i64_i32(cpu_T[0], cpu_M0);
    tcg_gen_ext8s_i32(cpu_T[0], cpu_T[0]);
}

/* T0 = sign-extended 16-bit element of M0 at bit offset 'shift'.
   Clobbers M0.  */
static inline void gen_op_iwmmxt_extrsw_T0_M0(int shift)
{
    tcg_gen_shri_i64(cpu_M0, cpu_M0, shift);
    tcg_gen_trunc_i64_i32(cpu_T[0], cpu_M0);
    tcg_gen_ext16s_i32(cpu_T[0], cpu_T[0]);
}

/* T0 = unsigned element of M0 at bit offset 'shift', masked by 'mask'
   (pass ~0u for a full 32-bit extract and the AND is skipped).
   Clobbers M0.  */
static inline void gen_op_iwmmxt_extru_T0_M0(int shift, uint32_t mask)
{
    tcg_gen_shri_i64(cpu_M0, cpu_M0, shift);
    tcg_gen_trunc_i64_i32(cpu_T[0], cpu_M0);
    if (mask != ~0u)
        tcg_gen_andi_i32(cpu_T[0], cpu_T[0], mask);
}
1419
1420static void gen_op_iwmmxt_set_mup(void)
1421{
1422    TCGv tmp;
1423    tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1424    tcg_gen_ori_i32(tmp, tmp, 2);
1425    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1426}
1427
1428static void gen_op_iwmmxt_set_cup(void)
1429{
1430    TCGv tmp;
1431    tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1432    tcg_gen_ori_i32(tmp, tmp, 1);
1433    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1434}
1435
1436static void gen_op_iwmmxt_setpsr_nz(void)
1437{
1438    TCGv tmp = new_tmp();
1439    gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
1440    store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
1441}
1442
/* M0 += zero-extended low 32 bits of wRn.  Clobbers cpu_V1.  */
static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
{
    iwmmxt_load_reg(cpu_V1, rn);
    tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
    tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
}


/* T0 = low 32 bits of wRn, T1 = high 32 bits.  Clobbers cpu_V0.  */
static void gen_iwmmxt_movl_T0_T1_wRn(int rn)
{
    iwmmxt_load_reg(cpu_V0, rn);
    tcg_gen_trunc_i64_i32(cpu_T[0], cpu_V0);
    tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
    tcg_gen_trunc_i64_i32(cpu_T[1], cpu_V0);
}

/* wRn = T1:T0 (T0 low half, T1 high half).  Clobbers cpu_V0.  */
static void gen_iwmmxt_movl_wRn_T0_T1(int rn)
{
    tcg_gen_concat_i32_i64(cpu_V0, cpu_T[0], cpu_T[1]);
    iwmmxt_store_reg(cpu_V0, rn);
}
1464
/* Compute the effective address of an iwMMXt load/store and leave it
   in T1, performing base-register writeback as encoded.  Returns
   nonzero for an invalid addressing form (neither pre- nor
   post-indexed and the U bit clear).  */
static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn)
{
    int rd;
    uint32_t offset;

    rd = (insn >> 16) & 0xf;
    gen_movl_T1_reg(s, rd);

    /* 8-bit immediate, shifted left by 2 (i.e. scaled by 4) when
       bit 8 of the instruction is set.  */
    offset = (insn & 0xff) << ((insn >> 7) & 2);
    if (insn & (1 << 24)) {
        /* Pre indexed */
        if (insn & (1 << 23))
            gen_op_addl_T1_im(offset);
        else
            gen_op_addl_T1_im(-offset);

        /* Bit 21: write the updated address back to the base reg.  */
        if (insn & (1 << 21))
            gen_movl_reg_T1(s, rd);
    } else if (insn & (1 << 21)) {
        /* Post indexed */
        if (insn & (1 << 23))
            gen_op_movl_T0_im(offset);
        else
            gen_op_movl_T0_im(- offset);
        gen_op_addl_T0_T1();
        gen_movl_reg_T0(s, rd);
    } else if (!(insn & (1 << 23)))
        return 1;
    return 0;
}
1495
1496static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask)
1497{
1498    int rd = (insn >> 0) & 0xf;
1499
1500    if (insn & (1 << 8))
1501        if (rd < ARM_IWMMXT_wCGR0 || rd > ARM_IWMMXT_wCGR3)
1502            return 1;
1503        else
1504            gen_op_iwmmxt_movl_T0_wCx(rd);
1505    else
1506        gen_iwmmxt_movl_T0_T1_wRn(rd);
1507
1508    gen_op_movl_T1_im(mask);
1509    gen_op_andl_T0_T1();
1510    return 0;
1511}
1512
1513/* Disassemble an iwMMXt instruction.  Returns nonzero if an error occured
1514   (ie. an undefined instruction).  */
1515static int disas_iwmmxt_insn(CPUState *env, DisasContext *s, uint32_t insn)
1516{
1517    int rd, wrd;
1518    int rdhi, rdlo, rd0, rd1, i;
1519    TCGv tmp;
1520
1521    if ((insn & 0x0e000e00) == 0x0c000000) {
1522        if ((insn & 0x0fe00ff0) == 0x0c400000) {
1523            wrd = insn & 0xf;
1524            rdlo = (insn >> 12) & 0xf;
1525            rdhi = (insn >> 16) & 0xf;
1526            if (insn & ARM_CP_RW_BIT) {			/* TMRRC */
1527                gen_iwmmxt_movl_T0_T1_wRn(wrd);
1528                gen_movl_reg_T0(s, rdlo);
1529                gen_movl_reg_T1(s, rdhi);
1530            } else {					/* TMCRR */
1531                gen_movl_T0_reg(s, rdlo);
1532                gen_movl_T1_reg(s, rdhi);
1533                gen_iwmmxt_movl_wRn_T0_T1(wrd);
1534                gen_op_iwmmxt_set_mup();
1535            }
1536            return 0;
1537        }
1538
1539        wrd = (insn >> 12) & 0xf;
1540        if (gen_iwmmxt_address(s, insn))
1541            return 1;
1542        if (insn & ARM_CP_RW_BIT) {
1543            if ((insn >> 28) == 0xf) {			/* WLDRW wCx */
1544                tmp = gen_ld32(cpu_T[1], IS_USER(s));
1545                tcg_gen_mov_i32(cpu_T[0], tmp);
1546                dead_tmp(tmp);
1547                gen_op_iwmmxt_movl_wCx_T0(wrd);
1548            } else {
1549                i = 1;
1550                if (insn & (1 << 8)) {
1551                    if (insn & (1 << 22)) {		/* WLDRD */
1552                        tcg_gen_qemu_ld64(cpu_M0, cpu_T[1], IS_USER(s));
1553                        i = 0;
1554                    } else {				/* WLDRW wRd */
1555                        tmp = gen_ld32(cpu_T[1], IS_USER(s));
1556                    }
1557                } else {
1558                    if (insn & (1 << 22)) {		/* WLDRH */
1559                        tmp = gen_ld16u(cpu_T[1], IS_USER(s));
1560                    } else {				/* WLDRB */
1561                        tmp = gen_ld8u(cpu_T[1], IS_USER(s));
1562                    }
1563                }
1564                if (i) {
1565                    tcg_gen_extu_i32_i64(cpu_M0, tmp);
1566                    dead_tmp(tmp);
1567                }
1568                gen_op_iwmmxt_movq_wRn_M0(wrd);
1569            }
1570        } else {
1571            if ((insn >> 28) == 0xf) {			/* WSTRW wCx */
1572                gen_op_iwmmxt_movl_T0_wCx(wrd);
1573                tmp = new_tmp();
1574                tcg_gen_mov_i32(tmp, cpu_T[0]);
1575                gen_st32(tmp, cpu_T[1], IS_USER(s));
1576            } else {
1577                gen_op_iwmmxt_movq_M0_wRn(wrd);
1578                tmp = new_tmp();
1579                if (insn & (1 << 8)) {
1580                    if (insn & (1 << 22)) {		/* WSTRD */
1581                        dead_tmp(tmp);
1582                        tcg_gen_qemu_st64(cpu_M0, cpu_T[1], IS_USER(s));
1583                    } else {				/* WSTRW wRd */
1584                        tcg_gen_trunc_i64_i32(tmp, cpu_M0);
1585                        gen_st32(tmp, cpu_T[1], IS_USER(s));
1586                    }
1587                } else {
1588                    if (insn & (1 << 22)) {		/* WSTRH */
1589                        tcg_gen_trunc_i64_i32(tmp, cpu_M0);
1590                        gen_st16(tmp, cpu_T[1], IS_USER(s));
1591                    } else {				/* WSTRB */
1592                        tcg_gen_trunc_i64_i32(tmp, cpu_M0);
1593                        gen_st8(tmp, cpu_T[1], IS_USER(s));
1594                    }
1595                }
1596            }
1597        }
1598        return 0;
1599    }
1600
1601    if ((insn & 0x0f000000) != 0x0e000000)
1602        return 1;
1603
1604    switch (((insn >> 12) & 0xf00) | ((insn >> 4) & 0xff)) {
1605    case 0x000:						/* WOR */
1606        wrd = (insn >> 12) & 0xf;
1607        rd0 = (insn >> 0) & 0xf;
1608        rd1 = (insn >> 16) & 0xf;
1609        gen_op_iwmmxt_movq_M0_wRn(rd0);
1610        gen_op_iwmmxt_orq_M0_wRn(rd1);
1611        gen_op_iwmmxt_setpsr_nz();
1612        gen_op_iwmmxt_movq_wRn_M0(wrd);
1613        gen_op_iwmmxt_set_mup();
1614        gen_op_iwmmxt_set_cup();
1615        break;
1616    case 0x011:						/* TMCR */
1617        if (insn & 0xf)
1618            return 1;
1619        rd = (insn >> 12) & 0xf;
1620        wrd = (insn >> 16) & 0xf;
1621        switch (wrd) {
1622        case ARM_IWMMXT_wCID:
1623        case ARM_IWMMXT_wCASF:
1624            break;
1625        case ARM_IWMMXT_wCon:
1626            gen_op_iwmmxt_set_cup();
1627            /* Fall through.  */
1628        case ARM_IWMMXT_wCSSF:
1629            gen_op_iwmmxt_movl_T0_wCx(wrd);
1630            gen_movl_T1_reg(s, rd);
1631            gen_op_bicl_T0_T1();
1632            gen_op_iwmmxt_movl_wCx_T0(wrd);
1633            break;
1634        case ARM_IWMMXT_wCGR0:
1635        case ARM_IWMMXT_wCGR1:
1636        case ARM_IWMMXT_wCGR2:
1637        case ARM_IWMMXT_wCGR3:
1638            gen_op_iwmmxt_set_cup();
1639            gen_movl_reg_T0(s, rd);
1640            gen_op_iwmmxt_movl_wCx_T0(wrd);
1641            break;
1642        default:
1643            return 1;
1644        }
1645        break;
1646    case 0x100:						/* WXOR */
1647        wrd = (insn >> 12) & 0xf;
1648        rd0 = (insn >> 0) & 0xf;
1649        rd1 = (insn >> 16) & 0xf;
1650        gen_op_iwmmxt_movq_M0_wRn(rd0);
1651        gen_op_iwmmxt_xorq_M0_wRn(rd1);
1652        gen_op_iwmmxt_setpsr_nz();
1653        gen_op_iwmmxt_movq_wRn_M0(wrd);
1654        gen_op_iwmmxt_set_mup();
1655        gen_op_iwmmxt_set_cup();
1656        break;
1657    case 0x111:						/* TMRC */
1658        if (insn & 0xf)
1659            return 1;
1660        rd = (insn >> 12) & 0xf;
1661        wrd = (insn >> 16) & 0xf;
1662        gen_op_iwmmxt_movl_T0_wCx(wrd);
1663        gen_movl_reg_T0(s, rd);
1664        break;
1665    case 0x300:						/* WANDN */
1666        wrd = (insn >> 12) & 0xf;
1667        rd0 = (insn >> 0) & 0xf;
1668        rd1 = (insn >> 16) & 0xf;
1669        gen_op_iwmmxt_movq_M0_wRn(rd0);
1670        tcg_gen_neg_i64(cpu_M0, cpu_M0);
1671        gen_op_iwmmxt_andq_M0_wRn(rd1);
1672        gen_op_iwmmxt_setpsr_nz();
1673        gen_op_iwmmxt_movq_wRn_M0(wrd);
1674        gen_op_iwmmxt_set_mup();
1675        gen_op_iwmmxt_set_cup();
1676        break;
1677    case 0x200:						/* WAND */
1678        wrd = (insn >> 12) & 0xf;
1679        rd0 = (insn >> 0) & 0xf;
1680        rd1 = (insn >> 16) & 0xf;
1681        gen_op_iwmmxt_movq_M0_wRn(rd0);
1682        gen_op_iwmmxt_andq_M0_wRn(rd1);
1683        gen_op_iwmmxt_setpsr_nz();
1684        gen_op_iwmmxt_movq_wRn_M0(wrd);
1685        gen_op_iwmmxt_set_mup();
1686        gen_op_iwmmxt_set_cup();
1687        break;
1688    case 0x810: case 0xa10:				/* WMADD */
1689        wrd = (insn >> 12) & 0xf;
1690        rd0 = (insn >> 0) & 0xf;
1691        rd1 = (insn >> 16) & 0xf;
1692        gen_op_iwmmxt_movq_M0_wRn(rd0);
1693        if (insn & (1 << 21))
1694            gen_op_iwmmxt_maddsq_M0_wRn(rd1);
1695        else
1696            gen_op_iwmmxt_madduq_M0_wRn(rd1);
1697        gen_op_iwmmxt_movq_wRn_M0(wrd);
1698        gen_op_iwmmxt_set_mup();
1699        break;
1700    case 0x10e: case 0x50e: case 0x90e: case 0xd0e:	/* WUNPCKIL */
1701        wrd = (insn >> 12) & 0xf;
1702        rd0 = (insn >> 16) & 0xf;
1703        rd1 = (insn >> 0) & 0xf;
1704        gen_op_iwmmxt_movq_M0_wRn(rd0);
1705        switch ((insn >> 22) & 3) {
1706        case 0:
1707            gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
1708            break;
1709        case 1:
1710            gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
1711            break;
1712        case 2:
1713            gen_op_iwmmxt_unpackll_M0_wRn(rd1);
1714            break;
1715        case 3:
1716            return 1;
1717        }
1718        gen_op_iwmmxt_movq_wRn_M0(wrd);
1719        gen_op_iwmmxt_set_mup();
1720        gen_op_iwmmxt_set_cup();
1721        break;
1722    case 0x10c: case 0x50c: case 0x90c: case 0xd0c:	/* WUNPCKIH */
1723        wrd = (insn >> 12) & 0xf;
1724        rd0 = (insn >> 16) & 0xf;
1725        rd1 = (insn >> 0) & 0xf;
1726        gen_op_iwmmxt_movq_M0_wRn(rd0);
1727        switch ((insn >> 22) & 3) {
1728        case 0:
1729            gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
1730            break;
1731        case 1:
1732            gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
1733            break;
1734        case 2:
1735            gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
1736            break;
1737        case 3:
1738            return 1;
1739        }
1740        gen_op_iwmmxt_movq_wRn_M0(wrd);
1741        gen_op_iwmmxt_set_mup();
1742        gen_op_iwmmxt_set_cup();
1743        break;
1744    case 0x012: case 0x112: case 0x412: case 0x512:	/* WSAD */
1745        wrd = (insn >> 12) & 0xf;
1746        rd0 = (insn >> 16) & 0xf;
1747        rd1 = (insn >> 0) & 0xf;
1748        gen_op_iwmmxt_movq_M0_wRn(rd0);
1749        if (insn & (1 << 22))
1750            gen_op_iwmmxt_sadw_M0_wRn(rd1);
1751        else
1752            gen_op_iwmmxt_sadb_M0_wRn(rd1);
1753        if (!(insn & (1 << 20)))
1754            gen_op_iwmmxt_addl_M0_wRn(wrd);
1755        gen_op_iwmmxt_movq_wRn_M0(wrd);
1756        gen_op_iwmmxt_set_mup();
1757        break;
1758    case 0x010: case 0x110: case 0x210: case 0x310:	/* WMUL */
1759        wrd = (insn >> 12) & 0xf;
1760        rd0 = (insn >> 16) & 0xf;
1761        rd1 = (insn >> 0) & 0xf;
1762        gen_op_iwmmxt_movq_M0_wRn(rd0);
1763        if (insn & (1 << 21)) {
1764            if (insn & (1 << 20))
1765                gen_op_iwmmxt_mulshw_M0_wRn(rd1);
1766            else
1767                gen_op_iwmmxt_mulslw_M0_wRn(rd1);
1768        } else {
1769            if (insn & (1 << 20))
1770                gen_op_iwmmxt_muluhw_M0_wRn(rd1);
1771            else
1772                gen_op_iwmmxt_mululw_M0_wRn(rd1);
1773        }
1774        gen_op_iwmmxt_movq_wRn_M0(wrd);
1775        gen_op_iwmmxt_set_mup();
1776        break;
1777    case 0x410: case 0x510: case 0x610: case 0x710:	/* WMAC */
1778        wrd = (insn >> 12) & 0xf;
1779        rd0 = (insn >> 16) & 0xf;
1780        rd1 = (insn >> 0) & 0xf;
1781        gen_op_iwmmxt_movq_M0_wRn(rd0);
1782        if (insn & (1 << 21))
1783            gen_op_iwmmxt_macsw_M0_wRn(rd1);
1784        else
1785            gen_op_iwmmxt_macuw_M0_wRn(rd1);
1786        if (!(insn & (1 << 20))) {
1787            iwmmxt_load_reg(cpu_V1, wrd);
1788            tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1789        }
1790        gen_op_iwmmxt_movq_wRn_M0(wrd);
1791        gen_op_iwmmxt_set_mup();
1792        break;
1793    case 0x006: case 0x406: case 0x806: case 0xc06:	/* WCMPEQ */
1794        wrd = (insn >> 12) & 0xf;
1795        rd0 = (insn >> 16) & 0xf;
1796        rd1 = (insn >> 0) & 0xf;
1797        gen_op_iwmmxt_movq_M0_wRn(rd0);
1798        switch ((insn >> 22) & 3) {
1799        case 0:
1800            gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
1801            break;
1802        case 1:
1803            gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
1804            break;
1805        case 2:
1806            gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
1807            break;
1808        case 3:
1809            return 1;
1810        }
1811        gen_op_iwmmxt_movq_wRn_M0(wrd);
1812        gen_op_iwmmxt_set_mup();
1813        gen_op_iwmmxt_set_cup();
1814        break;
1815    case 0x800: case 0x900: case 0xc00: case 0xd00:	/* WAVG2 */
1816        wrd = (insn >> 12) & 0xf;
1817        rd0 = (insn >> 16) & 0xf;
1818        rd1 = (insn >> 0) & 0xf;
1819        gen_op_iwmmxt_movq_M0_wRn(rd0);
1820        if (insn & (1 << 22)) {
1821            if (insn & (1 << 20))
1822                gen_op_iwmmxt_avgw1_M0_wRn(rd1);
1823            else
1824                gen_op_iwmmxt_avgw0_M0_wRn(rd1);
1825        } else {
1826            if (insn & (1 << 20))
1827                gen_op_iwmmxt_avgb1_M0_wRn(rd1);
1828            else
1829                gen_op_iwmmxt_avgb0_M0_wRn(rd1);
1830        }
1831        gen_op_iwmmxt_movq_wRn_M0(wrd);
1832        gen_op_iwmmxt_set_mup();
1833        gen_op_iwmmxt_set_cup();
1834        break;
1835    case 0x802: case 0x902: case 0xa02: case 0xb02:	/* WALIGNR */
1836        wrd = (insn >> 12) & 0xf;
1837        rd0 = (insn >> 16) & 0xf;
1838        rd1 = (insn >> 0) & 0xf;
1839        gen_op_iwmmxt_movq_M0_wRn(rd0);
1840        gen_op_iwmmxt_movl_T0_wCx(ARM_IWMMXT_wCGR0 + ((insn >> 20) & 3));
1841        gen_op_movl_T1_im(7);
1842        gen_op_andl_T0_T1();
1843        gen_op_iwmmxt_align_M0_T0_wRn(rd1);
1844        gen_op_iwmmxt_movq_wRn_M0(wrd);
1845        gen_op_iwmmxt_set_mup();
1846        break;
1847    case 0x601: case 0x605: case 0x609: case 0x60d:	/* TINSR */
1848        rd = (insn >> 12) & 0xf;
1849        wrd = (insn >> 16) & 0xf;
1850        gen_movl_T0_reg(s, rd);
1851        gen_op_iwmmxt_movq_M0_wRn(wrd);
1852        switch ((insn >> 6) & 3) {
1853        case 0:
1854            gen_op_movl_T1_im(0xff);
1855            gen_op_iwmmxt_insr_M0_T0_T1((insn & 7) << 3);
1856            break;
1857        case 1:
1858            gen_op_movl_T1_im(0xffff);
1859            gen_op_iwmmxt_insr_M0_T0_T1((insn & 3) << 4);
1860            break;
1861        case 2:
1862            gen_op_movl_T1_im(0xffffffff);
1863            gen_op_iwmmxt_insr_M0_T0_T1((insn & 1) << 5);
1864            break;
1865        case 3:
1866            return 1;
1867        }
1868        gen_op_iwmmxt_movq_wRn_M0(wrd);
1869        gen_op_iwmmxt_set_mup();
1870        break;
1871    case 0x107: case 0x507: case 0x907: case 0xd07:	/* TEXTRM */
1872        rd = (insn >> 12) & 0xf;
1873        wrd = (insn >> 16) & 0xf;
1874        if (rd == 15)
1875            return 1;
1876        gen_op_iwmmxt_movq_M0_wRn(wrd);
1877        switch ((insn >> 22) & 3) {
1878        case 0:
1879            if (insn & 8)
1880                gen_op_iwmmxt_extrsb_T0_M0((insn & 7) << 3);
1881            else {
1882                gen_op_iwmmxt_extru_T0_M0((insn & 7) << 3, 0xff);
1883            }
1884            break;
1885        case 1:
1886            if (insn & 8)
1887                gen_op_iwmmxt_extrsw_T0_M0((insn & 3) << 4);
1888            else {
1889                gen_op_iwmmxt_extru_T0_M0((insn & 3) << 4, 0xffff);
1890            }
1891            break;
1892        case 2:
1893            gen_op_iwmmxt_extru_T0_M0((insn & 1) << 5, ~0u);
1894            break;
1895        case 3:
1896            return 1;
1897        }
1898        gen_movl_reg_T0(s, rd);
1899        break;
1900    case 0x117: case 0x517: case 0x917: case 0xd17:	/* TEXTRC */
1901        if ((insn & 0x000ff008) != 0x0003f000)
1902            return 1;
1903        gen_op_iwmmxt_movl_T1_wCx(ARM_IWMMXT_wCASF);
1904        switch ((insn >> 22) & 3) {
1905        case 0:
1906            gen_op_shrl_T1_im(((insn & 7) << 2) + 0);
1907            break;
1908        case 1:
1909            gen_op_shrl_T1_im(((insn & 3) << 3) + 4);
1910            break;
1911        case 2:
1912            gen_op_shrl_T1_im(((insn & 1) << 4) + 12);
1913            break;
1914        case 3:
1915            return 1;
1916        }
1917        gen_op_shll_T1_im(28);
1918        gen_set_nzcv(cpu_T[1]);
1919        break;
1920    case 0x401: case 0x405: case 0x409: case 0x40d:	/* TBCST */
1921        rd = (insn >> 12) & 0xf;
1922        wrd = (insn >> 16) & 0xf;
1923        gen_movl_T0_reg(s, rd);
1924        switch ((insn >> 6) & 3) {
1925        case 0:
1926            gen_helper_iwmmxt_bcstb(cpu_M0, cpu_T[0]);
1927            break;
1928        case 1:
1929            gen_helper_iwmmxt_bcstw(cpu_M0, cpu_T[0]);
1930            break;
1931        case 2:
1932            gen_helper_iwmmxt_bcstl(cpu_M0, cpu_T[0]);
1933            break;
1934        case 3:
1935            return 1;
1936        }
1937        gen_op_iwmmxt_movq_wRn_M0(wrd);
1938        gen_op_iwmmxt_set_mup();
1939        break;
1940    case 0x113: case 0x513: case 0x913: case 0xd13:	/* TANDC */
1941        if ((insn & 0x000ff00f) != 0x0003f000)
1942            return 1;
1943        gen_op_iwmmxt_movl_T1_wCx(ARM_IWMMXT_wCASF);
1944        switch ((insn >> 22) & 3) {
1945        case 0:
1946            for (i = 0; i < 7; i ++) {
1947                gen_op_shll_T1_im(4);
1948                gen_op_andl_T0_T1();
1949            }
1950            break;
1951        case 1:
1952            for (i = 0; i < 3; i ++) {
1953                gen_op_shll_T1_im(8);
1954                gen_op_andl_T0_T1();
1955            }
1956            break;
1957        case 2:
1958            gen_op_shll_T1_im(16);
1959            gen_op_andl_T0_T1();
1960            break;
1961        case 3:
1962            return 1;
1963        }
1964        gen_set_nzcv(cpu_T[0]);
1965        break;
1966    case 0x01c: case 0x41c: case 0x81c: case 0xc1c:	/* WACC */
1967        wrd = (insn >> 12) & 0xf;
1968        rd0 = (insn >> 16) & 0xf;
1969        gen_op_iwmmxt_movq_M0_wRn(rd0);
1970        switch ((insn >> 22) & 3) {
1971        case 0:
1972            gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
1973            break;
1974        case 1:
1975            gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
1976            break;
1977        case 2:
1978            gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
1979            break;
1980        case 3:
1981            return 1;
1982        }
1983        gen_op_iwmmxt_movq_wRn_M0(wrd);
1984        gen_op_iwmmxt_set_mup();
1985        break;
1986    case 0x115: case 0x515: case 0x915: case 0xd15:	/* TORC */
1987        if ((insn & 0x000ff00f) != 0x0003f000)
1988            return 1;
1989        gen_op_iwmmxt_movl_T1_wCx(ARM_IWMMXT_wCASF);
1990        switch ((insn >> 22) & 3) {
1991        case 0:
1992            for (i = 0; i < 7; i ++) {
1993                gen_op_shll_T1_im(4);
1994                gen_op_orl_T0_T1();
1995            }
1996            break;
1997        case 1:
1998            for (i = 0; i < 3; i ++) {
1999                gen_op_shll_T1_im(8);
2000                gen_op_orl_T0_T1();
2001            }
2002            break;
2003        case 2:
2004            gen_op_shll_T1_im(16);
2005            gen_op_orl_T0_T1();
2006            break;
2007        case 3:
2008            return 1;
2009        }
2010        gen_set_nzcv(cpu_T[0]);
2011        break;
2012    case 0x103: case 0x503: case 0x903: case 0xd03:	/* TMOVMSK */
2013        rd = (insn >> 12) & 0xf;
2014        rd0 = (insn >> 16) & 0xf;
2015        if ((insn & 0xf) != 0)
2016            return 1;
2017        gen_op_iwmmxt_movq_M0_wRn(rd0);
2018        switch ((insn >> 22) & 3) {
2019        case 0:
2020            gen_helper_iwmmxt_msbb(cpu_T[0], cpu_M0);
2021            break;
2022        case 1:
2023            gen_helper_iwmmxt_msbw(cpu_T[0], cpu_M0);
2024            break;
2025        case 2:
2026            gen_helper_iwmmxt_msbl(cpu_T[0], cpu_M0);
2027            break;
2028        case 3:
2029            return 1;
2030        }
2031        gen_movl_reg_T0(s, rd);
2032        break;
2033    case 0x106: case 0x306: case 0x506: case 0x706:	/* WCMPGT */
2034    case 0x906: case 0xb06: case 0xd06: case 0xf06:
2035        wrd = (insn >> 12) & 0xf;
2036        rd0 = (insn >> 16) & 0xf;
2037        rd1 = (insn >> 0) & 0xf;
2038        gen_op_iwmmxt_movq_M0_wRn(rd0);
2039        switch ((insn >> 22) & 3) {
2040        case 0:
2041            if (insn & (1 << 21))
2042                gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
2043            else
2044                gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
2045            break;
2046        case 1:
2047            if (insn & (1 << 21))
2048                gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
2049            else
2050                gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
2051            break;
2052        case 2:
2053            if (insn & (1 << 21))
2054                gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
2055            else
2056                gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
2057            break;
2058        case 3:
2059            return 1;
2060        }
2061        gen_op_iwmmxt_movq_wRn_M0(wrd);
2062        gen_op_iwmmxt_set_mup();
2063        gen_op_iwmmxt_set_cup();
2064        break;
2065    case 0x00e: case 0x20e: case 0x40e: case 0x60e:	/* WUNPCKEL */
2066    case 0x80e: case 0xa0e: case 0xc0e: case 0xe0e:
2067        wrd = (insn >> 12) & 0xf;
2068        rd0 = (insn >> 16) & 0xf;
2069        gen_op_iwmmxt_movq_M0_wRn(rd0);
2070        switch ((insn >> 22) & 3) {
2071        case 0:
2072            if (insn & (1 << 21))
2073                gen_op_iwmmxt_unpacklsb_M0();
2074            else
2075                gen_op_iwmmxt_unpacklub_M0();
2076            break;
2077        case 1:
2078            if (insn & (1 << 21))
2079                gen_op_iwmmxt_unpacklsw_M0();
2080            else
2081                gen_op_iwmmxt_unpackluw_M0();
2082            break;
2083        case 2:
2084            if (insn & (1 << 21))
2085                gen_op_iwmmxt_unpacklsl_M0();
2086            else
2087                gen_op_iwmmxt_unpacklul_M0();
2088            break;
2089        case 3:
2090            return 1;
2091        }
2092        gen_op_iwmmxt_movq_wRn_M0(wrd);
2093        gen_op_iwmmxt_set_mup();
2094        gen_op_iwmmxt_set_cup();
2095        break;
2096    case 0x00c: case 0x20c: case 0x40c: case 0x60c:	/* WUNPCKEH */
2097    case 0x80c: case 0xa0c: case 0xc0c: case 0xe0c:
2098        wrd = (insn >> 12) & 0xf;
2099        rd0 = (insn >> 16) & 0xf;
2100        gen_op_iwmmxt_movq_M0_wRn(rd0);
2101        switch ((insn >> 22) & 3) {
2102        case 0:
2103            if (insn & (1 << 21))
2104                gen_op_iwmmxt_unpackhsb_M0();
2105            else
2106                gen_op_iwmmxt_unpackhub_M0();
2107            break;
2108        case 1:
2109            if (insn & (1 << 21))
2110                gen_op_iwmmxt_unpackhsw_M0();
2111            else
2112                gen_op_iwmmxt_unpackhuw_M0();
2113            break;
2114        case 2:
2115            if (insn & (1 << 21))
2116                gen_op_iwmmxt_unpackhsl_M0();
2117            else
2118                gen_op_iwmmxt_unpackhul_M0();
2119            break;
2120        case 3:
2121            return 1;
2122        }
2123        gen_op_iwmmxt_movq_wRn_M0(wrd);
2124        gen_op_iwmmxt_set_mup();
2125        gen_op_iwmmxt_set_cup();
2126        break;
2127    case 0x204: case 0x604: case 0xa04: case 0xe04:	/* WSRL */
2128    case 0x214: case 0x614: case 0xa14: case 0xe14:
2129        wrd = (insn >> 12) & 0xf;
2130        rd0 = (insn >> 16) & 0xf;
2131        gen_op_iwmmxt_movq_M0_wRn(rd0);
2132        if (gen_iwmmxt_shift(insn, 0xff))
2133            return 1;
2134        switch ((insn >> 22) & 3) {
2135        case 0:
2136            return 1;
2137        case 1:
2138            gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, cpu_T[0]);
2139            break;
2140        case 2:
2141            gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, cpu_T[0]);
2142            break;
2143        case 3:
2144            gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, cpu_T[0]);
2145            break;
2146        }
2147        gen_op_iwmmxt_movq_wRn_M0(wrd);
2148        gen_op_iwmmxt_set_mup();
2149        gen_op_iwmmxt_set_cup();
2150        break;
2151    case 0x004: case 0x404: case 0x804: case 0xc04:	/* WSRA */
2152    case 0x014: case 0x414: case 0x814: case 0xc14:
2153        wrd = (insn >> 12) & 0xf;
2154        rd0 = (insn >> 16) & 0xf;
2155        gen_op_iwmmxt_movq_M0_wRn(rd0);
2156        if (gen_iwmmxt_shift(insn, 0xff))
2157            return 1;
2158        switch ((insn >> 22) & 3) {
2159        case 0:
2160            return 1;
2161        case 1:
2162            gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, cpu_T[0]);
2163            break;
2164        case 2:
2165            gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, cpu_T[0]);
2166            break;
2167        case 3:
2168            gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, cpu_T[0]);
2169            break;
2170        }
2171        gen_op_iwmmxt_movq_wRn_M0(wrd);
2172        gen_op_iwmmxt_set_mup();
2173        gen_op_iwmmxt_set_cup();
2174        break;
2175    case 0x104: case 0x504: case 0x904: case 0xd04:	/* WSLL */
2176    case 0x114: case 0x514: case 0x914: case 0xd14:
2177        wrd = (insn >> 12) & 0xf;
2178        rd0 = (insn >> 16) & 0xf;
2179        gen_op_iwmmxt_movq_M0_wRn(rd0);
2180        if (gen_iwmmxt_shift(insn, 0xff))
2181            return 1;
2182        switch ((insn >> 22) & 3) {
2183        case 0:
2184            return 1;
2185        case 1:
2186            gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, cpu_T[0]);
2187            break;
2188        case 2:
2189            gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, cpu_T[0]);
2190            break;
2191        case 3:
2192            gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, cpu_T[0]);
2193            break;
2194        }
2195        gen_op_iwmmxt_movq_wRn_M0(wrd);
2196        gen_op_iwmmxt_set_mup();
2197        gen_op_iwmmxt_set_cup();
2198        break;
2199    case 0x304: case 0x704: case 0xb04: case 0xf04:	/* WROR */
2200    case 0x314: case 0x714: case 0xb14: case 0xf14:
2201        wrd = (insn >> 12) & 0xf;
2202        rd0 = (insn >> 16) & 0xf;
2203        gen_op_iwmmxt_movq_M0_wRn(rd0);
2204        switch ((insn >> 22) & 3) {
2205        case 0:
2206            return 1;
2207        case 1:
2208            if (gen_iwmmxt_shift(insn, 0xf))
2209                return 1;
2210            gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, cpu_T[0]);
2211            break;
2212        case 2:
2213            if (gen_iwmmxt_shift(insn, 0x1f))
2214                return 1;
2215            gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, cpu_T[0]);
2216            break;
2217        case 3:
2218            if (gen_iwmmxt_shift(insn, 0x3f))
2219                return 1;
2220            gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, cpu_T[0]);
2221            break;
2222        }
2223        gen_op_iwmmxt_movq_wRn_M0(wrd);
2224        gen_op_iwmmxt_set_mup();
2225        gen_op_iwmmxt_set_cup();
2226        break;
2227    case 0x116: case 0x316: case 0x516: case 0x716:	/* WMIN */
2228    case 0x916: case 0xb16: case 0xd16: case 0xf16:
2229        wrd = (insn >> 12) & 0xf;
2230        rd0 = (insn >> 16) & 0xf;
2231        rd1 = (insn >> 0) & 0xf;
2232        gen_op_iwmmxt_movq_M0_wRn(rd0);
2233        switch ((insn >> 22) & 3) {
2234        case 0:
2235            if (insn & (1 << 21))
2236                gen_op_iwmmxt_minsb_M0_wRn(rd1);
2237            else
2238                gen_op_iwmmxt_minub_M0_wRn(rd1);
2239            break;
2240        case 1:
2241            if (insn & (1 << 21))
2242                gen_op_iwmmxt_minsw_M0_wRn(rd1);
2243            else
2244                gen_op_iwmmxt_minuw_M0_wRn(rd1);
2245            break;
2246        case 2:
2247            if (insn & (1 << 21))
2248                gen_op_iwmmxt_minsl_M0_wRn(rd1);
2249            else
2250                gen_op_iwmmxt_minul_M0_wRn(rd1);
2251            break;
2252        case 3:
2253            return 1;
2254        }
2255        gen_op_iwmmxt_movq_wRn_M0(wrd);
2256        gen_op_iwmmxt_set_mup();
2257        break;
2258    case 0x016: case 0x216: case 0x416: case 0x616:	/* WMAX */
2259    case 0x816: case 0xa16: case 0xc16: case 0xe16:
2260        wrd = (insn >> 12) & 0xf;
2261        rd0 = (insn >> 16) & 0xf;
2262        rd1 = (insn >> 0) & 0xf;
2263        gen_op_iwmmxt_movq_M0_wRn(rd0);
2264        switch ((insn >> 22) & 3) {
2265        case 0:
2266            if (insn & (1 << 21))
2267                gen_op_iwmmxt_maxsb_M0_wRn(rd1);
2268            else
2269                gen_op_iwmmxt_maxub_M0_wRn(rd1);
2270            break;
2271        case 1:
2272            if (insn & (1 << 21))
2273                gen_op_iwmmxt_maxsw_M0_wRn(rd1);
2274            else
2275                gen_op_iwmmxt_maxuw_M0_wRn(rd1);
2276            break;
2277        case 2:
2278            if (insn & (1 << 21))
2279                gen_op_iwmmxt_maxsl_M0_wRn(rd1);
2280            else
2281                gen_op_iwmmxt_maxul_M0_wRn(rd1);
2282            break;
2283        case 3:
2284            return 1;
2285        }
2286        gen_op_iwmmxt_movq_wRn_M0(wrd);
2287        gen_op_iwmmxt_set_mup();
2288        break;
2289    case 0x002: case 0x102: case 0x202: case 0x302:	/* WALIGNI */
2290    case 0x402: case 0x502: case 0x602: case 0x702:
2291        wrd = (insn >> 12) & 0xf;
2292        rd0 = (insn >> 16) & 0xf;
2293        rd1 = (insn >> 0) & 0xf;
2294        gen_op_iwmmxt_movq_M0_wRn(rd0);
2295        gen_op_movl_T0_im((insn >> 20) & 3);
2296        gen_op_iwmmxt_align_M0_T0_wRn(rd1);
2297        gen_op_iwmmxt_movq_wRn_M0(wrd);
2298        gen_op_iwmmxt_set_mup();
2299        break;
2300    case 0x01a: case 0x11a: case 0x21a: case 0x31a:	/* WSUB */
2301    case 0x41a: case 0x51a: case 0x61a: case 0x71a:
2302    case 0x81a: case 0x91a: case 0xa1a: case 0xb1a:
2303    case 0xc1a: case 0xd1a: case 0xe1a: case 0xf1a:
2304        wrd = (insn >> 12) & 0xf;
2305        rd0 = (insn >> 16) & 0xf;
2306        rd1 = (insn >> 0) & 0xf;
2307        gen_op_iwmmxt_movq_M0_wRn(rd0);
2308        switch ((insn >> 20) & 0xf) {
2309        case 0x0:
2310            gen_op_iwmmxt_subnb_M0_wRn(rd1);
2311            break;
2312        case 0x1:
2313            gen_op_iwmmxt_subub_M0_wRn(rd1);
2314            break;
2315        case 0x3:
2316            gen_op_iwmmxt_subsb_M0_wRn(rd1);
2317            break;
2318        case 0x4:
2319            gen_op_iwmmxt_subnw_M0_wRn(rd1);
2320            break;
2321        case 0x5:
2322            gen_op_iwmmxt_subuw_M0_wRn(rd1);
2323            break;
2324        case 0x7:
2325            gen_op_iwmmxt_subsw_M0_wRn(rd1);
2326            break;
2327        case 0x8:
2328            gen_op_iwmmxt_subnl_M0_wRn(rd1);
2329            break;
2330        case 0x9:
2331            gen_op_iwmmxt_subul_M0_wRn(rd1);
2332            break;
2333        case 0xb:
2334            gen_op_iwmmxt_subsl_M0_wRn(rd1);
2335            break;
2336        default:
2337            return 1;
2338        }
2339        gen_op_iwmmxt_movq_wRn_M0(wrd);
2340        gen_op_iwmmxt_set_mup();
2341        gen_op_iwmmxt_set_cup();
2342        break;
2343    case 0x01e: case 0x11e: case 0x21e: case 0x31e:	/* WSHUFH */
2344    case 0x41e: case 0x51e: case 0x61e: case 0x71e:
2345    case 0x81e: case 0x91e: case 0xa1e: case 0xb1e:
2346    case 0xc1e: case 0xd1e: case 0xe1e: case 0xf1e:
2347        wrd = (insn >> 12) & 0xf;
2348        rd0 = (insn >> 16) & 0xf;
2349        gen_op_iwmmxt_movq_M0_wRn(rd0);
2350        gen_op_movl_T0_im(((insn >> 16) & 0xf0) | (insn & 0x0f));
2351        gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, cpu_T[0]);
2352        gen_op_iwmmxt_movq_wRn_M0(wrd);
2353        gen_op_iwmmxt_set_mup();
2354        gen_op_iwmmxt_set_cup();
2355        break;
2356    case 0x018: case 0x118: case 0x218: case 0x318:	/* WADD */
2357    case 0x418: case 0x518: case 0x618: case 0x718:
2358    case 0x818: case 0x918: case 0xa18: case 0xb18:
2359    case 0xc18: case 0xd18: case 0xe18: case 0xf18:
2360        wrd = (insn >> 12) & 0xf;
2361        rd0 = (insn >> 16) & 0xf;
2362        rd1 = (insn >> 0) & 0xf;
2363        gen_op_iwmmxt_movq_M0_wRn(rd0);
2364        switch ((insn >> 20) & 0xf) {
2365        case 0x0:
2366            gen_op_iwmmxt_addnb_M0_wRn(rd1);
2367            break;
2368        case 0x1:
2369            gen_op_iwmmxt_addub_M0_wRn(rd1);
2370            break;
2371        case 0x3:
2372            gen_op_iwmmxt_addsb_M0_wRn(rd1);
2373            break;
2374        case 0x4:
2375            gen_op_iwmmxt_addnw_M0_wRn(rd1);
2376            break;
2377        case 0x5:
2378            gen_op_iwmmxt_adduw_M0_wRn(rd1);
2379            break;
2380        case 0x7:
2381            gen_op_iwmmxt_addsw_M0_wRn(rd1);
2382            break;
2383        case 0x8:
2384            gen_op_iwmmxt_addnl_M0_wRn(rd1);
2385            break;
2386        case 0x9:
2387            gen_op_iwmmxt_addul_M0_wRn(rd1);
2388            break;
2389        case 0xb:
2390            gen_op_iwmmxt_addsl_M0_wRn(rd1);
2391            break;
2392        default:
2393            return 1;
2394        }
2395        gen_op_iwmmxt_movq_wRn_M0(wrd);
2396        gen_op_iwmmxt_set_mup();
2397        gen_op_iwmmxt_set_cup();
2398        break;
2399    case 0x008: case 0x108: case 0x208: case 0x308:	/* WPACK */
2400    case 0x408: case 0x508: case 0x608: case 0x708:
2401    case 0x808: case 0x908: case 0xa08: case 0xb08:
2402    case 0xc08: case 0xd08: case 0xe08: case 0xf08:
2403        wrd = (insn >> 12) & 0xf;
2404        rd0 = (insn >> 16) & 0xf;
2405        rd1 = (insn >> 0) & 0xf;
2406        gen_op_iwmmxt_movq_M0_wRn(rd0);
2407        if (!(insn & (1 << 20)))
2408            return 1;
2409        switch ((insn >> 22) & 3) {
2410        case 0:
2411            return 1;
2412        case 1:
2413            if (insn & (1 << 21))
2414                gen_op_iwmmxt_packsw_M0_wRn(rd1);
2415            else
2416                gen_op_iwmmxt_packuw_M0_wRn(rd1);
2417            break;
2418        case 2:
2419            if (insn & (1 << 21))
2420                gen_op_iwmmxt_packsl_M0_wRn(rd1);
2421            else
2422                gen_op_iwmmxt_packul_M0_wRn(rd1);
2423            break;
2424        case 3:
2425            if (insn & (1 << 21))
2426                gen_op_iwmmxt_packsq_M0_wRn(rd1);
2427            else
2428                gen_op_iwmmxt_packuq_M0_wRn(rd1);
2429            break;
2430        }
2431        gen_op_iwmmxt_movq_wRn_M0(wrd);
2432        gen_op_iwmmxt_set_mup();
2433        gen_op_iwmmxt_set_cup();
2434        break;
2435    case 0x201: case 0x203: case 0x205: case 0x207:
2436    case 0x209: case 0x20b: case 0x20d: case 0x20f:
2437    case 0x211: case 0x213: case 0x215: case 0x217:
2438    case 0x219: case 0x21b: case 0x21d: case 0x21f:
2439        wrd = (insn >> 5) & 0xf;
2440        rd0 = (insn >> 12) & 0xf;
2441        rd1 = (insn >> 0) & 0xf;
2442        if (rd0 == 0xf || rd1 == 0xf)
2443            return 1;
2444        gen_op_iwmmxt_movq_M0_wRn(wrd);
2445        switch ((insn >> 16) & 0xf) {
2446        case 0x0:					/* TMIA */
2447            gen_movl_T0_reg(s, rd0);
2448            gen_movl_T1_reg(s, rd1);
2449            gen_op_iwmmxt_muladdsl_M0_T0_T1();
2450            break;
2451        case 0x8:					/* TMIAPH */
2452            gen_movl_T0_reg(s, rd0);
2453            gen_movl_T1_reg(s, rd1);
2454            gen_op_iwmmxt_muladdsw_M0_T0_T1();
2455            break;
2456        case 0xc: case 0xd: case 0xe: case 0xf:		/* TMIAxy */
2457            gen_movl_T1_reg(s, rd0);
2458            if (insn & (1 << 16))
2459                gen_op_shrl_T1_im(16);
2460            gen_op_movl_T0_T1();
2461            gen_movl_T1_reg(s, rd1);
2462            if (insn & (1 << 17))
2463                gen_op_shrl_T1_im(16);
2464            gen_op_iwmmxt_muladdswl_M0_T0_T1();
2465            break;
2466        default:
2467            return 1;
2468        }
2469        gen_op_iwmmxt_movq_wRn_M0(wrd);
2470        gen_op_iwmmxt_set_mup();
2471        break;
2472    default:
2473        return 1;
2474    }
2475
2476    return 0;
2477}
2478
2479/* Disassemble an XScale DSP instruction.  Returns nonzero if an error occured
2480   (ie. an undefined instruction).  */
2481static int disas_dsp_insn(CPUState *env, DisasContext *s, uint32_t insn)
2482{
2483    int acc, rd0, rd1, rdhi, rdlo;
2484
2485    if ((insn & 0x0ff00f10) == 0x0e200010) {
2486        /* Multiply with Internal Accumulate Format */
2487        rd0 = (insn >> 12) & 0xf;
2488        rd1 = insn & 0xf;
2489        acc = (insn >> 5) & 7;
2490
2491        if (acc != 0)
2492            return 1;
2493
2494        switch ((insn >> 16) & 0xf) {
2495        case 0x0:					/* MIA */
2496            gen_movl_T0_reg(s, rd0);
2497            gen_movl_T1_reg(s, rd1);
2498            gen_op_iwmmxt_muladdsl_M0_T0_T1();
2499            break;
2500        case 0x8:					/* MIAPH */
2501            gen_movl_T0_reg(s, rd0);
2502            gen_movl_T1_reg(s, rd1);
2503            gen_op_iwmmxt_muladdsw_M0_T0_T1();
2504            break;
2505        case 0xc:					/* MIABB */
2506        case 0xd:					/* MIABT */
2507        case 0xe:					/* MIATB */
2508        case 0xf:					/* MIATT */
2509            gen_movl_T1_reg(s, rd0);
2510            if (insn & (1 << 16))
2511                gen_op_shrl_T1_im(16);
2512            gen_op_movl_T0_T1();
2513            gen_movl_T1_reg(s, rd1);
2514            if (insn & (1 << 17))
2515                gen_op_shrl_T1_im(16);
2516            gen_op_iwmmxt_muladdswl_M0_T0_T1();
2517            break;
2518        default:
2519            return 1;
2520        }
2521
2522        gen_op_iwmmxt_movq_wRn_M0(acc);
2523        return 0;
2524    }
2525
2526    if ((insn & 0x0fe00ff8) == 0x0c400000) {
2527        /* Internal Accumulator Access Format */
2528        rdhi = (insn >> 16) & 0xf;
2529        rdlo = (insn >> 12) & 0xf;
2530        acc = insn & 7;
2531
2532        if (acc != 0)
2533            return 1;
2534
2535        if (insn & ARM_CP_RW_BIT) {			/* MRA */
2536            gen_iwmmxt_movl_T0_T1_wRn(acc);
2537            gen_movl_reg_T0(s, rdlo);
2538            gen_op_movl_T0_im((1 << (40 - 32)) - 1);
2539            gen_op_andl_T0_T1();
2540            gen_movl_reg_T0(s, rdhi);
2541        } else {					/* MAR */
2542            gen_movl_T0_reg(s, rdlo);
2543            gen_movl_T1_reg(s, rdhi);
2544            gen_iwmmxt_movl_wRn_T0_T1(acc);
2545        }
2546        return 0;
2547    }
2548
2549    return 1;
2550}
2551
2552/* Disassemble system coprocessor instruction.  Return nonzero if
2553   instruction is not defined.  */
2554static int disas_cp_insn(CPUState *env, DisasContext *s, uint32_t insn)
2555{
2556    TCGv tmp;
2557    uint32_t rd = (insn >> 12) & 0xf;
2558    uint32_t cp = (insn >> 8) & 0xf;
2559    if (IS_USER(s)) {
2560        return 1;
2561    }
2562
2563    if (insn & ARM_CP_RW_BIT) {
2564        if (!env->cp[cp].cp_read)
2565            return 1;
2566        gen_set_pc_im(s->pc);
2567        tmp = new_tmp();
2568        gen_helper_get_cp(tmp, cpu_env, tcg_const_i32(insn));
2569        store_reg(s, rd, tmp);
2570    } else {
2571        if (!env->cp[cp].cp_write)
2572            return 1;
2573        gen_set_pc_im(s->pc);
2574        tmp = load_reg(s, rd);
2575        gen_helper_set_cp(cpu_env, tcg_const_i32(insn), tmp);
2576        dead_tmp(tmp);
2577    }
2578    return 0;
2579}
2580
2581static int cp15_user_ok(uint32_t insn)
2582{
2583    int cpn = (insn >> 16) & 0xf;
2584    int cpm = insn & 0xf;
2585    int op = ((insn >> 5) & 7) | ((insn >> 18) & 0x38);
2586
2587    if (cpn == 13 && cpm == 0) {
2588        /* TLS register.  */
2589        if (op == 2 || (op == 3 && (insn & ARM_CP_RW_BIT)))
2590            return 1;
2591    }
2592    if (cpn == 7) {
2593        /* ISB, DSB, DMB.  */
2594        if ((cpm == 5 && op == 4)
2595                || (cpm == 10 && (op == 4 || op == 5)))
2596            return 1;
2597    }
2598    return 0;
2599}
2600
2601/* Disassemble system coprocessor (cp15) instruction.  Return nonzero if
2602   instruction is not defined.  */
2603static int disas_cp15_insn(CPUState *env, DisasContext *s, uint32_t insn)
2604{
2605    uint32_t rd;
2606    TCGv tmp;
2607
2608    /* M profile cores use memory mapped registers instead of cp15.  */
2609    if (arm_feature(env, ARM_FEATURE_M))
2610	return 1;
2611
2612    if ((insn & (1 << 25)) == 0) {
2613        if (insn & (1 << 20)) {
2614            /* mrrc */
2615            return 1;
2616        }
2617        /* mcrr.  Used for block cache operations, so implement as no-op.  */
2618        return 0;
2619    }
2620    if ((insn & (1 << 4)) == 0) {
2621        /* cdp */
2622        return 1;
2623    }
2624    if (IS_USER(s) && !cp15_user_ok(insn)) {
2625        return 1;
2626    }
2627    if ((insn & 0x0fff0fff) == 0x0e070f90
2628        || (insn & 0x0fff0fff) == 0x0e070f58) {
2629        /* Wait for interrupt.  */
2630        gen_set_pc_im(s->pc);
2631        s->is_jmp = DISAS_WFI;
2632        return 0;
2633    }
2634    rd = (insn >> 12) & 0xf;
2635    if (insn & ARM_CP_RW_BIT) {
2636        tmp = new_tmp();
2637        gen_helper_get_cp15(tmp, cpu_env, tcg_const_i32(insn));
2638        /* If the destination register is r15 then sets condition codes.  */
2639        if (rd != 15)
2640            store_reg(s, rd, tmp);
2641        else
2642            dead_tmp(tmp);
2643    } else {
2644        tmp = load_reg(s, rd);
2645        gen_helper_set_cp15(cpu_env, tcg_const_i32(insn), tmp);
2646        dead_tmp(tmp);
2647        /* Normally we would always end the TB here, but Linux
2648         * arch/arm/mach-pxa/sleep.S expects two instructions following
2649         * an MMU enable to execute from cache.  Imitate this behaviour.  */
2650        if (!arm_feature(env, ARM_FEATURE_XSCALE) ||
2651                (insn & 0x0fff0fff) != 0x0e010f10)
2652            gen_lookup_tb(s);
2653    }
2654    return 0;
2655}
2656
/* Shift X right by N bits when N is positive, left by -N when negative;
   lets VFP_SREG work for field offsets on either side of bit 1.  */
#define VFP_REG_SHR(x, n) (((n) > 0) ? (x) >> (n) : (x) << -(n))
/* Extract a single-precision register number: the 4-bit field at BIGBIT
   forms bits 4:1 and the bit at SMALLBIT is the low bit.  */
#define VFP_SREG(insn, bigbit, smallbit) \
  ((VFP_REG_SHR(insn, bigbit - 1) & 0x1e) | (((insn) >> (smallbit)) & 1))
/* Extract a double-precision register number into REG.  With VFP3 the
   bit at SMALLBIT is the high (fifth) register bit; without VFP3 only
   16 D registers exist, so a set SMALLBIT makes the instruction
   undefined (the enclosing function returns 1).  */
#define VFP_DREG(reg, insn, bigbit, smallbit) do { \
    if (arm_feature(env, ARM_FEATURE_VFP3)) { \
        reg = (((insn) >> (bigbit)) & 0x0f) \
              | (((insn) >> ((smallbit) - 4)) & 0x10); \
    } else { \
        if (insn & (1 << (smallbit))) \
            return 1; \
        reg = ((insn) >> (bigbit)) & 0x0f; \
    }} while (0)

/* Field positions of the D (destination), N and M register operands.  */
#define VFP_SREG_D(insn) VFP_SREG(insn, 12, 22)
#define VFP_DREG_D(reg, insn) VFP_DREG(reg, insn, 12, 22)
#define VFP_SREG_N(insn) VFP_SREG(insn, 16,  7)
#define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16,  7)
#define VFP_SREG_M(insn) VFP_SREG(insn,  0,  5)
#define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn,  0,  5)
2676
2677/* Move between integer and VFP cores.  */
2678static TCGv gen_vfp_mrs(void)
2679{
2680    TCGv tmp = new_tmp();
2681    tcg_gen_mov_i32(tmp, cpu_F0s);
2682    return tmp;
2683}
2684
2685static void gen_vfp_msr(TCGv tmp)
2686{
2687    tcg_gen_mov_i32(cpu_F0s, tmp);
2688    dead_tmp(tmp);
2689}
2690
2691static inline int
2692vfp_enabled(CPUState * env)
2693{
2694    return ((env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30)) != 0);
2695}
2696
2697static void gen_neon_dup_u8(TCGv var, int shift)
2698{
2699    TCGv tmp = new_tmp();
2700    if (shift)
2701        tcg_gen_shri_i32(var, var, shift);
2702    tcg_gen_ext8u_i32(var, var);
2703    tcg_gen_shli_i32(tmp, var, 8);
2704    tcg_gen_or_i32(var, var, tmp);
2705    tcg_gen_shli_i32(tmp, var, 16);
2706    tcg_gen_or_i32(var, var, tmp);
2707    dead_tmp(tmp);
2708}
2709
2710static void gen_neon_dup_low16(TCGv var)
2711{
2712    TCGv tmp = new_tmp();
2713    tcg_gen_ext16u_i32(var, var);
2714    tcg_gen_shli_i32(tmp, var, 16);
2715    tcg_gen_or_i32(var, var, tmp);
2716    dead_tmp(tmp);
2717}
2718
2719static void gen_neon_dup_high16(TCGv var)
2720{
2721    TCGv tmp = new_tmp();
2722    tcg_gen_andi_i32(var, var, 0xffff0000);
2723    tcg_gen_shri_i32(tmp, var, 16);
2724    tcg_gen_or_i32(var, var, tmp);
2725    dead_tmp(tmp);
2726}
2727
2728/* Disassemble a VFP instruction.  Returns nonzero if an error occured
2729   (ie. an undefined instruction).  */
2730static int disas_vfp_insn(CPUState * env, DisasContext *s, uint32_t insn)
2731{
2732    uint32_t rd, rn, rm, op, i, n, offset, delta_d, delta_m, bank_mask;
2733    int dp, veclen;
2734    TCGv tmp;
2735    TCGv tmp2;
2736
2737    if (!arm_feature(env, ARM_FEATURE_VFP))
2738        return 1;
2739
2740    if (!vfp_enabled(env)) {
2741        /* VFP disabled.  Only allow fmxr/fmrx to/from some control regs.  */
2742        if ((insn & 0x0fe00fff) != 0x0ee00a10)
2743            return 1;
2744        rn = (insn >> 16) & 0xf;
2745        if (rn != ARM_VFP_FPSID && rn != ARM_VFP_FPEXC
2746            && rn != ARM_VFP_MVFR1 && rn != ARM_VFP_MVFR0)
2747            return 1;
2748    }
2749    dp = ((insn & 0xf00) == 0xb00);
2750    switch ((insn >> 24) & 0xf) {
2751    case 0xe:
2752        if (insn & (1 << 4)) {
2753            /* single register transfer */
2754            rd = (insn >> 12) & 0xf;
2755            if (dp) {
2756                int size;
2757                int pass;
2758
2759                VFP_DREG_N(rn, insn);
2760                if (insn & 0xf)
2761                    return 1;
2762                if (insn & 0x00c00060
2763                    && !arm_feature(env, ARM_FEATURE_NEON))
2764                    return 1;
2765
2766                pass = (insn >> 21) & 1;
2767                if (insn & (1 << 22)) {
2768                    size = 0;
2769                    offset = ((insn >> 5) & 3) * 8;
2770                } else if (insn & (1 << 5)) {
2771                    size = 1;
2772                    offset = (insn & (1 << 6)) ? 16 : 0;
2773                } else {
2774                    size = 2;
2775                    offset = 0;
2776                }
2777                if (insn & ARM_CP_RW_BIT) {
2778                    /* vfp->arm */
2779                    tmp = neon_load_reg(rn, pass);
2780                    switch (size) {
2781                    case 0:
2782                        if (offset)
2783                            tcg_gen_shri_i32(tmp, tmp, offset);
2784                        if (insn & (1 << 23))
2785                            gen_uxtb(tmp);
2786                        else
2787                            gen_sxtb(tmp);
2788                        break;
2789                    case 1:
2790                        if (insn & (1 << 23)) {
2791                            if (offset) {
2792                                tcg_gen_shri_i32(tmp, tmp, 16);
2793                            } else {
2794                                gen_uxth(tmp);
2795                            }
2796                        } else {
2797                            if (offset) {
2798                                tcg_gen_sari_i32(tmp, tmp, 16);
2799                            } else {
2800                                gen_sxth(tmp);
2801                            }
2802                        }
2803                        break;
2804                    case 2:
2805                        break;
2806                    }
2807                    store_reg(s, rd, tmp);
2808                } else {
2809                    /* arm->vfp */
2810                    tmp = load_reg(s, rd);
2811                    if (insn & (1 << 23)) {
2812                        /* VDUP */
2813                        if (size == 0) {
2814                            gen_neon_dup_u8(tmp, 0);
2815                        } else if (size == 1) {
2816                            gen_neon_dup_low16(tmp);
2817                        }
2818                        for (n = 0; n <= pass * 2; n++) {
2819                            tmp2 = new_tmp();
2820                            tcg_gen_mov_i32(tmp2, tmp);
2821                            neon_store_reg(rn, n, tmp2);
2822                        }
2823                        neon_store_reg(rn, n, tmp);
2824                    } else {
2825                        /* VMOV */
2826                        switch (size) {
2827                        case 0:
2828                            tmp2 = neon_load_reg(rn, pass);
2829                            gen_bfi(tmp, tmp2, tmp, offset, 0xff);
2830                            dead_tmp(tmp2);
2831                            break;
2832                        case 1:
2833                            tmp2 = neon_load_reg(rn, pass);
2834                            gen_bfi(tmp, tmp2, tmp, offset, 0xffff);
2835                            dead_tmp(tmp2);
2836                            break;
2837                        case 2:
2838                            break;
2839                        }
2840                        neon_store_reg(rn, pass, tmp);
2841                    }
2842                }
2843            } else { /* !dp */
2844                if ((insn & 0x6f) != 0x00)
2845                    return 1;
2846                rn = VFP_SREG_N(insn);
2847                if (insn & ARM_CP_RW_BIT) {
2848                    /* vfp->arm */
2849                    if (insn & (1 << 21)) {
2850                        /* system register */
2851                        rn >>= 1;
2852
2853                        switch (rn) {
2854                        case ARM_VFP_FPSID:
2855                            /* VFP2 allows access to FSID from userspace.
2856                               VFP3 restricts all id registers to privileged
2857                               accesses.  */
2858                            if (IS_USER(s)
2859                                && arm_feature(env, ARM_FEATURE_VFP3))
2860                                return 1;
2861                            tmp = load_cpu_field(vfp.xregs[rn]);
2862                            break;
2863                        case ARM_VFP_FPEXC:
2864                            if (IS_USER(s))
2865                                return 1;
2866                            tmp = load_cpu_field(vfp.xregs[rn]);
2867                            break;
2868                        case ARM_VFP_FPINST:
2869                        case ARM_VFP_FPINST2:
2870                            /* Not present in VFP3.  */
2871                            if (IS_USER(s)
2872                                || arm_feature(env, ARM_FEATURE_VFP3))
2873                                return 1;
2874                            tmp = load_cpu_field(vfp.xregs[rn]);
2875                            break;
2876                        case ARM_VFP_FPSCR:
2877                            if (rd == 15) {
2878                                tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
2879                                tcg_gen_andi_i32(tmp, tmp, 0xf0000000);
2880                            } else {
2881                                tmp = new_tmp();
2882                                gen_helper_vfp_get_fpscr(tmp, cpu_env);
2883                            }
2884                            break;
2885                        case ARM_VFP_MVFR0:
2886                        case ARM_VFP_MVFR1:
2887                            if (IS_USER(s)
2888                                || !arm_feature(env, ARM_FEATURE_VFP3))
2889                                return 1;
2890                            tmp = load_cpu_field(vfp.xregs[rn]);
2891                            break;
2892                        default:
2893                            return 1;
2894                        }
2895                    } else {
2896                        gen_mov_F0_vreg(0, rn);
2897                        tmp = gen_vfp_mrs();
2898                    }
2899                    if (rd == 15) {
2900                        /* Set the 4 flag bits in the CPSR.  */
2901                        gen_set_nzcv(tmp);
2902                        dead_tmp(tmp);
2903                    } else {
2904                        store_reg(s, rd, tmp);
2905                    }
2906                } else {
2907                    /* arm->vfp */
2908                    tmp = load_reg(s, rd);
2909                    if (insn & (1 << 21)) {
2910                        rn >>= 1;
2911                        /* system register */
2912                        switch (rn) {
2913                        case ARM_VFP_FPSID:
2914                        case ARM_VFP_MVFR0:
2915                        case ARM_VFP_MVFR1:
2916                            /* Writes are ignored.  */
2917                            break;
2918                        case ARM_VFP_FPSCR:
2919                            gen_helper_vfp_set_fpscr(cpu_env, tmp);
2920                            dead_tmp(tmp);
2921                            gen_lookup_tb(s);
2922                            break;
2923                        case ARM_VFP_FPEXC:
2924                            if (IS_USER(s))
2925                                return 1;
2926                            store_cpu_field(tmp, vfp.xregs[rn]);
2927                            gen_lookup_tb(s);
2928                            break;
2929                        case ARM_VFP_FPINST:
2930                        case ARM_VFP_FPINST2:
2931                            store_cpu_field(tmp, vfp.xregs[rn]);
2932                            break;
2933                        default:
2934                            return 1;
2935                        }
2936                    } else {
2937                        gen_vfp_msr(tmp);
2938                        gen_mov_vreg_F0(0, rn);
2939                    }
2940                }
2941            }
2942        } else {
2943            /* data processing */
2944            /* The opcode is in bits 23, 21, 20 and 6.  */
2945            op = ((insn >> 20) & 8) | ((insn >> 19) & 6) | ((insn >> 6) & 1);
2946            if (dp) {
2947                if (op == 15) {
2948                    /* rn is opcode */
2949                    rn = ((insn >> 15) & 0x1e) | ((insn >> 7) & 1);
2950                } else {
2951                    /* rn is register number */
2952                    VFP_DREG_N(rn, insn);
2953                }
2954
2955                if (op == 15 && (rn == 15 || rn > 17)) {
2956                    /* Integer or single precision destination.  */
2957                    rd = VFP_SREG_D(insn);
2958                } else {
2959                    VFP_DREG_D(rd, insn);
2960                }
2961
2962                if (op == 15 && (rn == 16 || rn == 17)) {
2963                    /* Integer source.  */
2964                    rm = ((insn << 1) & 0x1e) | ((insn >> 5) & 1);
2965                } else {
2966                    VFP_DREG_M(rm, insn);
2967                }
2968            } else {
2969                rn = VFP_SREG_N(insn);
2970                if (op == 15 && rn == 15) {
2971                    /* Double precision destination.  */
2972                    VFP_DREG_D(rd, insn);
2973                } else {
2974                    rd = VFP_SREG_D(insn);
2975                }
2976                rm = VFP_SREG_M(insn);
2977            }
2978
2979            veclen = env->vfp.vec_len;
2980            if (op == 15 && rn > 3)
2981                veclen = 0;
2982
2983            /* Shut up compiler warnings.  */
2984            delta_m = 0;
2985            delta_d = 0;
2986            bank_mask = 0;
2987
2988            if (veclen > 0) {
2989                if (dp)
2990                    bank_mask = 0xc;
2991                else
2992                    bank_mask = 0x18;
2993
2994                /* Figure out what type of vector operation this is.  */
2995                if ((rd & bank_mask) == 0) {
2996                    /* scalar */
2997                    veclen = 0;
2998                } else {
2999                    if (dp)
3000                        delta_d = (env->vfp.vec_stride >> 1) + 1;
3001                    else
3002                        delta_d = env->vfp.vec_stride + 1;
3003
3004                    if ((rm & bank_mask) == 0) {
3005                        /* mixed scalar/vector */
3006                        delta_m = 0;
3007                    } else {
3008                        /* vector */
3009                        delta_m = delta_d;
3010                    }
3011                }
3012            }
3013
3014            /* Load the initial operands.  */
3015            if (op == 15) {
3016                switch (rn) {
3017                case 16:
3018                case 17:
3019                    /* Integer source */
3020                    gen_mov_F0_vreg(0, rm);
3021                    break;
3022                case 8:
3023                case 9:
3024                    /* Compare */
3025                    gen_mov_F0_vreg(dp, rd);
3026                    gen_mov_F1_vreg(dp, rm);
3027                    break;
3028                case 10:
3029                case 11:
3030                    /* Compare with zero */
3031                    gen_mov_F0_vreg(dp, rd);
3032                    gen_vfp_F1_ld0(dp);
3033                    break;
3034                case 20:
3035                case 21:
3036                case 22:
3037                case 23:
3038                case 28:
3039                case 29:
3040                case 30:
3041                case 31:
3042                    /* Source and destination the same.  */
3043                    gen_mov_F0_vreg(dp, rd);
3044                    break;
3045                default:
3046                    /* One source operand.  */
3047                    gen_mov_F0_vreg(dp, rm);
3048                    break;
3049                }
3050            } else {
3051                /* Two source operands.  */
3052                gen_mov_F0_vreg(dp, rn);
3053                gen_mov_F1_vreg(dp, rm);
3054            }
3055
3056            for (;;) {
3057                /* Perform the calculation.  */
3058                switch (op) {
3059                case 0: /* mac: fd + (fn * fm) */
3060                    gen_vfp_mul(dp);
3061                    gen_mov_F1_vreg(dp, rd);
3062                    gen_vfp_add(dp);
3063                    break;
3064                case 1: /* nmac: fd - (fn * fm) */
3065                    gen_vfp_mul(dp);
3066                    gen_vfp_neg(dp);
3067                    gen_mov_F1_vreg(dp, rd);
3068                    gen_vfp_add(dp);
3069                    break;
3070                case 2: /* msc: -fd + (fn * fm) */
3071                    gen_vfp_mul(dp);
3072                    gen_mov_F1_vreg(dp, rd);
3073                    gen_vfp_sub(dp);
3074                    break;
3075                case 3: /* nmsc: -fd - (fn * fm)  */
3076                    gen_vfp_mul(dp);
3077                    gen_vfp_neg(dp);
3078                    gen_mov_F1_vreg(dp, rd);
3079                    gen_vfp_sub(dp);
3080                    break;
3081                case 4: /* mul: fn * fm */
3082                    gen_vfp_mul(dp);
3083                    break;
3084                case 5: /* nmul: -(fn * fm) */
3085                    gen_vfp_mul(dp);
3086                    gen_vfp_neg(dp);
3087                    break;
3088                case 6: /* add: fn + fm */
3089                    gen_vfp_add(dp);
3090                    break;
3091                case 7: /* sub: fn - fm */
3092                    gen_vfp_sub(dp);
3093                    break;
3094                case 8: /* div: fn / fm */
3095                    gen_vfp_div(dp);
3096                    break;
3097                case 14: /* fconst */
3098                    if (!arm_feature(env, ARM_FEATURE_VFP3))
3099                      return 1;
3100
3101                    n = (insn << 12) & 0x80000000;
3102                    i = ((insn >> 12) & 0x70) | (insn & 0xf);
3103                    if (dp) {
3104                        if (i & 0x40)
3105                            i |= 0x3f80;
3106                        else
3107                            i |= 0x4000;
3108                        n |= i << 16;
3109                        tcg_gen_movi_i64(cpu_F0d, ((uint64_t)n) << 32);
3110                    } else {
3111                        if (i & 0x40)
3112                            i |= 0x780;
3113                        else
3114                            i |= 0x800;
3115                        n |= i << 19;
3116                        tcg_gen_movi_i32(cpu_F0s, n);
3117                    }
3118                    break;
3119                case 15: /* extension space */
3120                    switch (rn) {
3121                    case 0: /* cpy */
3122                        /* no-op */
3123                        break;
3124                    case 1: /* abs */
3125                        gen_vfp_abs(dp);
3126                        break;
3127                    case 2: /* neg */
3128                        gen_vfp_neg(dp);
3129                        break;
3130                    case 3: /* sqrt */
3131                        gen_vfp_sqrt(dp);
3132                        break;
3133                    case 8: /* cmp */
3134                        gen_vfp_cmp(dp);
3135                        break;
3136                    case 9: /* cmpe */
3137                        gen_vfp_cmpe(dp);
3138                        break;
3139                    case 10: /* cmpz */
3140                        gen_vfp_cmp(dp);
3141                        break;
3142                    case 11: /* cmpez */
3143                        gen_vfp_F1_ld0(dp);
3144                        gen_vfp_cmpe(dp);
3145                        break;
3146                    case 15: /* single<->double conversion */
3147                        if (dp)
3148                            gen_helper_vfp_fcvtsd(cpu_F0s, cpu_F0d, cpu_env);
3149                        else
3150                            gen_helper_vfp_fcvtds(cpu_F0d, cpu_F0s, cpu_env);
3151                        break;
3152                    case 16: /* fuito */
3153                        gen_vfp_uito(dp);
3154                        break;
3155                    case 17: /* fsito */
3156                        gen_vfp_sito(dp);
3157                        break;
3158                    case 20: /* fshto */
3159                        if (!arm_feature(env, ARM_FEATURE_VFP3))
3160                          return 1;
3161                        gen_vfp_shto(dp, 16 - rm);
3162                        break;
3163                    case 21: /* fslto */
3164                        if (!arm_feature(env, ARM_FEATURE_VFP3))
3165                          return 1;
3166                        gen_vfp_slto(dp, 32 - rm);
3167                        break;
3168                    case 22: /* fuhto */
3169                        if (!arm_feature(env, ARM_FEATURE_VFP3))
3170                          return 1;
3171                        gen_vfp_uhto(dp, 16 - rm);
3172                        break;
3173                    case 23: /* fulto */
3174                        if (!arm_feature(env, ARM_FEATURE_VFP3))
3175                          return 1;
3176                        gen_vfp_ulto(dp, 32 - rm);
3177                        break;
3178                    case 24: /* ftoui */
3179                        gen_vfp_toui(dp);
3180                        break;
3181                    case 25: /* ftouiz */
3182                        gen_vfp_touiz(dp);
3183                        break;
3184                    case 26: /* ftosi */
3185                        gen_vfp_tosi(dp);
3186                        break;
3187                    case 27: /* ftosiz */
3188                        gen_vfp_tosiz(dp);
3189                        break;
3190                    case 28: /* ftosh */
3191                        if (!arm_feature(env, ARM_FEATURE_VFP3))
3192                          return 1;
3193                        gen_vfp_tosh(dp, 16 - rm);
3194                        break;
3195                    case 29: /* ftosl */
3196                        if (!arm_feature(env, ARM_FEATURE_VFP3))
3197                          return 1;
3198                        gen_vfp_tosl(dp, 32 - rm);
3199                        break;
3200                    case 30: /* ftouh */
3201                        if (!arm_feature(env, ARM_FEATURE_VFP3))
3202                          return 1;
3203                        gen_vfp_touh(dp, 16 - rm);
3204                        break;
3205                    case 31: /* ftoul */
3206                        if (!arm_feature(env, ARM_FEATURE_VFP3))
3207                          return 1;
3208                        gen_vfp_toul(dp, 32 - rm);
3209                        break;
3210                    default: /* undefined */
3211                        printf ("rn:%d\n", rn);
3212                        return 1;
3213                    }
3214                    break;
3215                default: /* undefined */
3216                    printf ("op:%d\n", op);
3217                    return 1;
3218                }
3219
3220                /* Write back the result.  */
3221                if (op == 15 && (rn >= 8 && rn <= 11))
3222                    ; /* Comparison, do nothing.  */
3223                else if (op == 15 && rn > 17)
3224                    /* Integer result.  */
3225                    gen_mov_vreg_F0(0, rd);
3226                else if (op == 15 && rn == 15)
3227                    /* conversion */
3228                    gen_mov_vreg_F0(!dp, rd);
3229                else
3230                    gen_mov_vreg_F0(dp, rd);
3231
3232                /* break out of the loop if we have finished  */
3233                if (veclen == 0)
3234                    break;
3235
3236                if (op == 15 && delta_m == 0) {
3237                    /* single source one-many */
3238                    while (veclen--) {
3239                        rd = ((rd + delta_d) & (bank_mask - 1))
3240                             | (rd & bank_mask);
3241                        gen_mov_vreg_F0(dp, rd);
3242                    }
3243                    break;
3244                }
3245                /* Setup the next operands.  */
3246                veclen--;
3247                rd = ((rd + delta_d) & (bank_mask - 1))
3248                     | (rd & bank_mask);
3249
3250                if (op == 15) {
3251                    /* One source operand.  */
3252                    rm = ((rm + delta_m) & (bank_mask - 1))
3253                         | (rm & bank_mask);
3254                    gen_mov_F0_vreg(dp, rm);
3255                } else {
3256                    /* Two source operands.  */
3257                    rn = ((rn + delta_d) & (bank_mask - 1))
3258                         | (rn & bank_mask);
3259                    gen_mov_F0_vreg(dp, rn);
3260                    if (delta_m) {
3261                        rm = ((rm + delta_m) & (bank_mask - 1))
3262                             | (rm & bank_mask);
3263                        gen_mov_F1_vreg(dp, rm);
3264                    }
3265                }
3266            }
3267        }
3268        break;
3269    case 0xc:
3270    case 0xd:
3271        if (dp && (insn & 0x03e00000) == 0x00400000) {
3272            /* two-register transfer */
3273            rn = (insn >> 16) & 0xf;
3274            rd = (insn >> 12) & 0xf;
3275            if (dp) {
3276                VFP_DREG_M(rm, insn);
3277            } else {
3278                rm = VFP_SREG_M(insn);
3279            }
3280
3281            if (insn & ARM_CP_RW_BIT) {
3282                /* vfp->arm */
3283                if (dp) {
3284                    gen_mov_F0_vreg(0, rm * 2);
3285                    tmp = gen_vfp_mrs();
3286                    store_reg(s, rd, tmp);
3287                    gen_mov_F0_vreg(0, rm * 2 + 1);
3288                    tmp = gen_vfp_mrs();
3289                    store_reg(s, rn, tmp);
3290                } else {
3291                    gen_mov_F0_vreg(0, rm);
3292                    tmp = gen_vfp_mrs();
3293                    store_reg(s, rn, tmp);
3294                    gen_mov_F0_vreg(0, rm + 1);
3295                    tmp = gen_vfp_mrs();
3296                    store_reg(s, rd, tmp);
3297                }
3298            } else {
3299                /* arm->vfp */
3300                if (dp) {
3301                    tmp = load_reg(s, rd);
3302                    gen_vfp_msr(tmp);
3303                    gen_mov_vreg_F0(0, rm * 2);
3304                    tmp = load_reg(s, rn);
3305                    gen_vfp_msr(tmp);
3306                    gen_mov_vreg_F0(0, rm * 2 + 1);
3307                } else {
3308                    tmp = load_reg(s, rn);
3309                    gen_vfp_msr(tmp);
3310                    gen_mov_vreg_F0(0, rm);
3311                    tmp = load_reg(s, rd);
3312                    gen_vfp_msr(tmp);
3313                    gen_mov_vreg_F0(0, rm + 1);
3314                }
3315            }
3316        } else {
3317            /* Load/store */
3318            rn = (insn >> 16) & 0xf;
3319            if (dp)
3320                VFP_DREG_D(rd, insn);
3321            else
3322                rd = VFP_SREG_D(insn);
3323            if (s->thumb && rn == 15) {
3324                gen_op_movl_T1_im(s->pc & ~2);
3325            } else {
3326                gen_movl_T1_reg(s, rn);
3327            }
3328            if ((insn & 0x01200000) == 0x01000000) {
3329                /* Single load/store */
3330                offset = (insn & 0xff) << 2;
3331                if ((insn & (1 << 23)) == 0)
3332                    offset = -offset;
3333                gen_op_addl_T1_im(offset);
3334                if (insn & (1 << 20)) {
3335                    gen_vfp_ld(s, dp);
3336                    gen_mov_vreg_F0(dp, rd);
3337                } else {
3338                    gen_mov_F0_vreg(dp, rd);
3339                    gen_vfp_st(s, dp);
3340                }
3341            } else {
3342                /* load/store multiple */
3343                if (dp)
3344                    n = (insn >> 1) & 0x7f;
3345                else
3346                    n = insn & 0xff;
3347
3348                if (insn & (1 << 24)) /* pre-decrement */
3349                    gen_op_addl_T1_im(-((insn & 0xff) << 2));
3350
3351                if (dp)
3352                    offset = 8;
3353                else
3354                    offset = 4;
3355                for (i = 0; i < n; i++) {
3356                    if (insn & ARM_CP_RW_BIT) {
3357                        /* load */
3358                        gen_vfp_ld(s, dp);
3359                        gen_mov_vreg_F0(dp, rd + i);
3360                    } else {
3361                        /* store */
3362                        gen_mov_F0_vreg(dp, rd + i);
3363                        gen_vfp_st(s, dp);
3364                    }
3365                    gen_op_addl_T1_im(offset);
3366                }
3367                if (insn & (1 << 21)) {
3368                    /* writeback */
3369                    if (insn & (1 << 24))
3370                        offset = -offset * n;
3371                    else if (dp && (insn & 1))
3372                        offset = 4;
3373                    else
3374                        offset = 0;
3375
3376                    if (offset != 0)
3377                        gen_op_addl_T1_im(offset);
3378                    gen_movl_reg_T1(s, rn);
3379                }
3380            }
3381        }
3382        break;
3383    default:
3384        /* Should never happen.  */
3385        return 1;
3386    }
3387    return 0;
3388}
3389
3390static inline void gen_goto_tb(DisasContext *s, int n, uint32_t dest)
3391{
3392    TranslationBlock *tb;
3393
3394    tb = s->tb;
3395    if ((tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK)) {
3396        tcg_gen_goto_tb(n);
3397        gen_set_pc_im(dest);
3398        tcg_gen_exit_tb((long)tb + n);
3399    } else {
3400        gen_set_pc_im(dest);
3401        tcg_gen_exit_tb(0);
3402    }
3403}
3404
3405static inline void gen_jmp (DisasContext *s, uint32_t dest)
3406{
3407    if (unlikely(s->singlestep_enabled)) {
3408        /* An indirect jump so that we still trigger the debug exception.  */
3409        if (s->thumb)
3410            dest |= 1;
3411        gen_bx_im(s, dest);
3412    } else {
3413        gen_goto_tb(s, 0, dest);
3414        s->is_jmp = DISAS_TB_JUMP;
3415    }
3416}
3417
3418static inline void gen_mulxy(TCGv t0, TCGv t1, int x, int y)
3419{
3420    if (x)
3421        tcg_gen_sari_i32(t0, t0, 16);
3422    else
3423        gen_sxth(t0);
3424    if (y)
3425        tcg_gen_sari_i32(t1, t1, 16);
3426    else
3427        gen_sxth(t1);
3428    tcg_gen_mul_i32(t0, t0, t1);
3429}
3430
3431/* Return the mask of PSR bits set by a MSR instruction.  */
3432static uint32_t msr_mask(CPUState *env, DisasContext *s, int flags, int spsr) {
3433    uint32_t mask;
3434
3435    mask = 0;
3436    if (flags & (1 << 0))
3437        mask |= 0xff;
3438    if (flags & (1 << 1))
3439        mask |= 0xff00;
3440    if (flags & (1 << 2))
3441        mask |= 0xff0000;
3442    if (flags & (1 << 3))
3443        mask |= 0xff000000;
3444
3445    /* Mask out undefined bits.  */
3446    mask &= ~CPSR_RESERVED;
3447    if (!arm_feature(env, ARM_FEATURE_V6))
3448        mask &= ~(CPSR_E | CPSR_GE);
3449    if (!arm_feature(env, ARM_FEATURE_THUMB2))
3450        mask &= ~CPSR_IT;
3451    /* Mask out execution state bits.  */
3452    if (!spsr)
3453        mask &= ~CPSR_EXEC;
3454    /* Mask out privileged bits.  */
3455    if (IS_USER(s))
3456        mask &= CPSR_USER;
3457    return mask;
3458}
3459
/* Write the bits of T0 selected by mask to the CPSR (spsr == 0) or the
   current SPSR (spsr != 0).  Returns nonzero if access to the PSR is not
   permitted.  */
static int gen_set_psr_T0(DisasContext *s, uint32_t mask, int spsr)
{
    TCGv tmp;
    if (spsr) {
        /* ??? This is also undefined in system mode.  */
        if (IS_USER(s))
            return 1;

        /* Read-modify-write: clear the selected SPSR bits, then merge in
           the corresponding bits taken from T0.  */
        tmp = load_cpu_field(spsr);
        tcg_gen_andi_i32(tmp, tmp, ~mask);
        tcg_gen_andi_i32(cpu_T[0], cpu_T[0], mask);
        tcg_gen_or_i32(tmp, tmp, cpu_T[0]);
        store_cpu_field(tmp, spsr);
    } else {
        gen_set_cpsr(cpu_T[0], mask);
    }
    /* A PSR write may invalidate cached translation state, so force a
       TB lookup at the end of this instruction.  */
    gen_lookup_tb(s);
    return 0;
}
3480
/* Generate an old-style exception return. Marks pc as dead. */
static void gen_exception_return(DisasContext *s, TCGv pc)
{
    TCGv tmp;
    store_reg(s, 15, pc);            /* branch to the return address */
    tmp = load_cpu_field(spsr);
    gen_set_cpsr(tmp, 0xffffffff);   /* restore the entire CPSR from SPSR */
    dead_tmp(tmp);
    /* CPU state (mode, flags) has changed; stop translating this TB.  */
    s->is_jmp = DISAS_UPDATE;
}
3491
/* Generate a v6 exception return.  Marks both values as dead.  */
static void gen_rfe(DisasContext *s, TCGv pc, TCGv cpsr)
{
    gen_set_cpsr(cpsr, 0xffffffff);  /* write all CPSR bits from the loaded value */
    dead_tmp(cpsr);
    store_reg(s, 15, pc);            /* then branch to the restored PC */
    /* CPU state has changed; end the current TB.  */
    s->is_jmp = DISAS_UPDATE;
}
3500
3501static inline void
3502gen_set_condexec (DisasContext *s)
3503{
3504    if (s->condexec_mask) {
3505        uint32_t val = (s->condexec_cond << 4) | (s->condexec_mask >> 1);
3506        TCGv tmp = new_tmp();
3507        tcg_gen_movi_i32(tmp, val);
3508        store_cpu_field(tmp, condexec_bits);
3509    }
3510    else if (s->condexec_mask_prev != 0) {
3511        TCGv tmp = new_tmp();
3512        tcg_gen_movi_i32(tmp, 0);
3513        store_cpu_field(tmp, condexec_bits);
3514    }
3515}
3516
3517static void gen_nop_hint(DisasContext *s, int val)
3518{
3519    switch (val) {
3520    case 3: /* wfi */
3521        gen_set_pc_im(s->pc);
3522        s->is_jmp = DISAS_WFI;
3523        break;
3524    case 2: /* wfe */
3525    case 4: /* sev */
3526        /* TODO: Implement SEV and WFE.  May help SMP performance.  */
3527    default: /* nop */
3528        break;
3529    }
3530}
3531
/* These macros help make the code more readable when migrating from the
   old dyngen helpers.  They should probably be removed when
   T0/T1 are removed.  */
/* "dest, src1, src2" over the 32-bit T0/T1 globals (dest aliases src1).  */
#define CPU_T001 cpu_T[0], cpu_T[0], cpu_T[1]
/* As CPU_T001 but with cpu_env inserted for helpers that take the CPU state.  */
#define CPU_T0E01 cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]

/* "dest, src1, src2" over the 64-bit V0/V1 temporaries.  */
#define CPU_V001 cpu_V0, cpu_V0, cpu_V1
3539
3540static inline int gen_neon_add(int size)
3541{
3542    switch (size) {
3543    case 0: gen_helper_neon_add_u8(CPU_T001); break;
3544    case 1: gen_helper_neon_add_u16(CPU_T001); break;
3545    case 2: gen_op_addl_T0_T1(); break;
3546    default: return 1;
3547    }
3548    return 0;
3549}
3550
3551static inline void gen_neon_rsb(int size)
3552{
3553    switch (size) {
3554    case 0: gen_helper_neon_sub_u8(cpu_T[0], cpu_T[1], cpu_T[0]); break;
3555    case 1: gen_helper_neon_sub_u16(cpu_T[0], cpu_T[1], cpu_T[0]); break;
3556    case 2: gen_op_rsbl_T0_T1(); break;
3557    default: return;
3558    }
3559}
3560
/* 32-bit pairwise ops end up the same as the elementwise versions.  */
#define gen_helper_neon_pmax_s32  gen_helper_neon_max_s32
#define gen_helper_neon_pmax_u32  gen_helper_neon_max_u32
#define gen_helper_neon_pmin_s32  gen_helper_neon_min_s32
#define gen_helper_neon_pmin_u32  gen_helper_neon_min_u32

/* Map the 32-bit NEON saturating ops onto the generic ARM saturate
   helpers, dropping the cpu_env argument (e).
   FIXME: This is wrong.  They set the wrong overflow bit.  */
#define gen_helper_neon_qadd_s32(a, e, b, c) gen_helper_add_saturate(a, b, c)
#define gen_helper_neon_qadd_u32(a, e, b, c) gen_helper_add_usaturate(a, b, c)
#define gen_helper_neon_qsub_s32(a, e, b, c) gen_helper_sub_saturate(a, b, c)
#define gen_helper_neon_qsub_u32(a, e, b, c) gen_helper_sub_usaturate(a, b, c)
3572
/* Invoke the NEON helper "name" on T0 op= T1, dispatching on the element
   size and the unsigned flag u to the s8/u8/s16/u16/s32/u32 variant and
   passing cpu_env.  The default case expands to "return 1", so this macro
   may only be used inside a disas_* function returning int.  */
#define GEN_NEON_INTEGER_OP_ENV(name) do { \
    switch ((size << 1) | u) { \
    case 0: \
        gen_helper_neon_##name##_s8(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); \
        break; \
    case 1: \
        gen_helper_neon_##name##_u8(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); \
        break; \
    case 2: \
        gen_helper_neon_##name##_s16(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); \
        break; \
    case 3: \
        gen_helper_neon_##name##_u16(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); \
        break; \
    case 4: \
        gen_helper_neon_##name##_s32(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); \
        break; \
    case 5: \
        gen_helper_neon_##name##_u32(cpu_T[0], cpu_env, cpu_T[0], cpu_T[1]); \
        break; \
    default: return 1; \
    }} while (0)
3595
/* As GEN_NEON_INTEGER_OP_ENV but for helpers that do not take cpu_env.
   The default case expands to "return 1" (treat as undefined), so this
   macro may only be used inside a disas_* function returning int.  */
#define GEN_NEON_INTEGER_OP(name) do { \
    switch ((size << 1) | u) { \
    case 0: \
        gen_helper_neon_##name##_s8(cpu_T[0], cpu_T[0], cpu_T[1]); \
        break; \
    case 1: \
        gen_helper_neon_##name##_u8(cpu_T[0], cpu_T[0], cpu_T[1]); \
        break; \
    case 2: \
        gen_helper_neon_##name##_s16(cpu_T[0], cpu_T[0], cpu_T[1]); \
        break; \
    case 3: \
        gen_helper_neon_##name##_u16(cpu_T[0], cpu_T[0], cpu_T[1]); \
        break; \
    case 4: \
        gen_helper_neon_##name##_s32(cpu_T[0], cpu_T[0], cpu_T[1]); \
        break; \
    case 5: \
        gen_helper_neon_##name##_u32(cpu_T[0], cpu_T[0], cpu_T[1]); \
        break; \
    default: return 1; \
    }} while (0)
3618
3619static inline void
3620gen_neon_movl_scratch_T0(int scratch)
3621{
3622  uint32_t offset;
3623
3624  offset = offsetof(CPUARMState, vfp.scratch[scratch]);
3625  tcg_gen_st_i32(cpu_T[0], cpu_env, offset);
3626}
3627
3628static inline void
3629gen_neon_movl_scratch_T1(int scratch)
3630{
3631  uint32_t offset;
3632
3633  offset = offsetof(CPUARMState, vfp.scratch[scratch]);
3634  tcg_gen_st_i32(cpu_T[1], cpu_env, offset);
3635}
3636
3637static inline void
3638gen_neon_movl_T0_scratch(int scratch)
3639{
3640  uint32_t offset;
3641
3642  offset = offsetof(CPUARMState, vfp.scratch[scratch]);
3643  tcg_gen_ld_i32(cpu_T[0], cpu_env, offset);
3644}
3645
3646static inline void
3647gen_neon_movl_T1_scratch(int scratch)
3648{
3649  uint32_t offset;
3650
3651  offset = offsetof(CPUARMState, vfp.scratch[scratch]);
3652  tcg_gen_ld_i32(cpu_T[1], cpu_env, offset);
3653}
3654
3655static inline void gen_neon_get_scalar(int size, int reg)
3656{
3657    if (size == 1) {
3658        NEON_GET_REG(T0, reg >> 1, reg & 1);
3659    } else {
3660        NEON_GET_REG(T0, reg >> 2, (reg >> 1) & 1);
3661        if (reg & 1)
3662            gen_neon_dup_low16(cpu_T[0]);
3663        else
3664            gen_neon_dup_high16(cpu_T[0]);
3665    }
3666}
3667
3668static void gen_neon_unzip(int reg, int q, int tmp, int size)
3669{
3670    int n;
3671
3672    for (n = 0; n < q + 1; n += 2) {
3673        NEON_GET_REG(T0, reg, n);
3674        NEON_GET_REG(T0, reg, n + n);
3675        switch (size) {
3676        case 0: gen_helper_neon_unzip_u8(); break;
3677        case 1: gen_helper_neon_zip_u16(); break; /* zip and unzip are the same.  */
3678        case 2: /* no-op */; break;
3679        default: abort();
3680        }
3681        gen_neon_movl_scratch_T0(tmp + n);
3682        gen_neon_movl_scratch_T1(tmp + n + 1);
3683    }
3684}
3685
/* Layout table for NEON "load/store multiple structures", indexed by the
   op field (insn bits [11:8], values 0..10): how many D registers are
   accessed, the element interleave factor, and the register spacing
   between consecutive registers of one structure.  */
static struct {
    int nregs;       /* number of D registers transferred */
    int interleave;  /* element interleave factor */
    int spacing;     /* register number step between elements */
} neon_ls_element_type[11] = {
    {4, 4, 1},
    {4, 4, 2},
    {4, 1, 1},
    {4, 2, 1},
    {3, 3, 1},
    {3, 3, 2},
    {3, 1, 1},
    {1, 1, 1},
    {2, 2, 1},
    {2, 2, 2},
    {2, 1, 1}
};
3703
3704/* Translate a NEON load/store element instruction.  Return nonzero if the
3705   instruction is invalid.  */
3706static int disas_neon_ls_insn(CPUState * env, DisasContext *s, uint32_t insn)
3707{
3708    int rd, rn, rm;
3709    int op;
3710    int nregs;
3711    int interleave;
3712    int stride;
3713    int size;
3714    int reg;
3715    int pass;
3716    int load;
3717    int shift;
3718    int n;
3719    TCGv tmp;
3720    TCGv tmp2;
3721
3722    if (!vfp_enabled(env))
3723      return 1;
3724    VFP_DREG_D(rd, insn);
3725    rn = (insn >> 16) & 0xf;
3726    rm = insn & 0xf;
3727    load = (insn & (1 << 21)) != 0;
3728    if ((insn & (1 << 23)) == 0) {
3729        /* Load store all elements.  */
3730        op = (insn >> 8) & 0xf;
3731        size = (insn >> 6) & 3;
3732        if (op > 10 || size == 3)
3733            return 1;
3734        nregs = neon_ls_element_type[op].nregs;
3735        interleave = neon_ls_element_type[op].interleave;
3736        gen_movl_T1_reg(s, rn);
3737        stride = (1 << size) * interleave;
3738        for (reg = 0; reg < nregs; reg++) {
3739            if (interleave > 2 || (interleave == 2 && nregs == 2)) {
3740                gen_movl_T1_reg(s, rn);
3741                gen_op_addl_T1_im((1 << size) * reg);
3742            } else if (interleave == 2 && nregs == 4 && reg == 2) {
3743                gen_movl_T1_reg(s, rn);
3744                gen_op_addl_T1_im(1 << size);
3745            }
3746            for (pass = 0; pass < 2; pass++) {
3747                if (size == 2) {
3748                    if (load) {
3749                        tmp = gen_ld32(cpu_T[1], IS_USER(s));
3750                        neon_store_reg(rd, pass, tmp);
3751                    } else {
3752                        tmp = neon_load_reg(rd, pass);
3753                        gen_st32(tmp, cpu_T[1], IS_USER(s));
3754                    }
3755                    gen_op_addl_T1_im(stride);
3756                } else if (size == 1) {
3757                    if (load) {
3758                        tmp = gen_ld16u(cpu_T[1], IS_USER(s));
3759                        gen_op_addl_T1_im(stride);
3760                        tmp2 = gen_ld16u(cpu_T[1], IS_USER(s));
3761                        gen_op_addl_T1_im(stride);
3762                        gen_bfi(tmp, tmp, tmp2, 16, 0xffff);
3763                        dead_tmp(tmp2);
3764                        neon_store_reg(rd, pass, tmp);
3765                    } else {
3766                        tmp = neon_load_reg(rd, pass);
3767                        tmp2 = new_tmp();
3768                        tcg_gen_shri_i32(tmp2, tmp, 16);
3769                        gen_st16(tmp, cpu_T[1], IS_USER(s));
3770                        gen_op_addl_T1_im(stride);
3771                        gen_st16(tmp2, cpu_T[1], IS_USER(s));
3772                        gen_op_addl_T1_im(stride);
3773                    }
3774                } else /* size == 0 */ {
3775                    if (load) {
3776                        TCGV_UNUSED(tmp2);
3777                        for (n = 0; n < 4; n++) {
3778                            tmp = gen_ld8u(cpu_T[1], IS_USER(s));
3779                            gen_op_addl_T1_im(stride);
3780                            if (n == 0) {
3781                                tmp2 = tmp;
3782                            } else {
3783                                gen_bfi(tmp2, tmp2, tmp, n * 8, 0xff);
3784                                dead_tmp(tmp);
3785                            }
3786                        }
3787                        neon_store_reg(rd, pass, tmp2);
3788                    } else {
3789                        tmp2 = neon_load_reg(rd, pass);
3790                        for (n = 0; n < 4; n++) {
3791                            tmp = new_tmp();
3792                            if (n == 0) {
3793                                tcg_gen_mov_i32(tmp, tmp2);
3794                            } else {
3795                                tcg_gen_shri_i32(tmp, tmp2, n * 8);
3796                            }
3797                            gen_st8(tmp, cpu_T[1], IS_USER(s));
3798                            gen_op_addl_T1_im(stride);
3799                        }
3800                        dead_tmp(tmp2);
3801                    }
3802                }
3803            }
3804            rd += neon_ls_element_type[op].spacing;
3805        }
3806        stride = nregs * 8;
3807    } else {
3808        size = (insn >> 10) & 3;
3809        if (size == 3) {
3810            /* Load single element to all lanes.  */
3811            if (!load)
3812                return 1;
3813            size = (insn >> 6) & 3;
3814            nregs = ((insn >> 8) & 3) + 1;
3815            stride = (insn & (1 << 5)) ? 2 : 1;
3816            gen_movl_T1_reg(s, rn);
3817            for (reg = 0; reg < nregs; reg++) {
3818                switch (size) {
3819                case 0:
3820                    tmp = gen_ld8u(cpu_T[1], IS_USER(s));
3821                    gen_neon_dup_u8(tmp, 0);
3822                    break;
3823                case 1:
3824                    tmp = gen_ld16u(cpu_T[1], IS_USER(s));
3825                    gen_neon_dup_low16(tmp);
3826                    break;
3827                case 2:
3828                    tmp = gen_ld32(cpu_T[1], IS_USER(s));
3829                    break;
3830                case 3:
3831                    return 1;
3832                default: /* Avoid compiler warnings.  */
3833                    abort();
3834                }
3835                gen_op_addl_T1_im(1 << size);
3836                tmp2 = new_tmp();
3837                tcg_gen_mov_i32(tmp2, tmp);
3838                neon_store_reg(rd, 0, tmp2);
3839                neon_store_reg(rd, 1, tmp);
3840                rd += stride;
3841            }
3842            stride = (1 << size) * nregs;
3843        } else {
3844            /* Single element.  */
3845            pass = (insn >> 7) & 1;
3846            switch (size) {
3847            case 0:
3848                shift = ((insn >> 5) & 3) * 8;
3849                stride = 1;
3850                break;
3851            case 1:
3852                shift = ((insn >> 6) & 1) * 16;
3853                stride = (insn & (1 << 5)) ? 2 : 1;
3854                break;
3855            case 2:
3856                shift = 0;
3857                stride = (insn & (1 << 6)) ? 2 : 1;
3858                break;
3859            default:
3860                abort();
3861            }
3862            nregs = ((insn >> 8) & 3) + 1;
3863            gen_movl_T1_reg(s, rn);
3864            for (reg = 0; reg < nregs; reg++) {
3865                if (load) {
3866                    switch (size) {
3867                    case 0:
3868                        tmp = gen_ld8u(cpu_T[1], IS_USER(s));
3869                        break;
3870                    case 1:
3871                        tmp = gen_ld16u(cpu_T[1], IS_USER(s));
3872                        break;
3873                    case 2:
3874                        tmp = gen_ld32(cpu_T[1], IS_USER(s));
3875                        break;
3876                    default: /* Avoid compiler warnings.  */
3877                        abort();
3878                    }
3879                    if (size != 2) {
3880                        tmp2 = neon_load_reg(rd, pass);
3881                        gen_bfi(tmp, tmp2, tmp, shift, size ? 0xffff : 0xff);
3882                        dead_tmp(tmp2);
3883                    }
3884                    neon_store_reg(rd, pass, tmp);
3885                } else { /* Store */
3886                    tmp = neon_load_reg(rd, pass);
3887                    if (shift)
3888                        tcg_gen_shri_i32(tmp, tmp, shift);
3889                    switch (size) {
3890                    case 0:
3891                        gen_st8(tmp, cpu_T[1], IS_USER(s));
3892                        break;
3893                    case 1:
3894                        gen_st16(tmp, cpu_T[1], IS_USER(s));
3895                        break;
3896                    case 2:
3897                        gen_st32(tmp, cpu_T[1], IS_USER(s));
3898                        break;
3899                    }
3900                }
3901                rd += stride;
3902                gen_op_addl_T1_im(1 << size);
3903            }
3904            stride = nregs * (1 << size);
3905        }
3906    }
3907    if (rm != 15) {
3908        TCGv base;
3909
3910        base = load_reg(s, rn);
3911        if (rm == 13) {
3912            tcg_gen_addi_i32(base, base, stride);
3913        } else {
3914            TCGv index;
3915            index = load_reg(s, rm);
3916            tcg_gen_add_i32(base, base, index);
3917            dead_tmp(index);
3918        }
3919        store_reg(s, rn, base);
3920    }
3921    return 0;
3922}
3923
3924/* Bitwise select.  dest = c ? t : f.  Clobbers T and F.  */
3925static void gen_neon_bsl(TCGv dest, TCGv t, TCGv f, TCGv c)
3926{
3927    tcg_gen_and_i32(t, t, c);
3928    tcg_gen_bic_i32(f, f, c);
3929    tcg_gen_or_i32(dest, t, f);
3930}
3931
3932static inline void gen_neon_narrow(int size, TCGv dest, TCGv_i64 src)
3933{
3934    switch (size) {
3935    case 0: gen_helper_neon_narrow_u8(dest, src); break;
3936    case 1: gen_helper_neon_narrow_u16(dest, src); break;
3937    case 2: tcg_gen_trunc_i64_i32(dest, src); break;
3938    default: abort();
3939    }
3940}
3941
3942static inline void gen_neon_narrow_sats(int size, TCGv dest, TCGv_i64 src)
3943{
3944    switch (size) {
3945    case 0: gen_helper_neon_narrow_sat_s8(dest, cpu_env, src); break;
3946    case 1: gen_helper_neon_narrow_sat_s16(dest, cpu_env, src); break;
3947    case 2: gen_helper_neon_narrow_sat_s32(dest, cpu_env, src); break;
3948    default: abort();
3949    }
3950}
3951
3952static inline void gen_neon_narrow_satu(int size, TCGv dest, TCGv_i64 src)
3953{
3954    switch (size) {
3955    case 0: gen_helper_neon_narrow_sat_u8(dest, cpu_env, src); break;
3956    case 1: gen_helper_neon_narrow_sat_u16(dest, cpu_env, src); break;
3957    case 2: gen_helper_neon_narrow_sat_u32(dest, cpu_env, src); break;
3958    default: abort();
3959    }
3960}
3961
3962static inline void gen_neon_shift_narrow(int size, TCGv var, TCGv shift,
3963                                         int q, int u)
3964{
3965    if (q) {
3966        if (u) {
3967            switch (size) {
3968            case 1: gen_helper_neon_rshl_u16(var, var, shift); break;
3969            case 2: gen_helper_neon_rshl_u32(var, var, shift); break;
3970            default: abort();
3971            }
3972        } else {
3973            switch (size) {
3974            case 1: gen_helper_neon_rshl_s16(var, var, shift); break;
3975            case 2: gen_helper_neon_rshl_s32(var, var, shift); break;
3976            default: abort();
3977            }
3978        }
3979    } else {
3980        if (u) {
3981            switch (size) {
3982            case 1: gen_helper_neon_rshl_u16(var, var, shift); break;
3983            case 2: gen_helper_neon_rshl_u32(var, var, shift); break;
3984            default: abort();
3985            }
3986        } else {
3987            switch (size) {
3988            case 1: gen_helper_neon_shl_s16(var, var, shift); break;
3989            case 2: gen_helper_neon_shl_s32(var, var, shift); break;
3990            default: abort();
3991            }
3992        }
3993    }
3994}
3995
3996static inline void gen_neon_widen(TCGv_i64 dest, TCGv src, int size, int u)
3997{
3998    if (u) {
3999        switch (size) {
4000        case 0: gen_helper_neon_widen_u8(dest, src); break;
4001        case 1: gen_helper_neon_widen_u16(dest, src); break;
4002        case 2: tcg_gen_extu_i32_i64(dest, src); break;
4003        default: abort();
4004        }
4005    } else {
4006        switch (size) {
4007        case 0: gen_helper_neon_widen_s8(dest, src); break;
4008        case 1: gen_helper_neon_widen_s16(dest, src); break;
4009        case 2: tcg_gen_ext_i32_i64(dest, src); break;
4010        default: abort();
4011        }
4012    }
4013    dead_tmp(src);
4014}
4015
4016static inline void gen_neon_addl(int size)
4017{
4018    switch (size) {
4019    case 0: gen_helper_neon_addl_u16(CPU_V001); break;
4020    case 1: gen_helper_neon_addl_u32(CPU_V001); break;
4021    case 2: tcg_gen_add_i64(CPU_V001); break;
4022    default: abort();
4023    }
4024}
4025
4026static inline void gen_neon_subl(int size)
4027{
4028    switch (size) {
4029    case 0: gen_helper_neon_subl_u16(CPU_V001); break;
4030    case 1: gen_helper_neon_subl_u32(CPU_V001); break;
4031    case 2: tcg_gen_sub_i64(CPU_V001); break;
4032    default: abort();
4033    }
4034}
4035
4036static inline void gen_neon_negl(TCGv_i64 var, int size)
4037{
4038    switch (size) {
4039    case 0: gen_helper_neon_negl_u16(var, var); break;
4040    case 1: gen_helper_neon_negl_u32(var, var); break;
4041    case 2: gen_helper_neon_negl_u64(var, var); break;
4042    default: abort();
4043    }
4044}
4045
4046static inline void gen_neon_addl_saturate(TCGv_i64 op0, TCGv_i64 op1, int size)
4047{
4048    switch (size) {
4049    case 1: gen_helper_neon_addl_saturate_s32(op0, cpu_env, op0, op1); break;
4050    case 2: gen_helper_neon_addl_saturate_s64(op0, cpu_env, op0, op1); break;
4051    default: abort();
4052    }
4053}
4054
4055static inline void gen_neon_mull(TCGv_i64 dest, TCGv a, TCGv b, int size, int u)
4056{
4057    TCGv_i64 tmp;
4058
4059    switch ((size << 1) | u) {
4060    case 0: gen_helper_neon_mull_s8(dest, a, b); break;
4061    case 1: gen_helper_neon_mull_u8(dest, a, b); break;
4062    case 2: gen_helper_neon_mull_s16(dest, a, b); break;
4063    case 3: gen_helper_neon_mull_u16(dest, a, b); break;
4064    case 4:
4065        tmp = gen_muls_i64_i32(a, b);
4066        tcg_gen_mov_i64(dest, tmp);
4067        break;
4068    case 5:
4069        tmp = gen_mulu_i64_i32(a, b);
4070        tcg_gen_mov_i64(dest, tmp);
4071        break;
4072    default: abort();
4073    }
4074    if (size < 2) {
4075        dead_tmp(b);
4076        dead_tmp(a);
4077    }
4078}
4079
4080/* Translate a NEON data processing instruction.  Return nonzero if the
4081   instruction is invalid.
4082   We process data in a mixture of 32-bit and 64-bit chunks.
4083   Mostly we use 32-bit chunks so we can use normal scalar instructions.  */
4084
4085static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn)
4086{
4087    int op;
4088    int q;
4089    int rd, rn, rm;
4090    int size;
4091    int shift;
4092    int pass;
4093    int count;
4094    int pairwise;
4095    int u;
4096    int n;
4097    uint32_t imm;
4098    TCGv tmp;
4099    TCGv tmp2;
4100    TCGv tmp3;
4101    TCGv_i64 tmp64;
4102
4103    if (!vfp_enabled(env))
4104      return 1;
4105    q = (insn & (1 << 6)) != 0;
4106    u = (insn >> 24) & 1;
4107    VFP_DREG_D(rd, insn);
4108    VFP_DREG_N(rn, insn);
4109    VFP_DREG_M(rm, insn);
4110    size = (insn >> 20) & 3;
4111    if ((insn & (1 << 23)) == 0) {
4112        /* Three register same length.  */
4113        op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1);
4114        if (size == 3 && (op == 1 || op == 5 || op == 8 || op == 9
4115                          || op == 10 || op  == 11 || op == 16)) {
4116            /* 64-bit element instructions.  */
4117            for (pass = 0; pass < (q ? 2 : 1); pass++) {
4118                neon_load_reg64(cpu_V0, rn + pass);
4119                neon_load_reg64(cpu_V1, rm + pass);
4120                switch (op) {
4121                case 1: /* VQADD */
4122                    if (u) {
4123                        gen_helper_neon_add_saturate_u64(CPU_V001);
4124                    } else {
4125                        gen_helper_neon_add_saturate_s64(CPU_V001);
4126                    }
4127                    break;
4128                case 5: /* VQSUB */
4129                    if (u) {
4130                        gen_helper_neon_sub_saturate_u64(CPU_V001);
4131                    } else {
4132                        gen_helper_neon_sub_saturate_s64(CPU_V001);
4133                    }
4134                    break;
4135                case 8: /* VSHL */
4136                    if (u) {
4137                        gen_helper_neon_shl_u64(cpu_V0, cpu_V1, cpu_V0);
4138                    } else {
4139                        gen_helper_neon_shl_s64(cpu_V0, cpu_V1, cpu_V0);
4140                    }
4141                    break;
4142                case 9: /* VQSHL */
4143                    if (u) {
4144                        gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
4145                                                 cpu_V0, cpu_V0);
4146                    } else {
4147                        gen_helper_neon_qshl_s64(cpu_V1, cpu_env,
4148                                                 cpu_V1, cpu_V0);
4149                    }
4150                    break;
4151                case 10: /* VRSHL */
4152                    if (u) {
4153                        gen_helper_neon_rshl_u64(cpu_V0, cpu_V1, cpu_V0);
4154                    } else {
4155                        gen_helper_neon_rshl_s64(cpu_V0, cpu_V1, cpu_V0);
4156                    }
4157                    break;
4158                case 11: /* VQRSHL */
4159                    if (u) {
4160                        gen_helper_neon_qrshl_u64(cpu_V0, cpu_env,
4161                                                  cpu_V1, cpu_V0);
4162                    } else {
4163                        gen_helper_neon_qrshl_s64(cpu_V0, cpu_env,
4164                                                  cpu_V1, cpu_V0);
4165                    }
4166                    break;
4167                case 16:
4168                    if (u) {
4169                        tcg_gen_sub_i64(CPU_V001);
4170                    } else {
4171                        tcg_gen_add_i64(CPU_V001);
4172                    }
4173                    break;
4174                default:
4175                    abort();
4176                }
4177                neon_store_reg64(cpu_V0, rd + pass);
4178            }
4179            return 0;
4180        }
4181        switch (op) {
4182        case 8: /* VSHL */
4183        case 9: /* VQSHL */
4184        case 10: /* VRSHL */
4185        case 11: /* VQRSHL */
4186            {
4187                int rtmp;
4188                /* Shift instruction operands are reversed.  */
4189                rtmp = rn;
4190                rn = rm;
4191                rm = rtmp;
4192                pairwise = 0;
4193            }
4194            break;
4195        case 20: /* VPMAX */
4196        case 21: /* VPMIN */
4197        case 23: /* VPADD */
4198            pairwise = 1;
4199            break;
4200        case 26: /* VPADD (float) */
4201            pairwise = (u && size < 2);
4202            break;
4203        case 30: /* VPMIN/VPMAX (float) */
4204            pairwise = u;
4205            break;
4206        default:
4207            pairwise = 0;
4208            break;
4209        }
4210        for (pass = 0; pass < (q ? 4 : 2); pass++) {
4211
4212        if (pairwise) {
4213            /* Pairwise.  */
4214            if (q)
4215                n = (pass & 1) * 2;
4216            else
4217                n = 0;
4218            if (pass < q + 1) {
4219                NEON_GET_REG(T0, rn, n);
4220                NEON_GET_REG(T1, rn, n + 1);
4221            } else {
4222                NEON_GET_REG(T0, rm, n);
4223                NEON_GET_REG(T1, rm, n + 1);
4224            }
4225        } else {
4226            /* Elementwise.  */
4227            NEON_GET_REG(T0, rn, pass);
4228            NEON_GET_REG(T1, rm, pass);
4229        }
4230        switch (op) {
4231        case 0: /* VHADD */
4232            GEN_NEON_INTEGER_OP(hadd);
4233            break;
4234        case 1: /* VQADD */
4235            GEN_NEON_INTEGER_OP_ENV(qadd);
4236            break;
4237        case 2: /* VRHADD */
4238            GEN_NEON_INTEGER_OP(rhadd);
4239            break;
4240        case 3: /* Logic ops.  */
4241            switch ((u << 2) | size) {
4242            case 0: /* VAND */
4243                gen_op_andl_T0_T1();
4244                break;
4245            case 1: /* BIC */
4246                gen_op_bicl_T0_T1();
4247                break;
4248            case 2: /* VORR */
4249                gen_op_orl_T0_T1();
4250                break;
4251            case 3: /* VORN */
4252                gen_op_notl_T1();
4253                gen_op_orl_T0_T1();
4254                break;
4255            case 4: /* VEOR */
4256                gen_op_xorl_T0_T1();
4257                break;
4258            case 5: /* VBSL */
4259                tmp = neon_load_reg(rd, pass);
4260                gen_neon_bsl(cpu_T[0], cpu_T[0], cpu_T[1], tmp);
4261                dead_tmp(tmp);
4262                break;
4263            case 6: /* VBIT */
4264                tmp = neon_load_reg(rd, pass);
4265                gen_neon_bsl(cpu_T[0], cpu_T[0], tmp, cpu_T[1]);
4266                dead_tmp(tmp);
4267                break;
4268            case 7: /* VBIF */
4269                tmp = neon_load_reg(rd, pass);
4270                gen_neon_bsl(cpu_T[0], tmp, cpu_T[0], cpu_T[1]);
4271                dead_tmp(tmp);
4272                break;
4273            }
4274            break;
4275        case 4: /* VHSUB */
4276            GEN_NEON_INTEGER_OP(hsub);
4277            break;
4278        case 5: /* VQSUB */
4279            GEN_NEON_INTEGER_OP_ENV(qsub);
4280            break;
4281        case 6: /* VCGT */
4282            GEN_NEON_INTEGER_OP(cgt);
4283            break;
4284        case 7: /* VCGE */
4285            GEN_NEON_INTEGER_OP(cge);
4286            break;
4287        case 8: /* VSHL */
4288            GEN_NEON_INTEGER_OP(shl);
4289            break;
4290        case 9: /* VQSHL */
4291            GEN_NEON_INTEGER_OP_ENV(qshl);
4292            break;
4293        case 10: /* VRSHL */
4294            GEN_NEON_INTEGER_OP(rshl);
4295            break;
4296        case 11: /* VQRSHL */
4297            GEN_NEON_INTEGER_OP_ENV(qrshl);
4298            break;
4299        case 12: /* VMAX */
4300            GEN_NEON_INTEGER_OP(max);
4301            break;
4302        case 13: /* VMIN */
4303            GEN_NEON_INTEGER_OP(min);
4304            break;
4305        case 14: /* VABD */
4306            GEN_NEON_INTEGER_OP(abd);
4307            break;
4308        case 15: /* VABA */
4309            GEN_NEON_INTEGER_OP(abd);
4310            NEON_GET_REG(T1, rd, pass);
4311            gen_neon_add(size);
4312            break;
4313        case 16:
4314            if (!u) { /* VADD */
4315                if (gen_neon_add(size))
4316                    return 1;
4317            } else { /* VSUB */
4318                switch (size) {
4319                case 0: gen_helper_neon_sub_u8(CPU_T001); break;
4320                case 1: gen_helper_neon_sub_u16(CPU_T001); break;
4321                case 2: gen_op_subl_T0_T1(); break;
4322                default: return 1;
4323                }
4324            }
4325            break;
4326        case 17:
4327            if (!u) { /* VTST */
4328                switch (size) {
4329                case 0: gen_helper_neon_tst_u8(CPU_T001); break;
4330                case 1: gen_helper_neon_tst_u16(CPU_T001); break;
4331                case 2: gen_helper_neon_tst_u32(CPU_T001); break;
4332                default: return 1;
4333                }
4334            } else { /* VCEQ */
4335                switch (size) {
4336                case 0: gen_helper_neon_ceq_u8(CPU_T001); break;
4337                case 1: gen_helper_neon_ceq_u16(CPU_T001); break;
4338                case 2: gen_helper_neon_ceq_u32(CPU_T001); break;
4339                default: return 1;
4340                }
4341            }
4342            break;
4343        case 18: /* Multiply.  */
4344            switch (size) {
4345            case 0: gen_helper_neon_mul_u8(CPU_T001); break;
4346            case 1: gen_helper_neon_mul_u16(CPU_T001); break;
4347            case 2: gen_op_mul_T0_T1(); break;
4348            default: return 1;
4349            }
4350            NEON_GET_REG(T1, rd, pass);
4351            if (u) { /* VMLS */
4352                gen_neon_rsb(size);
4353            } else { /* VMLA */
4354                gen_neon_add(size);
4355            }
4356            break;
4357        case 19: /* VMUL */
4358            if (u) { /* polynomial */
4359                gen_helper_neon_mul_p8(CPU_T001);
4360            } else { /* Integer */
4361                switch (size) {
4362                case 0: gen_helper_neon_mul_u8(CPU_T001); break;
4363                case 1: gen_helper_neon_mul_u16(CPU_T001); break;
4364                case 2: gen_op_mul_T0_T1(); break;
4365                default: return 1;
4366                }
4367            }
4368            break;
4369        case 20: /* VPMAX */
4370            GEN_NEON_INTEGER_OP(pmax);
4371            break;
4372        case 21: /* VPMIN */
4373            GEN_NEON_INTEGER_OP(pmin);
4374            break;
4375        case 22: /* Hultiply high.  */
4376            if (!u) { /* VQDMULH */
4377                switch (size) {
4378                case 1: gen_helper_neon_qdmulh_s16(CPU_T0E01); break;
4379                case 2: gen_helper_neon_qdmulh_s32(CPU_T0E01); break;
4380                default: return 1;
4381                }
4382            } else { /* VQRDHMUL */
4383                switch (size) {
4384                case 1: gen_helper_neon_qrdmulh_s16(CPU_T0E01); break;
4385                case 2: gen_helper_neon_qrdmulh_s32(CPU_T0E01); break;
4386                default: return 1;
4387                }
4388            }
4389            break;
4390        case 23: /* VPADD */
4391            if (u)
4392                return 1;
4393            switch (size) {
4394            case 0: gen_helper_neon_padd_u8(CPU_T001); break;
4395            case 1: gen_helper_neon_padd_u16(CPU_T001); break;
4396            case 2: gen_op_addl_T0_T1(); break;
4397            default: return 1;
4398            }
4399            break;
4400        case 26: /* Floating point arithnetic.  */
4401            switch ((u << 2) | size) {
4402            case 0: /* VADD */
4403                gen_helper_neon_add_f32(CPU_T001);
4404                break;
4405            case 2: /* VSUB */
4406                gen_helper_neon_sub_f32(CPU_T001);
4407                break;
4408            case 4: /* VPADD */
4409                gen_helper_neon_add_f32(CPU_T001);
4410                break;
4411            case 6: /* VABD */
4412                gen_helper_neon_abd_f32(CPU_T001);
4413                break;
4414            default:
4415                return 1;
4416            }
4417            break;
4418        case 27: /* Float multiply.  */
4419            gen_helper_neon_mul_f32(CPU_T001);
4420            if (!u) {
4421                NEON_GET_REG(T1, rd, pass);
4422                if (size == 0) {
4423                    gen_helper_neon_add_f32(CPU_T001);
4424                } else {
4425                    gen_helper_neon_sub_f32(cpu_T[0], cpu_T[1], cpu_T[0]);
4426                }
4427            }
4428            break;
4429        case 28: /* Float compare.  */
4430            if (!u) {
4431                gen_helper_neon_ceq_f32(CPU_T001);
4432            } else {
4433                if (size == 0)
4434                    gen_helper_neon_cge_f32(CPU_T001);
4435                else
4436                    gen_helper_neon_cgt_f32(CPU_T001);
4437            }
4438            break;
4439        case 29: /* Float compare absolute.  */
4440            if (!u)
4441                return 1;
4442            if (size == 0)
4443                gen_helper_neon_acge_f32(CPU_T001);
4444            else
4445                gen_helper_neon_acgt_f32(CPU_T001);
4446            break;
4447        case 30: /* Float min/max.  */
4448            if (size == 0)
4449                gen_helper_neon_max_f32(CPU_T001);
4450            else
4451                gen_helper_neon_min_f32(CPU_T001);
4452            break;
4453        case 31:
4454            if (size == 0)
4455                gen_helper_recps_f32(cpu_T[0], cpu_T[0], cpu_T[1], cpu_env);
4456            else
4457                gen_helper_rsqrts_f32(cpu_T[0], cpu_T[0], cpu_T[1], cpu_env);
4458            break;
4459        default:
4460            abort();
4461        }
4462        /* Save the result.  For elementwise operations we can put it
4463           straight into the destination register.  For pairwise operations
4464           we have to be careful to avoid clobbering the source operands.  */
4465        if (pairwise && rd == rm) {
4466            gen_neon_movl_scratch_T0(pass);
4467        } else {
4468            NEON_SET_REG(T0, rd, pass);
4469        }
4470
4471        } /* for pass */
4472        if (pairwise && rd == rm) {
4473            for (pass = 0; pass < (q ? 4 : 2); pass++) {
4474                gen_neon_movl_T0_scratch(pass);
4475                NEON_SET_REG(T0, rd, pass);
4476            }
4477        }
4478        /* End of 3 register same size operations.  */
4479    } else if (insn & (1 << 4)) {
4480        if ((insn & 0x00380080) != 0) {
4481            /* Two registers and shift.  */
4482            op = (insn >> 8) & 0xf;
4483            if (insn & (1 << 7)) {
4484                /* 64-bit shift.   */
4485                size = 3;
4486            } else {
4487                size = 2;
4488                while ((insn & (1 << (size + 19))) == 0)
4489                    size--;
4490            }
4491            shift = (insn >> 16) & ((1 << (3 + size)) - 1);
            /* To avoid excessive duplication of ops we implement shift
               by immediate using the variable shift operations.  */
4494            if (op < 8) {
4495                /* Shift by immediate:
4496                   VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU.  */
4497                /* Right shifts are encoded as N - shift, where N is the
4498                   element size in bits.  */
4499                if (op <= 4)
4500                    shift = shift - (1 << (size + 3));
4501                if (size == 3) {
4502                    count = q + 1;
4503                } else {
4504                    count = q ? 4: 2;
4505                }
4506                switch (size) {
4507                case 0:
4508                    imm = (uint8_t) shift;
4509                    imm |= imm << 8;
4510                    imm |= imm << 16;
4511                    break;
4512                case 1:
4513                    imm = (uint16_t) shift;
4514                    imm |= imm << 16;
4515                    break;
4516                case 2:
4517                case 3:
4518                    imm = shift;
4519                    break;
4520                default:
4521                    abort();
4522                }
4523
4524                for (pass = 0; pass < count; pass++) {
4525                    if (size == 3) {
4526                        neon_load_reg64(cpu_V0, rm + pass);
4527                        tcg_gen_movi_i64(cpu_V1, imm);
4528                        switch (op) {
4529                        case 0:  /* VSHR */
4530                        case 1:  /* VSRA */
4531                            if (u)
4532                                gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1);
4533                            else
4534                                gen_helper_neon_shl_s64(cpu_V0, cpu_V0, cpu_V1);
4535                            break;
4536                        case 2: /* VRSHR */
4537                        case 3: /* VRSRA */
4538                            if (u)
4539                                gen_helper_neon_rshl_u64(cpu_V0, cpu_V0, cpu_V1);
4540                            else
4541                                gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, cpu_V1);
4542                            break;
4543                        case 4: /* VSRI */
4544                            if (!u)
4545                                return 1;
4546                            gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1);
4547                            break;
4548                        case 5: /* VSHL, VSLI */
4549                            gen_helper_neon_shl_u64(cpu_V0, cpu_V0, cpu_V1);
4550                            break;
4551                        case 6: /* VQSHL */
4552                            if (u)
4553                                gen_helper_neon_qshl_u64(cpu_V0, cpu_env, cpu_V0, cpu_V1);
4554                            else
4555                                gen_helper_neon_qshl_s64(cpu_V0, cpu_env, cpu_V0, cpu_V1);
4556                            break;
4557                        case 7: /* VQSHLU */
4558                            gen_helper_neon_qshl_u64(cpu_V0, cpu_env, cpu_V0, cpu_V1);
4559                            break;
4560                        }
4561                        if (op == 1 || op == 3) {
4562                            /* Accumulate.  */
4563                            neon_load_reg64(cpu_V0, rd + pass);
4564                            tcg_gen_add_i64(cpu_V0, cpu_V0, cpu_V1);
4565                        } else if (op == 4 || (op == 5 && u)) {
4566                            /* Insert */
4567                            cpu_abort(env, "VS[LR]I.64 not implemented");
4568                        }
4569                        neon_store_reg64(cpu_V0, rd + pass);
4570                    } else { /* size < 3 */
                        /* size < 3: 8/16/32-bit elements.
                           Operands in T0 and T1.  */
                        gen_op_movl_T1_im(imm);
                        NEON_GET_REG(T0, rm, pass);
                        switch (op) {
                        case 0:  /* VSHR */
                        case 1:  /* VSRA */
                            /* Right shifts are expressed as left shifts by a
                               negative count (note the `-shift` uses in the
                               insert-mask code below).  */
                            GEN_NEON_INTEGER_OP(shl);
                            break;
                        case 2: /* VRSHR */
                        case 3: /* VRSRA */
                            GEN_NEON_INTEGER_OP(rshl);
                            break;
                        case 4: /* VSRI */
                            /* VSRI is UNDEFINED with U clear.  */
                            if (!u)
                                return 1;
                            GEN_NEON_INTEGER_OP(shl);
                            break;
                        case 5: /* VSHL, VSLI */
                            switch (size) {
                            case 0: gen_helper_neon_shl_u8(CPU_T001); break;
                            case 1: gen_helper_neon_shl_u16(CPU_T001); break;
                            case 2: gen_helper_neon_shl_u32(CPU_T001); break;
                            default: return 1;
                            }
                            break;
                        case 6: /* VQSHL */
                            GEN_NEON_INTEGER_OP_ENV(qshl);
                            break;
                        case 7: /* VQSHLU */
                            /* NOTE(review): this reuses the unsigned qshl
                               helpers for VQSHLU; verify saturation semantics
                               for negative inputs against the ARM ARM.  */
                            switch (size) {
                            case 0: gen_helper_neon_qshl_u8(CPU_T0E01); break;
                            case 1: gen_helper_neon_qshl_u16(CPU_T0E01); break;
                            case 2: gen_helper_neon_qshl_u32(CPU_T0E01); break;
                            default: return 1;
                            }
                            break;
                        }

                        if (op == 1 || op == 3) {
                            /* Accumulate.  */
                            NEON_GET_REG(T1, rd, pass);
                            gen_neon_add(size);
                        } else if (op == 4 || (op == 5 && u)) {
                            /* Insert (VSRI/VSLI): build a mask of the bits
                               produced by the shift, replicated to every
                               element in the 32-bit word; keep the remaining
                               destination bits unchanged.  */
                            switch (size) {
                            case 0:
                                if (op == 4)
                                    imm = 0xff >> -shift;
                                else
                                    imm = (uint8_t)(0xff << shift);
                                imm |= imm << 8;
                                imm |= imm << 16;
                                break;
                            case 1:
                                if (op == 4)
                                    imm = 0xffff >> -shift;
                                else
                                    imm = (uint16_t)(0xffff << shift);
                                imm |= imm << 16;
                                break;
                            case 2:
                                if (op == 4)
                                    imm = 0xffffffffu >> -shift;
                                else
                                    imm = 0xffffffffu << shift;
                                break;
                            default:
                                abort();
                            }
                            /* result = (shifted & mask) | (dest & ~mask) */
                            tmp = neon_load_reg(rd, pass);
                            tcg_gen_andi_i32(cpu_T[0], cpu_T[0], imm);
                            tcg_gen_andi_i32(tmp, tmp, ~imm);
                            tcg_gen_or_i32(cpu_T[0], cpu_T[0], tmp);
                        }
                        NEON_SET_REG(T0, rd, pass);
4646                    }
4647                } /* for pass */
4648            } else if (op < 10) {
                /* Shift by immediate and narrow:
                   VSHRN, VRSHRN, VQSHRN, VQRSHRN.
                   `shift` is rebased to a negative (right) shift count and
                   `size` bumped to the source (wide) element size.  */
                shift = shift - (1 << (size + 3));
                size++;
                switch (size) {
                case 1:
                    /* Replicate the 16-bit shift count into both halves.  */
                    imm = (uint16_t)shift;
                    imm |= imm << 16;
                    tmp2 = tcg_const_i32(imm);
                    TCGV_UNUSED_I64(tmp64);
                    break;
                case 2:
                    imm = (uint32_t)shift;
                    tmp2 = tcg_const_i32(imm);
                    TCGV_UNUSED_I64(tmp64);
                    break;
                case 3:
                    tmp64 = tcg_const_i64(shift);
                    TCGV_UNUSED(tmp2);
                    break;
                default:
                    abort();
                }

                for (pass = 0; pass < 2; pass++) {
                    if (size == 3) {
                        /* 64-bit source elements: q selects the rounding
                           variant here, u the signedness.  */
                        neon_load_reg64(cpu_V0, rm + pass);
                        if (q) {
                          if (u)
                            gen_helper_neon_rshl_u64(cpu_V0, cpu_V0, tmp64);
                          else
                            gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, tmp64);
                        } else {
                          if (u)
                            gen_helper_neon_shl_u64(cpu_V0, cpu_V0, tmp64);
                          else
                            gen_helper_neon_shl_s64(cpu_V0, cpu_V0, tmp64);
                        }
                    } else {
                        /* Shift both 32-bit halves of the source D register,
                           then reassemble them as one 64-bit value.  */
                        tmp = neon_load_reg(rm + pass, 0);
                        gen_neon_shift_narrow(size, tmp, tmp2, q, u);
                        tmp3 = neon_load_reg(rm + pass, 1);
                        gen_neon_shift_narrow(size, tmp3, tmp2, q, u);
                        tcg_gen_concat_i32_i64(cpu_V0, tmp, tmp3);
                        dead_tmp(tmp);
                        dead_tmp(tmp3);
                    }
                    tmp = new_tmp();
                    if (op == 8 && !u) {
                        /* Plain VSHRN/VRSHRN.  */
                        gen_neon_narrow(size - 1, tmp, cpu_V0);
                    } else {
                        /* Saturating narrows.  */
                        if (op == 8)
                            gen_neon_narrow_sats(size - 1, tmp, cpu_V0);
                        else
                            gen_neon_narrow_satu(size - 1, tmp, cpu_V0);
                    }
                    /* Defer stores so Rd can overlap Rm.
                       NOTE(review): for size < 3 this reassignment overwrites
                       tmp2 (the shift-count temp) with the pass-0 result, yet
                       pass 1 passes tmp2 to gen_neon_shift_narrow again --
                       looks suspect; verify against upstream.  */
                    if (pass == 0) {
                        tmp2 = tmp;
                    } else {
                        neon_store_reg(rd, 0, tmp2);
                        neon_store_reg(rd, 1, tmp);
                    }
                } /* for pass */
4712            } else if (op == 10) {
                /* VSHLL: shift left and widen each element to double width.
                   Only the D-register (q == 0) form exists.  */
                if (q || size == 3)
                    return 1;
                tmp = neon_load_reg(rm, 0);
                tmp2 = neon_load_reg(rm, 1);
                for (pass = 0; pass < 2; pass++) {
                    if (pass == 1)
                        tmp = tmp2;

                    gen_neon_widen(cpu_V0, tmp, size, u);

                    if (shift != 0) {
                        /* The shift is less than the width of the source
                           type, so we can just shift the whole register.  */
                        tcg_gen_shli_i64(cpu_V0, cpu_V0, shift);
                        /* Clean up bits carried across lane boundaries by
                           the whole-register shift.
                           NOTE(review): 0xffu >> (8 - shift) keeps only the
                           low `shift` bits of each wide lane -- verify this
                           mask against VSHLL semantics / upstream fixes.  */
                        if (size < 2 || !u) {
                            uint64_t imm64;
                            if (size == 0) {
                                imm = (0xffu >> (8 - shift));
                                imm |= imm << 16;
                            } else {
                                imm = 0xffff >> (16 - shift);
                            }
                            imm64 = imm | (((uint64_t)imm) << 32);
                            tcg_gen_andi_i64(cpu_V0, cpu_V0, imm64);
                        }
                    }
                    neon_store_reg64(cpu_V0, rd + pass);
                }
4742            } else if (op == 15 || op == 16) {
                /* VCVT fixed-point <-> single-precision, per 32-bit lane.
                   Odd op (15): fixed-point to float (gen_vfp_[us]lto);
                   even op (16): float to fixed-point (gen_vfp_to[us]l);
                   u selects unsigned, shift is the fraction-bit count.  */
                for (pass = 0; pass < (q ? 4 : 2); pass++) {
                    tcg_gen_ld_f32(cpu_F0s, cpu_env, neon_reg_offset(rm, pass));
                    if (op & 1) {
                        if (u)
                            gen_vfp_ulto(0, shift);
                        else
                            gen_vfp_slto(0, shift);
                    } else {
                        if (u)
                            gen_vfp_toul(0, shift);
                        else
                            gen_vfp_tosl(0, shift);
                    }
                    tcg_gen_st_f32(cpu_F0s, cpu_env, neon_reg_offset(rd, pass));
                }
4759            } else {
4760                return 1;
4761            }
4762        } else { /* (insn & 0x00380080) == 0 */
4763            int invert;
4764
4765            op = (insn >> 8) & 0xf;
4766            /* One register and immediate.  */
4767            imm = (u << 7) | ((insn >> 12) & 0x70) | (insn & 0xf);
4768            invert = (insn & (1 << 5)) != 0;
4769            switch (op) {
4770            case 0: case 1:
4771                /* no-op */
4772                break;
4773            case 2: case 3:
4774                imm <<= 8;
4775                break;
4776            case 4: case 5:
4777                imm <<= 16;
4778                break;
4779            case 6: case 7:
4780                imm <<= 24;
4781                break;
4782            case 8: case 9:
4783                imm |= imm << 16;
4784                break;
4785            case 10: case 11:
4786                imm = (imm << 8) | (imm << 24);
4787                break;
4788            case 12:
4789                imm = (imm < 8) | 0xff;
4790                break;
4791            case 13:
4792                imm = (imm << 16) | 0xffff;
4793                break;
4794            case 14:
4795                imm |= (imm << 8) | (imm << 16) | (imm << 24);
4796                if (invert)
4797                    imm = ~imm;
4798                break;
4799            case 15:
4800                imm = ((imm & 0x80) << 24) | ((imm & 0x3f) << 19)
4801                      | ((imm & 0x40) ? (0x1f << 25) : (1 << 30));
4802                break;
4803            }
4804            if (invert)
4805                imm = ~imm;
4806
4807            if (op != 14 || !invert)
4808                gen_op_movl_T1_im(imm);
4809
            for (pass = 0; pass < (q ? 4 : 2); pass++) {
                if (op & 1 && op < 12) {
                    /* VORR/VBIC: read-modify-write of the destination.  */
                    tmp = neon_load_reg(rd, pass);
                    if (invert) {
                        /* The immediate value has already been inverted, so
                           BIC becomes AND.  */
                        tcg_gen_andi_i32(tmp, tmp, imm);
                    } else {
                        tcg_gen_ori_i32(tmp, tmp, imm);
                    }
                } else {
                    /* VMOV, VMVN.  */
                    tmp = new_tmp();
                    if (op == 14 && invert) {
                        /* cmode 1110 op 1: each immediate bit selects an
                           all-ones or all-zeroes byte of the 64-bit result;
                           (pass & 1) picks the high or low half's nibble.  */
                        uint32_t val;
                        val = 0;
                        for (n = 0; n < 4; n++) {
                            if (imm & (1 << (n + (pass & 1) * 4)))
                                val |= 0xff << (n * 8);
                        }
                        tcg_gen_movi_i32(tmp, val);
                    } else {
                        tcg_gen_movi_i32(tmp, imm);
                    }
                }
                neon_store_reg(rd, pass, tmp);
            }
4837        }
4838    } else { /* (insn & 0x00800010 == 0x00800000) */
4839        if (size != 3) {
4840            op = (insn >> 8) & 0xf;
4841            if ((insn & (1 << 6)) == 0) {
                /* Three registers of different lengths.  */
                int src1_wide;
                int src2_wide;
                int prewiden;
                /* Per-op flags, indexed by op:
                   prewiden  -- widen the narrow source(s) before the op;
                   src1_wide -- Rn is already a wide (Q) operand;
                   src2_wide -- Rm is already a wide (Q) operand.  */
                static const int neon_3reg_wide[16][3] = {
                    {1, 0, 0}, /* VADDL */
                    {1, 1, 0}, /* VADDW */
                    {1, 0, 0}, /* VSUBL */
                    {1, 1, 0}, /* VSUBW */
                    {0, 1, 1}, /* VADDHN */
                    {0, 0, 0}, /* VABAL */
                    {0, 1, 1}, /* VSUBHN */
                    {0, 0, 0}, /* VABDL */
                    {0, 0, 0}, /* VMLAL */
                    {0, 0, 0}, /* VQDMLAL */
                    {0, 0, 0}, /* VMLSL */
                    {0, 0, 0}, /* VQDMLSL */
                    {0, 0, 0}, /* Integer VMULL */
                    {0, 0, 0}, /* VQDMULL */
                    {0, 0, 0}  /* Polynomial VMULL */
                };

                prewiden = neon_3reg_wide[op][0];
                src1_wide = neon_3reg_wide[op][1];
                src2_wide = neon_3reg_wide[op][2];

                /* Saturating doubling ops are not valid for byte elements.  */
                if (size == 0 && (op == 9 || op == 11 || op == 13))
                    return 1;

                /* Avoid overlapping operands.  Wide source operands are
                   always aligned so will never overlap with wide
                   destinations in problematic ways.  */
                if (rd == rm && !src2_wide) {
                    NEON_GET_REG(T0, rm, 1);
                    gen_neon_movl_scratch_T0(2);
                } else if (rd == rn && !src1_wide) {
                    NEON_GET_REG(T0, rn, 1);
                    gen_neon_movl_scratch_T0(2);
                }
                TCGV_UNUSED(tmp3);
                for (pass = 0; pass < 2; pass++) {
                    /* Fetch the first operand: either a wide half in cpu_V0
                       or a narrow element in tmp (possibly pre-widened).  */
                    if (src1_wide) {
                        neon_load_reg64(cpu_V0, rn + pass);
                        TCGV_UNUSED(tmp);
                    } else {
                        if (pass == 1 && rd == rn) {
                            /* Element saved to scratch 2 above (overlap).  */
                            gen_neon_movl_T0_scratch(2);
                            tmp = new_tmp();
                            tcg_gen_mov_i32(tmp, cpu_T[0]);
                        } else {
                            tmp = neon_load_reg(rn, pass);
                        }
                        if (prewiden) {
                            gen_neon_widen(cpu_V0, tmp, size, u);
                        }
                    }
                    /* Second operand, same scheme, into cpu_V1/tmp2.  */
                    if (src2_wide) {
                        neon_load_reg64(cpu_V1, rm + pass);
                        TCGV_UNUSED(tmp2);
                    } else {
                        if (pass == 1 && rd == rm) {
                            gen_neon_movl_T0_scratch(2);
                            tmp2 = new_tmp();
                            tcg_gen_mov_i32(tmp2, cpu_T[0]);
                        } else {
                            tmp2 = neon_load_reg(rm, pass);
                        }
                        if (prewiden) {
                            gen_neon_widen(cpu_V1, tmp2, size, u);
                        }
                    }
                    switch (op) {
                    case 0: case 1: case 4: /* VADDL, VADDW, VADDHN, VRADDHN */
                        gen_neon_addl(size);
                        break;
                    case 2: case 3: case 6: /* VSUBL, VSUBW, VSUBHL, VRSUBHL */
                        gen_neon_subl(size);
                        break;
                    case 5: case 7: /* VABAL, VABDL */
                        /* Dispatch on element size and signedness.  */
                        switch ((size << 1) | u) {
                        case 0:
                            gen_helper_neon_abdl_s16(cpu_V0, tmp, tmp2);
                            break;
                        case 1:
                            gen_helper_neon_abdl_u16(cpu_V0, tmp, tmp2);
                            break;
                        case 2:
                            gen_helper_neon_abdl_s32(cpu_V0, tmp, tmp2);
                            break;
                        case 3:
                            gen_helper_neon_abdl_u32(cpu_V0, tmp, tmp2);
                            break;
                        case 4:
                            gen_helper_neon_abdl_s64(cpu_V0, tmp, tmp2);
                            break;
                        case 5:
                            gen_helper_neon_abdl_u64(cpu_V0, tmp, tmp2);
                            break;
                        default: abort();
                        }
                        dead_tmp(tmp2);
                        dead_tmp(tmp);
                        break;
                    case 8: case 9: case 10: case 11: case 12: case 13:
                        /* VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL */
                        gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
                        break;
                    case 14: /* Polynomial VMULL */
                        cpu_abort(env, "Polynomial VMULL not implemented");
                        /* cpu_abort does not return.  */
                    default: /* 15 is RESERVED.  */
                        return 1;
                    }
                    if (op == 5 || op == 13 || (op >= 8 && op <= 11)) {
                        /* Accumulate.  */
                        if (op == 10 || op == 11) {
                            /* VMLSL/VQDMLSL negate the product first.  */
                            gen_neon_negl(cpu_V0, size);
                        }

                        if (op != 13) {
                            neon_load_reg64(cpu_V1, rd + pass);
                        }

                        switch (op) {
                        case 5: case 8: case 10: /* VABAL, VMLAL, VMLSL */
                            gen_neon_addl(size);
                            break;
                        case 9: case 11: /* VQDMLAL, VQDMLSL */
                            /* Saturating double, then saturating accumulate.  */
                            gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
                            gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
                            break;
                        case 13: /* VQDMULL */
                            gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
                            break;
                        default:
                            abort();
                        }
                        neon_store_reg64(cpu_V0, rd + pass);
                    } else if (op == 4 || op == 6) {
                        /* Narrowing operation (VADDHN/VSUBHN families).
                           NOTE(review): u == 1 selects the non-rounding
                           narrow and u == 0 the rounding one; the ARM ARM
                           gives VRADDHN/VRSUBHN U=1 -- verify this polarity
                           against how `u` is extracted by the decoder.  */
                        tmp = new_tmp();
                        if (u) {
                            switch (size) {
                            case 0:
                                gen_helper_neon_narrow_high_u8(tmp, cpu_V0);
                                break;
                            case 1:
                                gen_helper_neon_narrow_high_u16(tmp, cpu_V0);
                                break;
                            case 2:
                                tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
                                tcg_gen_trunc_i64_i32(tmp, cpu_V0);
                                break;
                            default: abort();
                            }
                        } else {
                            switch (size) {
                            case 0:
                                gen_helper_neon_narrow_round_high_u8(tmp, cpu_V0);
                                break;
                            case 1:
                                gen_helper_neon_narrow_round_high_u16(tmp, cpu_V0);
                                break;
                            case 2:
                                /* Round by adding half an LSB of the kept
                                   high half before truncating.  */
                                tcg_gen_addi_i64(cpu_V0, cpu_V0, 1u << 31);
                                tcg_gen_shri_i64(cpu_V0, cpu_V0, 32);
                                tcg_gen_trunc_i64_i32(tmp, cpu_V0);
                                break;
                            default: abort();
                            }
                        }
                        /* Defer stores so Rd may overlap a source.  */
                        if (pass == 0) {
                            tmp3 = tmp;
                        } else {
                            neon_store_reg(rd, 0, tmp3);
                            neon_store_reg(rd, 1, tmp);
                        }
                    } else {
                        /* Write back the result.  */
                        neon_store_reg64(cpu_V0, rd + pass);
                    }
                }
5026            } else {
                /* Two registers and a scalar.  */
                switch (op) {
                case 0: /* Integer VMLA scalar */
                case 1: /* Float VMLA scalar */
                case 4: /* Integer VMLS scalar */
                case 5: /* Floating point VMLS scalar */
                case 8: /* Integer VMUL scalar */
                case 9: /* Floating point VMUL scalar */
                case 12: /* VQDMULH scalar */
                case 13: /* VQRDMULH scalar */
                    /* The scalar is kept in scratch 0 and reloaded into T0
                       every pass, since the helpers clobber T0.  */
                    gen_neon_get_scalar(size, rm);
                    gen_neon_movl_scratch_T0(0);
                    for (pass = 0; pass < (u ? 4 : 2); pass++) {
                        if (pass != 0)
                            gen_neon_movl_T0_scratch(0);
                        NEON_GET_REG(T1, rn, pass);
                        if (op == 12) {
                            if (size == 1) {
                                gen_helper_neon_qdmulh_s16(CPU_T0E01);
                            } else {
                                gen_helper_neon_qdmulh_s32(CPU_T0E01);
                            }
                        } else if (op == 13) {
                            if (size == 1) {
                                gen_helper_neon_qrdmulh_s16(CPU_T0E01);
                            } else {
                                gen_helper_neon_qrdmulh_s32(CPU_T0E01);
                            }
                        } else if (op & 1) {
                            /* Odd ops are the float variants.  */
                            gen_helper_neon_mul_f32(CPU_T001);
                        } else {
                            switch (size) {
                            case 0: gen_helper_neon_mul_u8(CPU_T001); break;
                            case 1: gen_helper_neon_mul_u16(CPU_T001); break;
                            case 2: gen_op_mul_T0_T1(); break;
                            default: return 1;
                            }
                        }
                        if (op < 8) {
                            /* Accumulate (VMLA/VMLS).  */
                            NEON_GET_REG(T1, rd, pass);
                            switch (op) {
                            case 0:
                                gen_neon_add(size);
                                break;
                            case 1:
                                gen_helper_neon_add_f32(CPU_T001);
                                break;
                            case 4:
                                gen_neon_rsb(size);
                                break;
                            case 5:
                                /* dest - product: note the swapped args.  */
                                gen_helper_neon_sub_f32(cpu_T[0], cpu_T[1], cpu_T[0]);
                                break;
                            default:
                                abort();
                            }
                        }
                        NEON_SET_REG(T0, rd, pass);
                    }
                    break;
                case 2: /* VMLAL scalar */
                case 3: /* VQDMLAL scalar */
                case 6: /* VMLSL scalar */
                case 7: /* VQDMLSL scalar */
                case 10: /* VMULL scalar */
                case 11: /* VQDMULL scalar */
                    /* Saturating doubling ops invalid for byte elements.  */
                    if (size == 0 && (op == 3 || op == 7 || op == 11))
                        return 1;

                    /* Scalar in T0; preload Rn's high element into T1 so a
                       Rd that overlaps Rn is still read correctly.  */
                    gen_neon_get_scalar(size, rm);
                    NEON_GET_REG(T1, rn, 1);

                    for (pass = 0; pass < 2; pass++) {
                        if (pass == 0) {
                            tmp = neon_load_reg(rn, 0);
                        } else {
                            tmp = new_tmp();
                            tcg_gen_mov_i32(tmp, cpu_T[1]);
                        }
                        tmp2 = new_tmp();
                        tcg_gen_mov_i32(tmp2, cpu_T[0]);
                        gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
                        if (op == 6 || op == 7) {
                            /* VMLSL/VQDMLSL negate the product.  */
                            gen_neon_negl(cpu_V0, size);
                        }
                        if (op != 11) {
                            neon_load_reg64(cpu_V1, rd + pass);
                        }
                        switch (op) {
                        case 2: case 6:
                            gen_neon_addl(size);
                            break;
                        case 3: case 7:
                            gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
                            gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
                            break;
                        case 10:
                            /* no-op */
                            break;
                        case 11:
                            gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
                            break;
                        default:
                            abort();
                        }
                        neon_store_reg64(cpu_V0, rd + pass);
                    }
                    break;
                default: /* 14 and 15 are RESERVED */
                    return 1;
                }
5139            }
5140        } else { /* size == 3 */
5141            if (!u) {
                /* Extract (VEXT): concatenate Rm:Rn and extract a
                   byte-offset window of D or Q width.  */
                imm = (insn >> 8) & 0xf;
                count = q + 1;

                /* Byte offsets past the first D register need q.  */
                if (imm > 7 && !q)
                    return 1;

                if (imm == 0) {
                    /* Pure copy of Rn.  */
                    neon_load_reg64(cpu_V0, rn);
                    if (q) {
                        neon_load_reg64(cpu_V1, rn + 1);
                    }
                } else if (imm == 8) {
                    /* D-register-aligned: just shift operands over.  */
                    neon_load_reg64(cpu_V0, rn + 1);
                    if (q) {
                        neon_load_reg64(cpu_V1, rm);
                    }
                } else if (q) {
                    /* Quad, unaligned: assemble each result half from two
                       adjacent 64-bit words with a shift/shift/or.  */
                    tmp64 = tcg_temp_new_i64();
                    if (imm < 8) {
                        neon_load_reg64(cpu_V0, rn);
                        neon_load_reg64(tmp64, rn + 1);
                    } else {
                        neon_load_reg64(cpu_V0, rn + 1);
                        neon_load_reg64(tmp64, rm);
                    }
                    tcg_gen_shri_i64(cpu_V0, cpu_V0, (imm & 7) * 8);
                    tcg_gen_shli_i64(cpu_V1, tmp64, 64 - ((imm & 7) * 8));
                    tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
                    if (imm < 8) {
                        neon_load_reg64(cpu_V1, rm);
                    } else {
                        neon_load_reg64(cpu_V1, rm + 1);
                        imm -= 8;
                    }
                    tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
                    tcg_gen_shri_i64(tmp64, tmp64, imm * 8);
                    tcg_gen_or_i64(cpu_V1, cpu_V1, tmp64);
                } else {
                    /* BUGFIX: double-width (non-quad) unaligned case.  */
                    neon_load_reg64(cpu_V0, rn);
                    tcg_gen_shri_i64(cpu_V0, cpu_V0, imm * 8);
                    neon_load_reg64(cpu_V1, rm);
                    tcg_gen_shli_i64(cpu_V1, cpu_V1, 64 - (imm * 8));
                    tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
                }
                neon_store_reg64(cpu_V0, rd);
                if (q) {
                    neon_store_reg64(cpu_V1, rd + 1);
                }
5192            } else if ((insn & (1 << 11)) == 0) {
5193                /* Two register misc.  */
5194                op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf);
5195                size = (insn >> 18) & 3;
5196                switch (op) {
                case 0: /* VREV64 */
                    if (size == 3)
                        return 1;
                    /* Reverse elements within each 64-bit doubleword: swap
                       the two 32-bit halves and byte/half-swap within each
                       according to the element size.  */
                    for (pass = 0; pass < (q ? 2 : 1); pass++) {
                        NEON_GET_REG(T0, rm, pass * 2);
                        NEON_GET_REG(T1, rm, pass * 2 + 1);
                        switch (size) {
                        case 0: tcg_gen_bswap32_i32(cpu_T[0], cpu_T[0]); break;
                        case 1: gen_swap_half(cpu_T[0]); break;
                        case 2: /* no-op */ break;
                        default: abort();
                        }
                        NEON_SET_REG(T0, rd, pass * 2 + 1);
                        if (size == 2) {
                            NEON_SET_REG(T1, rd, pass * 2);
                        } else {
                            /* Process the second word the same way.  */
                            gen_op_movl_T0_T1();
                            switch (size) {
                            case 0: tcg_gen_bswap32_i32(cpu_T[0], cpu_T[0]); break;
                            case 1: gen_swap_half(cpu_T[0]); break;
                            default: abort();
                            }
                            NEON_SET_REG(T0, rd, pass * 2);
                        }
                    }
                    break;
                case 4: case 5: /* VPADDL */
                case 12: case 13: /* VPADAL */
                    if (size == 3)
                        return 1;
                    /* Widen each half of the source doubleword, then add
                       adjacent pairs.  The low op bit feeds gen_neon_widen's
                       signedness argument (presumably 1 = unsigned --
                       verify against gen_neon_widen).  */
                    for (pass = 0; pass < q + 1; pass++) {
                        tmp = neon_load_reg(rm, pass * 2);
                        gen_neon_widen(cpu_V0, tmp, size, op & 1);
                        tmp = neon_load_reg(rm, pass * 2 + 1);
                        gen_neon_widen(cpu_V1, tmp, size, op & 1);
                        switch (size) {
                        case 0: gen_helper_neon_paddl_u16(CPU_V001); break;
                        case 1: gen_helper_neon_paddl_u32(CPU_V001); break;
                        case 2: tcg_gen_add_i64(CPU_V001); break;
                        default: abort();
                        }
                        if (op >= 12) {
                            /* Accumulate (VPADAL).  */
                            neon_load_reg64(cpu_V1, rd + pass);
                            gen_neon_addl(size);
                        }
                        neon_store_reg64(cpu_V0, rd + pass);
                    }
                    break;
                case 33: /* VTRN */
                    if (size == 2) {
                        /* 32-bit elements: transpose by swapping the odd
                           element of Rd with the even element of Rm.  */
                        for (n = 0; n < (q ? 4 : 2); n += 2) {
                            NEON_GET_REG(T0, rm, n);
                            NEON_GET_REG(T1, rd, n + 1);
                            NEON_SET_REG(T1, rm, n);
                            NEON_SET_REG(T0, rd, n + 1);
                        }
                    } else {
                        /* 8/16-bit elements are handled by the generic
                           per-element path.  */
                        goto elementwise;
                    }
                    break;
                case 34: /* VUZP */
                    /* Reg  Before       After
                       Rd   A3 A2 A1 A0  B2 B0 A2 A0
                       Rm   B3 B2 B1 B0  B3 B1 A3 A1
                     */
                    if (size == 3)
                        return 1;
                    /* Unzip both operands into scratch words 0-3 (Rd) and
                       4-7 (Rm), then write them back in de-interleaved
                       order.  */
                    gen_neon_unzip(rd, q, 0, size);
                    gen_neon_unzip(rm, q, 4, size);
                    if (q) {
                        /* Even-indexed scratch words first, then odd.  */
                        static int unzip_order_q[8] =
                            {0, 2, 4, 6, 1, 3, 5, 7};
                        for (n = 0; n < 8; n++) {
                            int reg = (n < 4) ? rd : rm;
                            gen_neon_movl_T0_scratch(unzip_order_q[n]);
                            NEON_SET_REG(T0, reg, n % 4);
                        }
                    } else {
                        static int unzip_order[4] =
                            {0, 4, 1, 5};
                        for (n = 0; n < 4; n++) {
                            int reg = (n < 2) ? rd : rm;
                            gen_neon_movl_T0_scratch(unzip_order[n]);
                            NEON_SET_REG(T0, reg, n % 2);
                        }
                    }
                    break;
5285                case 35: /* VZIP */
5286                    /* Reg  Before       After
5287                       Rd   A3 A2 A1 A0  B1 A1 B0 A0
5288                       Rm   B3 B2 B1 B0  B3 A3 B2 A2
5289                     */
5290                    if (size == 3)
5291                        return 1;
5292                    count = (q ? 4 : 2);
5293                    for (n = 0; n < count; n++) {
5294                        NEON_GET_REG(T0, rd, n);
5295                        NEON_GET_REG(T1, rd, n);
5296                        switch (size) {
5297                        case 0: gen_helper_neon_zip_u8(); break;
5298                        case 1: gen_helper_neon_zip_u16(); break;
5299                        case 2: /* no-op */; break;
5300                        default: abort();
5301                        }
5302                        gen_neon_movl_scratch_T0(n * 2);
5303                        gen_neon_movl_scratch_T1(n * 2 + 1);
5304                    }
5305                    for (n = 0; n < count * 2; n++) {
5306                        int reg = (n < count) ? rd : rm;
5307                        gen_neon_movl_T0_scratch(n);
5308                        NEON_SET_REG(T0, reg, n % count);
5309                    }
5310                    break;
5311                case 36: case 37: /* VMOVN, VQMOVUN, VQMOVN */
5312                    if (size == 3)
5313                        return 1;
5314                    TCGV_UNUSED(tmp2);
5315                    for (pass = 0; pass < 2; pass++) {
5316                        neon_load_reg64(cpu_V0, rm + pass);
5317                        tmp = new_tmp();
5318                        if (op == 36 && q == 0) {
5319                            gen_neon_narrow(size, tmp, cpu_V0);
5320                        } else if (q) {
5321                            gen_neon_narrow_satu(size, tmp, cpu_V0);
5322                        } else {
5323                            gen_neon_narrow_sats(size, tmp, cpu_V0);
5324                        }
5325                        if (pass == 0) {
5326                            tmp2 = tmp;
5327                        } else {
5328                            neon_store_reg(rd, 0, tmp2);
5329                            neon_store_reg(rd, 1, tmp);
5330                        }
5331                    }
5332                    break;
5333                case 38: /* VSHLL */
5334                    if (q || size == 3)
5335                        return 1;
5336                    tmp = neon_load_reg(rm, 0);
5337                    tmp2 = neon_load_reg(rm, 1);
5338                    for (pass = 0; pass < 2; pass++) {
5339                        if (pass == 1)
5340                            tmp = tmp2;
5341                        gen_neon_widen(cpu_V0, tmp, size, 1);
5342                        neon_store_reg64(cpu_V0, rd + pass);
5343                    }
5344                    break;
5345                default:
5346                elementwise:
5347                    for (pass = 0; pass < (q ? 4 : 2); pass++) {
5348                        if (op == 30 || op == 31 || op >= 58) {
5349                            tcg_gen_ld_f32(cpu_F0s, cpu_env,
5350                                           neon_reg_offset(rm, pass));
5351                        } else {
5352                            NEON_GET_REG(T0, rm, pass);
5353                        }
5354                        switch (op) {
5355                        case 1: /* VREV32 */
5356                            switch (size) {
5357                            case 0: tcg_gen_bswap32_i32(cpu_T[0], cpu_T[0]); break;
5358                            case 1: gen_swap_half(cpu_T[0]); break;
5359                            default: return 1;
5360                            }
5361                            break;
5362                        case 2: /* VREV16 */
5363                            if (size != 0)
5364                                return 1;
5365                            gen_rev16(cpu_T[0]);
5366                            break;
5367                        case 8: /* CLS */
5368                            switch (size) {
5369                            case 0: gen_helper_neon_cls_s8(cpu_T[0], cpu_T[0]); break;
5370                            case 1: gen_helper_neon_cls_s16(cpu_T[0], cpu_T[0]); break;
5371                            case 2: gen_helper_neon_cls_s32(cpu_T[0], cpu_T[0]); break;
5372                            default: return 1;
5373                            }
5374                            break;
5375                        case 9: /* CLZ */
5376                            switch (size) {
5377                            case 0: gen_helper_neon_clz_u8(cpu_T[0], cpu_T[0]); break;
5378                            case 1: gen_helper_neon_clz_u16(cpu_T[0], cpu_T[0]); break;
5379                            case 2: gen_helper_clz(cpu_T[0], cpu_T[0]); break;
5380                            default: return 1;
5381                            }
5382                            break;
5383                        case 10: /* CNT */
5384                            if (size != 0)
5385                                return 1;
5386                            gen_helper_neon_cnt_u8(cpu_T[0], cpu_T[0]);
5387                            break;
5388                        case 11: /* VNOT */
5389                            if (size != 0)
5390                                return 1;
5391                            gen_op_notl_T0();
5392                            break;
5393                        case 14: /* VQABS */
5394                            switch (size) {
5395                            case 0: gen_helper_neon_qabs_s8(cpu_T[0], cpu_env, cpu_T[0]); break;
5396                            case 1: gen_helper_neon_qabs_s16(cpu_T[0], cpu_env, cpu_T[0]); break;
5397                            case 2: gen_helper_neon_qabs_s32(cpu_T[0], cpu_env, cpu_T[0]); break;
5398                            default: return 1;
5399                            }
5400                            break;
5401                        case 15: /* VQNEG */
5402                            switch (size) {
5403                            case 0: gen_helper_neon_qneg_s8(cpu_T[0], cpu_env, cpu_T[0]); break;
5404                            case 1: gen_helper_neon_qneg_s16(cpu_T[0], cpu_env, cpu_T[0]); break;
5405                            case 2: gen_helper_neon_qneg_s32(cpu_T[0], cpu_env, cpu_T[0]); break;
5406                            default: return 1;
5407                            }
5408                            break;
5409                        case 16: case 19: /* VCGT #0, VCLE #0 */
5410                            gen_op_movl_T1_im(0);
5411                            switch(size) {
5412                            case 0: gen_helper_neon_cgt_s8(CPU_T001); break;
5413                            case 1: gen_helper_neon_cgt_s16(CPU_T001); break;
5414                            case 2: gen_helper_neon_cgt_s32(CPU_T001); break;
5415                            default: return 1;
5416                            }
5417                            if (op == 19)
5418                                gen_op_notl_T0();
5419                            break;
5420                        case 17: case 20: /* VCGE #0, VCLT #0 */
5421                            gen_op_movl_T1_im(0);
5422                            switch(size) {
5423                            case 0: gen_helper_neon_cge_s8(CPU_T001); break;
5424                            case 1: gen_helper_neon_cge_s16(CPU_T001); break;
5425                            case 2: gen_helper_neon_cge_s32(CPU_T001); break;
5426                            default: return 1;
5427                            }
5428                            if (op == 20)
5429                                gen_op_notl_T0();
5430                            break;
5431                        case 18: /* VCEQ #0 */
5432                            gen_op_movl_T1_im(0);
5433                            switch(size) {
5434                            case 0: gen_helper_neon_ceq_u8(CPU_T001); break;
5435                            case 1: gen_helper_neon_ceq_u16(CPU_T001); break;
5436                            case 2: gen_helper_neon_ceq_u32(CPU_T001); break;
5437                            default: return 1;
5438                            }
5439                            break;
5440                        case 22: /* VABS */
5441                            switch(size) {
5442                            case 0: gen_helper_neon_abs_s8(cpu_T[0], cpu_T[0]); break;
5443                            case 1: gen_helper_neon_abs_s16(cpu_T[0], cpu_T[0]); break;
5444                            case 2: tcg_gen_abs_i32(cpu_T[0], cpu_T[0]); break;
5445                            default: return 1;
5446                            }
5447                            break;
5448                        case 23: /* VNEG */
5449                            gen_op_movl_T1_im(0);
5450                            if (size == 3)
5451                                return 1;
5452                            gen_neon_rsb(size);
5453                            break;
5454                        case 24: case 27: /* Float VCGT #0, Float VCLE #0 */
5455                            gen_op_movl_T1_im(0);
5456                            gen_helper_neon_cgt_f32(CPU_T001);
5457                            if (op == 27)
5458                                gen_op_notl_T0();
5459                            break;
5460                        case 25: case 28: /* Float VCGE #0, Float VCLT #0 */
5461                            gen_op_movl_T1_im(0);
5462                            gen_helper_neon_cge_f32(CPU_T001);
5463                            if (op == 28)
5464                                gen_op_notl_T0();
5465                            break;
5466                        case 26: /* Float VCEQ #0 */
5467                            gen_op_movl_T1_im(0);
5468                            gen_helper_neon_ceq_f32(CPU_T001);
5469                            break;
5470                        case 30: /* Float VABS */
5471                            gen_vfp_abs(0);
5472                            break;
5473                        case 31: /* Float VNEG */
5474                            gen_vfp_neg(0);
5475                            break;
5476                        case 32: /* VSWP */
5477                            NEON_GET_REG(T1, rd, pass);
5478                            NEON_SET_REG(T1, rm, pass);
5479                            break;
5480                        case 33: /* VTRN */
5481                            NEON_GET_REG(T1, rd, pass);
5482                            switch (size) {
5483                            case 0: gen_helper_neon_trn_u8(); break;
5484                            case 1: gen_helper_neon_trn_u16(); break;
5485                            case 2: abort();
5486                            default: return 1;
5487                            }
5488                            NEON_SET_REG(T1, rm, pass);
5489                            break;
5490                        case 56: /* Integer VRECPE */
5491                            gen_helper_recpe_u32(cpu_T[0], cpu_T[0], cpu_env);
5492                            break;
5493                        case 57: /* Integer VRSQRTE */
5494                            gen_helper_rsqrte_u32(cpu_T[0], cpu_T[0], cpu_env);
5495                            break;
5496                        case 58: /* Float VRECPE */
5497                            gen_helper_recpe_f32(cpu_F0s, cpu_F0s, cpu_env);
5498                            break;
5499                        case 59: /* Float VRSQRTE */
5500                            gen_helper_rsqrte_f32(cpu_F0s, cpu_F0s, cpu_env);
5501                            break;
5502                        case 60: /* VCVT.F32.S32 */
5503                            gen_vfp_tosiz(0);
5504                            break;
5505                        case 61: /* VCVT.F32.U32 */
5506                            gen_vfp_touiz(0);
5507                            break;
5508                        case 62: /* VCVT.S32.F32 */
5509                            gen_vfp_sito(0);
5510                            break;
5511                        case 63: /* VCVT.U32.F32 */
5512                            gen_vfp_uito(0);
5513                            break;
5514                        default:
5515                            /* Reserved: 21, 29, 39-56 */
5516                            return 1;
5517                        }
5518                        if (op == 30 || op == 31 || op >= 58) {
5519                            tcg_gen_st_f32(cpu_F0s, cpu_env,
5520                                           neon_reg_offset(rd, pass));
5521                        } else {
5522                            NEON_SET_REG(T0, rd, pass);
5523                        }
5524                    }
5525                    break;
5526                }
5527            } else if ((insn & (1 << 10)) == 0) {
5528                /* VTBL, VTBX.  */
5529                n = ((insn >> 5) & 0x18) + 8;
5530                if (insn & (1 << 6)) {
5531                    tmp = neon_load_reg(rd, 0);
5532                } else {
5533                    tmp = new_tmp();
5534                    tcg_gen_movi_i32(tmp, 0);
5535                }
5536                tmp2 = neon_load_reg(rm, 0);
5537                gen_helper_neon_tbl(tmp2, tmp2, tmp, tcg_const_i32(rn),
5538                                    tcg_const_i32(n));
5539                dead_tmp(tmp);
5540                if (insn & (1 << 6)) {
5541                    tmp = neon_load_reg(rd, 1);
5542                } else {
5543                    tmp = new_tmp();
5544                    tcg_gen_movi_i32(tmp, 0);
5545                }
5546                tmp3 = neon_load_reg(rm, 1);
5547                gen_helper_neon_tbl(tmp3, tmp3, tmp, tcg_const_i32(rn),
5548                                    tcg_const_i32(n));
5549                neon_store_reg(rd, 0, tmp2);
5550                neon_store_reg(rd, 1, tmp3);
5551                dead_tmp(tmp);
5552            } else if ((insn & 0x380) == 0) {
5553                /* VDUP */
5554                if (insn & (1 << 19)) {
5555                    NEON_SET_REG(T0, rm, 1);
5556                } else {
5557                    NEON_SET_REG(T0, rm, 0);
5558                }
5559                if (insn & (1 << 16)) {
5560                    gen_neon_dup_u8(cpu_T[0], ((insn >> 17) & 3) * 8);
5561                } else if (insn & (1 << 17)) {
5562                    if ((insn >> 18) & 1)
5563                        gen_neon_dup_high16(cpu_T[0]);
5564                    else
5565                        gen_neon_dup_low16(cpu_T[0]);
5566                }
5567                for (pass = 0; pass < (q ? 4 : 2); pass++) {
5568                    NEON_SET_REG(T0, rd, pass);
5569                }
5570            } else {
5571                return 1;
5572            }
5573        }
5574    }
5575    return 0;
5576}
5577
5578static int disas_cp14_read(CPUState * env, DisasContext *s, uint32_t insn)
5579{
5580    int crn = (insn >> 16) & 0xf;
5581    int crm = insn & 0xf;
5582    int op1 = (insn >> 21) & 7;
5583    int op2 = (insn >> 5) & 7;
5584    int rt = (insn >> 12) & 0xf;
5585    TCGv tmp;
5586
5587    if (arm_feature(env, ARM_FEATURE_THUMB2EE)) {
5588        if (op1 == 6 && crn == 0 && crm == 0 && op2 == 0) {
5589            /* TEECR */
5590            if (IS_USER(s))
5591                return 1;
5592            tmp = load_cpu_field(teecr);
5593            store_reg(s, rt, tmp);
5594            return 0;
5595        }
5596        if (op1 == 6 && crn == 1 && crm == 0 && op2 == 0) {
5597            /* TEEHBR */
5598            if (IS_USER(s) && (env->teecr & 1))
5599                return 1;
5600            tmp = load_cpu_field(teehbr);
5601            store_reg(s, rt, tmp);
5602            return 0;
5603        }
5604    }
5605    fprintf(stderr, "Unknown cp14 read op1:%d crn:%d crm:%d op2:%d\n",
5606            op1, crn, crm, op2);
5607    return 1;
5608}
5609
5610static int disas_cp14_write(CPUState * env, DisasContext *s, uint32_t insn)
5611{
5612    int crn = (insn >> 16) & 0xf;
5613    int crm = insn & 0xf;
5614    int op1 = (insn >> 21) & 7;
5615    int op2 = (insn >> 5) & 7;
5616    int rt = (insn >> 12) & 0xf;
5617    TCGv tmp;
5618
5619    if (arm_feature(env, ARM_FEATURE_THUMB2EE)) {
5620        if (op1 == 6 && crn == 0 && crm == 0 && op2 == 0) {
5621            /* TEECR */
5622            if (IS_USER(s))
5623                return 1;
5624            tmp = load_reg(s, rt);
5625            gen_helper_set_teecr(cpu_env, tmp);
5626            dead_tmp(tmp);
5627            return 0;
5628        }
5629        if (op1 == 6 && crn == 1 && crm == 0 && op2 == 0) {
5630            /* TEEHBR */
5631            if (IS_USER(s) && (env->teecr & 1))
5632                return 1;
5633            tmp = load_reg(s, rt);
5634            store_cpu_field(tmp, teehbr);
5635            return 0;
5636        }
5637    }
5638    fprintf(stderr, "Unknown cp14 write op1:%d crn:%d crm:%d op2:%d\n",
5639            op1, crn, crm, op2);
5640    return 1;
5641}
5642
5643static int disas_coproc_insn(CPUState * env, DisasContext *s, uint32_t insn)
5644{
5645    int cpnum;
5646
5647    cpnum = (insn >> 8) & 0xf;
5648    if (arm_feature(env, ARM_FEATURE_XSCALE)
5649	    && ((env->cp15.c15_cpar ^ 0x3fff) & (1 << cpnum)))
5650	return 1;
5651
5652    switch (cpnum) {
5653      case 0:
5654      case 1:
5655	if (arm_feature(env, ARM_FEATURE_IWMMXT)) {
5656	    return disas_iwmmxt_insn(env, s, insn);
5657	} else if (arm_feature(env, ARM_FEATURE_XSCALE)) {
5658	    return disas_dsp_insn(env, s, insn);
5659	}
5660	return 1;
5661    case 10:
5662    case 11:
5663	return disas_vfp_insn (env, s, insn);
5664    case 14:
5665        /* Coprocessors 7-15 are architecturally reserved by ARM.
5666           Unfortunately Intel decided to ignore this.  */
5667        if (arm_feature(env, ARM_FEATURE_XSCALE))
5668            goto board;
5669        if (insn & (1 << 20))
5670            return disas_cp14_read(env, s, insn);
5671        else
5672            return disas_cp14_write(env, s, insn);
5673    case 15:
5674	return disas_cp15_insn (env, s, insn);
5675    default:
5676    board:
5677	/* Unknown coprocessor.  See if the board has hooked it.  */
5678	return disas_cp_insn (env, s, insn);
5679    }
5680}
5681
5682
5683/* Store a 64-bit value to a register pair.  Clobbers val.  */
5684static void gen_storeq_reg(DisasContext *s, int rlow, int rhigh, TCGv_i64 val)
5685{
5686    TCGv tmp;
5687    tmp = new_tmp();
5688    tcg_gen_trunc_i64_i32(tmp, val);
5689    store_reg(s, rlow, tmp);
5690    tmp = new_tmp();
5691    tcg_gen_shri_i64(val, val, 32);
5692    tcg_gen_trunc_i64_i32(tmp, val);
5693    store_reg(s, rhigh, tmp);
5694}
5695
5696/* load a 32-bit value from a register and perform a 64-bit accumulate.  */
5697static void gen_addq_lo(DisasContext *s, TCGv_i64 val, int rlow)
5698{
5699    TCGv_i64 tmp;
5700    TCGv tmp2;
5701
5702    /* Load value and extend to 64 bits.  */
5703    tmp = tcg_temp_new_i64();
5704    tmp2 = load_reg(s, rlow);
5705    tcg_gen_extu_i32_i64(tmp, tmp2);
5706    dead_tmp(tmp2);
5707    tcg_gen_add_i64(val, val, tmp);
5708}
5709
5710/* load and add a 64-bit value from a register pair.  */
5711static void gen_addq(DisasContext *s, TCGv_i64 val, int rlow, int rhigh)
5712{
5713    TCGv_i64 tmp;
5714    TCGv tmpl;
5715    TCGv tmph;
5716
5717    /* Load 64-bit value rd:rn.  */
5718    tmpl = load_reg(s, rlow);
5719    tmph = load_reg(s, rhigh);
5720    tmp = tcg_temp_new_i64();
5721    tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
5722    dead_tmp(tmpl);
5723    dead_tmp(tmph);
5724    tcg_gen_add_i64(val, val, tmp);
5725}
5726
5727/* Set N and Z flags from a 64-bit value.  */
5728static void gen_logicq_cc(TCGv_i64 val)
5729{
5730    TCGv tmp = new_tmp();
5731    gen_helper_logicq_cc(tmp, val);
5732    gen_logic_CC(tmp);
5733    dead_tmp(tmp);
5734}
5735
5736
5737#ifdef CONFIG_TRACE
5738
5739#define  gen_traceInsn()   gen_helper_traceInsn()
5740
5741static void
5742gen_traceTicks( int  count )
5743{
5744    TCGv  tmp = tcg_temp_new_i32();
5745    tcg_gen_movi_i32(tmp, count);
5746    gen_helper_traceTicks(tmp);
5747    tcg_temp_free_i32(tmp);
5748}
5749
5750static void
5751gen_traceBB( uint64_t  bbNum, target_phys_addr_t  tb )
5752{
5753#if HOST_LONG_BITS == 32
5754    TCGv_i64  tmpNum = tcg_temp_new_i64();
5755    TCGv_i32  tmpTb  = tcg_temp_new_i32();
5756
5757    tcg_gen_movi_i64(tmpNum, (int64_t)bbNum);
5758    tcg_gen_movi_i32(tmpTb,  (int32_t)tb);
5759    gen_helper_traceBB32(tmpNum, tmpTb);
5760    tcg_temp_free_i32(tmpTb);
5761    tcg_temp_free_i64(tmpNum);
5762#elif HOST_LONG_BITS == 64
5763    TCGv_i64  tmpNum = tcg_temp_new_i64();
5764    TCGv_i64  tmpTb  = tcg_temp_new_i32();
5765
5766    tcg_gen_movi_i64(tmpNum, (int64_t)bbNum);
5767    tcg_gen_movi_i64(tmpTb,  (int64_t)tb);
5768    gen_helper_traceBB32(tmpNum, tmpTb);
5769    tcg_temp_free_i64(tmpTb);
5770    tcg_temp_free_i64(tmpNum);
5771#endif
5772}
5773#endif /* CONFIG_TRACE */
5774
5775static void disas_arm_insn(CPUState * env, DisasContext *s)
5776{
5777    unsigned int cond, insn, val, op1, i, shift, rm, rs, rn, rd, sh;
5778#ifdef CONFIG_TRACE
5779    int  ticks;
5780#endif
5781    TCGv tmp;
5782    TCGv tmp2;
5783    TCGv tmp3;
5784    TCGv addr;
5785    TCGv_i64 tmp64;
5786
5787    insn = ldl_code(s->pc);
5788#ifdef CONFIG_TRACE
5789    if (tracing) {
5790        trace_add_insn(insn, 0);
5791        ticks = get_insn_ticks_arm(insn);
5792        gen_traceInsn();
5793    }
5794#endif
5795    s->pc += 4;
5796
5797    /* M variants do not implement ARM mode.  */
5798    if (IS_M(env))
5799        goto illegal_op;
5800    cond = insn >> 28;
5801    if (cond == 0xf){
5802#ifdef CONFIG_TRACE
5803        if (tracing) {
5804            gen_traceTicks(ticks);
5805        }
5806#endif
5807        /* Unconditional instructions.  */
5808        if (((insn >> 25) & 7) == 1) {
5809            /* NEON Data processing.  */
5810            if (!arm_feature(env, ARM_FEATURE_NEON))
5811                goto illegal_op;
5812
5813            if (disas_neon_data_insn(env, s, insn))
5814                goto illegal_op;
5815            return;
5816        }
5817        if ((insn & 0x0f100000) == 0x04000000) {
5818            /* NEON load/store.  */
5819            if (!arm_feature(env, ARM_FEATURE_NEON))
5820                goto illegal_op;
5821
5822            if (disas_neon_ls_insn(env, s, insn))
5823                goto illegal_op;
5824            return;
5825        }
5826        if ((insn & 0x0d70f000) == 0x0550f000)
5827            return; /* PLD */
5828        else if ((insn & 0x0ffffdff) == 0x01010000) {
5829            ARCH(6);
5830            /* setend */
5831            if (insn & (1 << 9)) {
5832                /* BE8 mode not implemented.  */
5833                goto illegal_op;
5834            }
5835            return;
5836        } else if ((insn & 0x0fffff00) == 0x057ff000) {
5837            switch ((insn >> 4) & 0xf) {
5838            case 1: /* clrex */
5839                ARCH(6K);
5840                gen_helper_clrex(cpu_env);
5841                return;
5842            case 4: /* dsb */
5843            case 5: /* dmb */
5844            case 6: /* isb */
5845                ARCH(7);
5846                /* We don't emulate caches so these are a no-op.  */
5847                return;
5848            default:
5849                goto illegal_op;
5850            }
5851        } else if ((insn & 0x0e5fffe0) == 0x084d0500) {
5852            /* srs */
5853            uint32_t offset;
5854            if (IS_USER(s))
5855                goto illegal_op;
5856            ARCH(6);
5857            op1 = (insn & 0x1f);
5858            if (op1 == (env->uncached_cpsr & CPSR_M)) {
5859                addr = load_reg(s, 13);
5860            } else {
5861                addr = new_tmp();
5862                gen_helper_get_r13_banked(addr, cpu_env, tcg_const_i32(op1));
5863            }
5864            i = (insn >> 23) & 3;
5865            switch (i) {
5866            case 0: offset = -4; break; /* DA */
5867            case 1: offset = -8; break; /* DB */
5868            case 2: offset = 0; break; /* IA */
5869            case 3: offset = 4; break; /* IB */
5870            default: abort();
5871            }
5872            if (offset)
5873                tcg_gen_addi_i32(addr, addr, offset);
5874            tmp = load_reg(s, 14);
5875            gen_st32(tmp, addr, 0);
5876            tmp = new_tmp();
5877            gen_helper_cpsr_read(tmp);
5878            tcg_gen_addi_i32(addr, addr, 4);
5879            gen_st32(tmp, addr, 0);
5880            if (insn & (1 << 21)) {
5881                /* Base writeback.  */
5882                switch (i) {
5883                case 0: offset = -8; break;
5884                case 1: offset = -4; break;
5885                case 2: offset = 4; break;
5886                case 3: offset = 0; break;
5887                default: abort();
5888                }
5889                if (offset)
5890                    tcg_gen_addi_i32(addr, tmp, offset);
5891                if (op1 == (env->uncached_cpsr & CPSR_M)) {
5892                    gen_movl_reg_T1(s, 13);
5893                } else {
5894                    gen_helper_set_r13_banked(cpu_env, tcg_const_i32(op1), cpu_T[1]);
5895                }
5896            } else {
5897                dead_tmp(addr);
5898            }
5899        } else if ((insn & 0x0e5fffe0) == 0x081d0a00) {
5900            /* rfe */
5901            uint32_t offset;
5902            if (IS_USER(s))
5903                goto illegal_op;
5904            ARCH(6);
5905            rn = (insn >> 16) & 0xf;
5906            addr = load_reg(s, rn);
5907            i = (insn >> 23) & 3;
5908            switch (i) {
5909            case 0: offset = -4; break; /* DA */
5910            case 1: offset = -8; break; /* DB */
5911            case 2: offset = 0; break; /* IA */
5912            case 3: offset = 4; break; /* IB */
5913            default: abort();
5914            }
5915            if (offset)
5916                tcg_gen_addi_i32(addr, addr, offset);
5917            /* Load PC into tmp and CPSR into tmp2.  */
5918            tmp = gen_ld32(addr, 0);
5919            tcg_gen_addi_i32(addr, addr, 4);
5920            tmp2 = gen_ld32(addr, 0);
5921            if (insn & (1 << 21)) {
5922                /* Base writeback.  */
5923                switch (i) {
5924                case 0: offset = -8; break;
5925                case 1: offset = -4; break;
5926                case 2: offset = 4; break;
5927                case 3: offset = 0; break;
5928                default: abort();
5929                }
5930                if (offset)
5931                    tcg_gen_addi_i32(addr, addr, offset);
5932                store_reg(s, rn, addr);
5933            } else {
5934                dead_tmp(addr);
5935            }
5936            gen_rfe(s, tmp, tmp2);
5937        } else if ((insn & 0x0e000000) == 0x0a000000) {
5938            /* branch link and change to thumb (blx <offset>) */
5939            int32_t offset;
5940
5941            val = (uint32_t)s->pc;
5942            tmp = new_tmp();
5943            tcg_gen_movi_i32(tmp, val);
5944            store_reg(s, 14, tmp);
5945            /* Sign-extend the 24-bit offset */
5946            offset = (((int32_t)insn) << 8) >> 8;
5947            /* offset * 4 + bit24 * 2 + (thumb bit) */
5948            val += (offset << 2) | ((insn >> 23) & 2) | 1;
5949            /* pipeline offset */
5950            val += 4;
5951            gen_bx_im(s, val);
5952            return;
5953        } else if ((insn & 0x0e000f00) == 0x0c000100) {
5954            if (arm_feature(env, ARM_FEATURE_IWMMXT)) {
5955                /* iWMMXt register transfer.  */
5956                if (env->cp15.c15_cpar & (1 << 1))
5957                    if (!disas_iwmmxt_insn(env, s, insn))
5958                        return;
5959            }
5960        } else if ((insn & 0x0fe00000) == 0x0c400000) {
5961            /* Coprocessor double register transfer.  */
5962        } else if ((insn & 0x0f000010) == 0x0e000010) {
5963            /* Additional coprocessor register transfer.  */
5964        } else if ((insn & 0x0ff10020) == 0x01000000) {
5965            uint32_t mask;
5966            uint32_t val;
5967            /* cps (privileged) */
5968            if (IS_USER(s))
5969                return;
5970            mask = val = 0;
5971            if (insn & (1 << 19)) {
5972                if (insn & (1 << 8))
5973                    mask |= CPSR_A;
5974                if (insn & (1 << 7))
5975                    mask |= CPSR_I;
5976                if (insn & (1 << 6))
5977                    mask |= CPSR_F;
5978                if (insn & (1 << 18))
5979                    val |= mask;
5980            }
5981            if (insn & (1 << 17)) {
5982                mask |= CPSR_M;
5983                val |= (insn & 0x1f);
5984            }
5985            if (mask) {
5986                gen_op_movl_T0_im(val);
5987                gen_set_psr_T0(s, mask, 0);
5988            }
5989            return;
5990        }
5991        goto illegal_op;
5992    }
5993    if (cond != 0xe) {
5994#ifdef CONFIG_TRACE
5995        if (tracing) {
5996            /* a non-executed conditional instruction takes */
5997            /* only 1 cycle */
5998            gen_traceTicks(1);
5999            ticks -= 1;
6000        }
6001#endif
6002        /* if not always execute, we generate a conditional jump to
6003           next instruction */
6004        s->condlabel = gen_new_label();
6005        gen_test_cc(cond ^ 1, s->condlabel);
6006        s->condjmp = 1;
6007    }
6008#ifdef CONFIG_TRACE
6009    if (tracing && ticks > 0) {
6010        gen_traceTicks(ticks);
6011    }
6012#endif
6013    if ((insn & 0x0f900000) == 0x03000000) {
6014        if ((insn & (1 << 21)) == 0) {
6015            ARCH(6T2);
6016            rd = (insn >> 12) & 0xf;
6017            val = ((insn >> 4) & 0xf000) | (insn & 0xfff);
6018            if ((insn & (1 << 22)) == 0) {
6019                /* MOVW */
6020                tmp = new_tmp();
6021                tcg_gen_movi_i32(tmp, val);
6022            } else {
6023                /* MOVT */
6024                tmp = load_reg(s, rd);
6025                tcg_gen_ext16u_i32(tmp, tmp);
6026                tcg_gen_ori_i32(tmp, tmp, val << 16);
6027            }
6028            store_reg(s, rd, tmp);
6029        } else {
6030            if (((insn >> 12) & 0xf) != 0xf)
6031                goto illegal_op;
6032            if (((insn >> 16) & 0xf) == 0) {
6033                gen_nop_hint(s, insn & 0xff);
6034            } else {
6035                /* CPSR = immediate */
6036                val = insn & 0xff;
6037                shift = ((insn >> 8) & 0xf) * 2;
6038                if (shift)
6039                    val = (val >> shift) | (val << (32 - shift));
6040                gen_op_movl_T0_im(val);
6041                i = ((insn & (1 << 22)) != 0);
6042                if (gen_set_psr_T0(s, msr_mask(env, s, (insn >> 16) & 0xf, i), i))
6043                    goto illegal_op;
6044            }
6045        }
6046    } else if ((insn & 0x0f900000) == 0x01000000
6047               && (insn & 0x00000090) != 0x00000090) {
6048        /* miscellaneous instructions */
6049        op1 = (insn >> 21) & 3;
6050        sh = (insn >> 4) & 0xf;
6051        rm = insn & 0xf;
6052        switch (sh) {
6053        case 0x0: /* move program status register */
6054            if (op1 & 1) {
6055                /* PSR = reg */
6056                gen_movl_T0_reg(s, rm);
6057                i = ((op1 & 2) != 0);
6058                if (gen_set_psr_T0(s, msr_mask(env, s, (insn >> 16) & 0xf, i), i))
6059                    goto illegal_op;
6060            } else {
6061                /* reg = PSR */
6062                rd = (insn >> 12) & 0xf;
6063                if (op1 & 2) {
6064                    if (IS_USER(s))
6065                        goto illegal_op;
6066                    tmp = load_cpu_field(spsr);
6067                } else {
6068                    tmp = new_tmp();
6069                    gen_helper_cpsr_read(tmp);
6070                }
6071                store_reg(s, rd, tmp);
6072            }
6073            break;
6074        case 0x1:
6075            if (op1 == 1) {
6076                /* branch/exchange thumb (bx).  */
6077                tmp = load_reg(s, rm);
6078                gen_bx(s, tmp);
6079            } else if (op1 == 3) {
6080                /* clz */
6081                rd = (insn >> 12) & 0xf;
6082                tmp = load_reg(s, rm);
6083                gen_helper_clz(tmp, tmp);
6084                store_reg(s, rd, tmp);
6085            } else {
6086                goto illegal_op;
6087            }
6088            break;
6089        case 0x2:
6090            if (op1 == 1) {
6091                ARCH(5J); /* bxj */
6092                /* Trivial implementation equivalent to bx.  */
6093                tmp = load_reg(s, rm);
6094                gen_bx(s, tmp);
6095            } else {
6096                goto illegal_op;
6097            }
6098            break;
6099        case 0x3:
6100            if (op1 != 1)
6101              goto illegal_op;
6102
6103            /* branch link/exchange thumb (blx) */
6104            tmp = load_reg(s, rm);
6105            tmp2 = new_tmp();
6106            tcg_gen_movi_i32(tmp2, s->pc);
6107            store_reg(s, 14, tmp2);
6108            gen_bx(s, tmp);
6109            break;
6110        case 0x5: /* saturating add/subtract */
6111            rd = (insn >> 12) & 0xf;
6112            rn = (insn >> 16) & 0xf;
6113            tmp = load_reg(s, rm);
6114            tmp2 = load_reg(s, rn);
6115            if (op1 & 2)
6116                gen_helper_double_saturate(tmp2, tmp2);
6117            if (op1 & 1)
6118                gen_helper_sub_saturate(tmp, tmp, tmp2);
6119            else
6120                gen_helper_add_saturate(tmp, tmp, tmp2);
6121            dead_tmp(tmp2);
6122            store_reg(s, rd, tmp);
6123            break;
6124        case 7: /* bkpt */
6125            gen_set_condexec(s);
6126            gen_set_pc_im(s->pc - 4);
6127            gen_exception(EXCP_BKPT);
6128            s->is_jmp = DISAS_JUMP;
6129            break;
6130        case 0x8: /* signed multiply */
6131        case 0xa:
6132        case 0xc:
6133        case 0xe:
6134            rs = (insn >> 8) & 0xf;
6135            rn = (insn >> 12) & 0xf;
6136            rd = (insn >> 16) & 0xf;
6137            if (op1 == 1) {
6138                /* (32 * 16) >> 16 */
6139                tmp = load_reg(s, rm);
6140                tmp2 = load_reg(s, rs);
6141                if (sh & 4)
6142                    tcg_gen_sari_i32(tmp2, tmp2, 16);
6143                else
6144                    gen_sxth(tmp2);
6145                tmp64 = gen_muls_i64_i32(tmp, tmp2);
6146                tcg_gen_shri_i64(tmp64, tmp64, 16);
6147                tmp = new_tmp();
6148                tcg_gen_trunc_i64_i32(tmp, tmp64);
6149                if ((sh & 2) == 0) {
6150                    tmp2 = load_reg(s, rn);
6151                    gen_helper_add_setq(tmp, tmp, tmp2);
6152                    dead_tmp(tmp2);
6153                }
6154                store_reg(s, rd, tmp);
6155            } else {
6156                /* 16 * 16 */
6157                tmp = load_reg(s, rm);
6158                tmp2 = load_reg(s, rs);
6159                gen_mulxy(tmp, tmp2, sh & 2, sh & 4);
6160                dead_tmp(tmp2);
6161                if (op1 == 2) {
6162                    tmp64 = tcg_temp_new_i64();
6163                    tcg_gen_ext_i32_i64(tmp64, tmp);
6164                    dead_tmp(tmp);
6165                    gen_addq(s, tmp64, rn, rd);
6166                    gen_storeq_reg(s, rn, rd, tmp64);
6167                } else {
6168                    if (op1 == 0) {
6169                        tmp2 = load_reg(s, rn);
6170                        gen_helper_add_setq(tmp, tmp, tmp2);
6171                        dead_tmp(tmp2);
6172                    }
6173                    store_reg(s, rd, tmp);
6174                }
6175            }
6176            break;
6177        default:
6178            goto illegal_op;
6179        }
6180    } else if (((insn & 0x0e000000) == 0 &&
6181                (insn & 0x00000090) != 0x90) ||
6182               ((insn & 0x0e000000) == (1 << 25))) {
6183        int set_cc, logic_cc, shiftop;
6184
6185        op1 = (insn >> 21) & 0xf;
6186        set_cc = (insn >> 20) & 1;
6187        logic_cc = table_logic_cc[op1] & set_cc;
6188
6189        /* data processing instruction */
6190        if (insn & (1 << 25)) {
6191            /* immediate operand */
6192            val = insn & 0xff;
6193            shift = ((insn >> 8) & 0xf) * 2;
6194            if (shift) {
6195                val = (val >> shift) | (val << (32 - shift));
6196            }
6197            tmp2 = new_tmp();
6198            tcg_gen_movi_i32(tmp2, val);
6199            if (logic_cc && shift) {
6200                gen_set_CF_bit31(tmp2);
6201            }
6202        } else {
6203            /* register */
6204            rm = (insn) & 0xf;
6205            tmp2 = load_reg(s, rm);
6206            shiftop = (insn >> 5) & 3;
6207            if (!(insn & (1 << 4))) {
6208                shift = (insn >> 7) & 0x1f;
6209                gen_arm_shift_im(tmp2, shiftop, shift, logic_cc);
6210            } else {
6211                rs = (insn >> 8) & 0xf;
6212                tmp = load_reg(s, rs);
6213                gen_arm_shift_reg(tmp2, shiftop, tmp, logic_cc);
6214            }
6215        }
6216        if (op1 != 0x0f && op1 != 0x0d) {
6217            rn = (insn >> 16) & 0xf;
6218            tmp = load_reg(s, rn);
6219        } else {
6220            TCGV_UNUSED(tmp);
6221        }
6222        rd = (insn >> 12) & 0xf;
6223        switch(op1) {
6224        case 0x00:
6225            tcg_gen_and_i32(tmp, tmp, tmp2);
6226            if (logic_cc) {
6227                gen_logic_CC(tmp);
6228            }
6229            store_reg_bx(env, s, rd, tmp);
6230            break;
6231        case 0x01:
6232            tcg_gen_xor_i32(tmp, tmp, tmp2);
6233            if (logic_cc) {
6234                gen_logic_CC(tmp);
6235            }
6236            store_reg_bx(env, s, rd, tmp);
6237            break;
6238        case 0x02:
6239            if (set_cc && rd == 15) {
6240                /* SUBS r15, ... is used for exception return.  */
6241                if (IS_USER(s)) {
6242                    goto illegal_op;
6243                }
6244                gen_helper_sub_cc(tmp, tmp, tmp2);
6245                gen_exception_return(s, tmp);
6246            } else {
6247                if (set_cc) {
6248                    gen_helper_sub_cc(tmp, tmp, tmp2);
6249                } else {
6250                    tcg_gen_sub_i32(tmp, tmp, tmp2);
6251                }
6252                store_reg_bx(env, s, rd, tmp);
6253            }
6254            break;
6255        case 0x03:
6256            if (set_cc) {
6257                gen_helper_sub_cc(tmp, tmp2, tmp);
6258            } else {
6259                tcg_gen_sub_i32(tmp, tmp2, tmp);
6260            }
6261            store_reg_bx(env, s, rd, tmp);
6262            break;
6263        case 0x04:
6264            if (set_cc) {
6265                gen_helper_add_cc(tmp, tmp, tmp2);
6266            } else {
6267                tcg_gen_add_i32(tmp, tmp, tmp2);
6268            }
6269            store_reg_bx(env, s, rd, tmp);
6270            break;
6271        case 0x05:
6272            if (set_cc) {
6273                gen_helper_adc_cc(tmp, tmp, tmp2);
6274            } else {
6275                gen_add_carry(tmp, tmp, tmp2);
6276            }
6277            store_reg_bx(env, s, rd, tmp);
6278            break;
6279        case 0x06:
6280            if (set_cc) {
6281                gen_helper_sbc_cc(tmp, tmp, tmp2);
6282            } else {
6283                gen_sub_carry(tmp, tmp, tmp2);
6284            }
6285            store_reg_bx(env, s, rd, tmp);
6286            break;
6287        case 0x07:
6288            if (set_cc) {
6289                gen_helper_sbc_cc(tmp, tmp2, tmp);
6290            } else {
6291                gen_sub_carry(tmp, tmp2, tmp);
6292            }
6293            store_reg_bx(env, s, rd, tmp);
6294            break;
6295        case 0x08:
6296            if (set_cc) {
6297                tcg_gen_and_i32(tmp, tmp, tmp2);
6298                gen_logic_CC(tmp);
6299            }
6300            dead_tmp(tmp);
6301            break;
6302        case 0x09:
6303            if (set_cc) {
6304                tcg_gen_xor_i32(tmp, tmp, tmp2);
6305                gen_logic_CC(tmp);
6306            }
6307            dead_tmp(tmp);
6308            break;
6309        case 0x0a:
6310            if (set_cc) {
6311                gen_helper_sub_cc(tmp, tmp, tmp2);
6312            }
6313            dead_tmp(tmp);
6314            break;
6315        case 0x0b:
6316            if (set_cc) {
6317                gen_helper_add_cc(tmp, tmp, tmp2);
6318            }
6319            dead_tmp(tmp);
6320            break;
6321        case 0x0c:
6322            tcg_gen_or_i32(tmp, tmp, tmp2);
6323            if (logic_cc) {
6324                gen_logic_CC(tmp);
6325            }
6326            store_reg_bx(env, s, rd, tmp);
6327            break;
6328        case 0x0d:
6329            if (logic_cc && rd == 15) {
6330                /* MOVS r15, ... is used for exception return.  */
6331                if (IS_USER(s)) {
6332                    goto illegal_op;
6333                }
6334                gen_exception_return(s, tmp2);
6335            } else {
6336                if (logic_cc) {
6337                    gen_logic_CC(tmp2);
6338                }
6339                store_reg_bx(env, s, rd, tmp2);
6340            }
6341            break;
6342        case 0x0e:
6343            tcg_gen_bic_i32(tmp, tmp, tmp2);
6344            if (logic_cc) {
6345                gen_logic_CC(tmp);
6346            }
6347            store_reg_bx(env, s, rd, tmp);
6348            break;
6349        default:
6350        case 0x0f:
6351            tcg_gen_not_i32(tmp2, tmp2);
6352            if (logic_cc) {
6353                gen_logic_CC(tmp2);
6354            }
6355            store_reg_bx(env, s, rd, tmp2);
6356            break;
6357        }
6358        if (op1 != 0x0f && op1 != 0x0d) {
6359            dead_tmp(tmp2);
6360        }
6361    } else {
6362        /* other instructions */
6363        op1 = (insn >> 24) & 0xf;
6364        switch(op1) {
6365        case 0x0:
6366        case 0x1:
6367            /* multiplies, extra load/stores */
6368            sh = (insn >> 5) & 3;
6369            if (sh == 0) {
6370                if (op1 == 0x0) {
6371                    rd = (insn >> 16) & 0xf;
6372                    rn = (insn >> 12) & 0xf;
6373                    rs = (insn >> 8) & 0xf;
6374                    rm = (insn) & 0xf;
6375                    op1 = (insn >> 20) & 0xf;
6376                    switch (op1) {
6377                    case 0: case 1: case 2: case 3: case 6:
6378                        /* 32 bit mul */
6379                        tmp = load_reg(s, rs);
6380                        tmp2 = load_reg(s, rm);
6381                        tcg_gen_mul_i32(tmp, tmp, tmp2);
6382                        dead_tmp(tmp2);
6383                        if (insn & (1 << 22)) {
6384                            /* Subtract (mls) */
6385                            ARCH(6T2);
6386                            tmp2 = load_reg(s, rn);
6387                            tcg_gen_sub_i32(tmp, tmp2, tmp);
6388                            dead_tmp(tmp2);
6389                        } else if (insn & (1 << 21)) {
6390                            /* Add */
6391                            tmp2 = load_reg(s, rn);
6392                            tcg_gen_add_i32(tmp, tmp, tmp2);
6393                            dead_tmp(tmp2);
6394                        }
6395                        if (insn & (1 << 20))
6396                            gen_logic_CC(tmp);
6397                        store_reg(s, rd, tmp);
6398                        break;
6399                    default:
6400                        /* 64 bit mul */
6401                        tmp = load_reg(s, rs);
6402                        tmp2 = load_reg(s, rm);
6403                        if (insn & (1 << 22))
6404                            tmp64 = gen_muls_i64_i32(tmp, tmp2);
6405                        else
6406                            tmp64 = gen_mulu_i64_i32(tmp, tmp2);
6407                        if (insn & (1 << 21)) /* mult accumulate */
6408                            gen_addq(s, tmp64, rn, rd);
6409                        if (!(insn & (1 << 23))) { /* double accumulate */
6410                            ARCH(6);
6411                            gen_addq_lo(s, tmp64, rn);
6412                            gen_addq_lo(s, tmp64, rd);
6413                        }
6414                        if (insn & (1 << 20))
6415                            gen_logicq_cc(tmp64);
6416                        gen_storeq_reg(s, rn, rd, tmp64);
6417                        break;
6418                    }
6419                } else {
6420                    rn = (insn >> 16) & 0xf;
6421                    rd = (insn >> 12) & 0xf;
6422                    if (insn & (1 << 23)) {
6423                        /* load/store exclusive */
6424                        op1 = (insn >> 21) & 0x3;
6425                        if (op1)
6426                            ARCH(6K);
6427                        else
6428                            ARCH(6);
6429                        gen_movl_T1_reg(s, rn);
6430                        addr = cpu_T[1];
6431                        if (insn & (1 << 20)) {
6432                            gen_helper_mark_exclusive(cpu_env, cpu_T[1]);
6433                            switch (op1) {
6434                            case 0: /* ldrex */
6435                                tmp = gen_ld32(addr, IS_USER(s));
6436                                break;
6437                            case 1: /* ldrexd */
6438                                tmp = gen_ld32(addr, IS_USER(s));
6439                                store_reg(s, rd, tmp);
6440                                tcg_gen_addi_i32(addr, addr, 4);
6441                                tmp = gen_ld32(addr, IS_USER(s));
6442                                rd++;
6443                                break;
6444                            case 2: /* ldrexb */
6445                                tmp = gen_ld8u(addr, IS_USER(s));
6446                                break;
6447                            case 3: /* ldrexh */
6448                                tmp = gen_ld16u(addr, IS_USER(s));
6449                                break;
6450                            default:
6451                                abort();
6452                            }
6453                            store_reg(s, rd, tmp);
6454                        } else {
6455                            int label = gen_new_label();
6456                            rm = insn & 0xf;
6457                            gen_helper_test_exclusive(cpu_T[0], cpu_env, addr);
6458                            tcg_gen_brcondi_i32(TCG_COND_NE, cpu_T[0],
6459                                                0, label);
6460                            tmp = load_reg(s,rm);
6461                            switch (op1) {
6462                            case 0:  /*  strex */
6463                                gen_st32(tmp, addr, IS_USER(s));
6464                                break;
6465                            case 1: /*  strexd */
6466                                gen_st32(tmp, addr, IS_USER(s));
6467                                tcg_gen_addi_i32(addr, addr, 4);
6468                                tmp = load_reg(s, rm + 1);
6469                                gen_st32(tmp, addr, IS_USER(s));
6470                                break;
6471                            case 2: /*  strexb */
6472                                gen_st8(tmp, addr, IS_USER(s));
6473                                break;
6474                            case 3: /* strexh */
6475                                gen_st16(tmp, addr, IS_USER(s));
6476                                break;
6477                            default:
6478                                abort();
6479                            }
6480                            gen_set_label(label);
6481                            gen_movl_reg_T0(s, rd);
6482                        }
6483                    } else {
6484                        /* SWP instruction */
6485                        rm = (insn) & 0xf;
6486
6487                        /* ??? This is not really atomic.  However we know
6488                           we never have multiple CPUs running in parallel,
6489                           so it is good enough.  */
6490                        addr = load_reg(s, rn);
6491                        tmp = load_reg(s, rm);
6492                        if (insn & (1 << 22)) {
6493                            tmp2 = gen_ld8u(addr, IS_USER(s));
6494                            gen_st8(tmp, addr, IS_USER(s));
6495                        } else {
6496                            tmp2 = gen_ld32(addr, IS_USER(s));
6497                            gen_st32(tmp, addr, IS_USER(s));
6498                        }
6499                        dead_tmp(addr);
6500                        store_reg(s, rd, tmp2);
6501                    }
6502                }
6503            } else {
6504                int address_offset;
6505                int load;
6506                /* Misc load/store */
6507                rn = (insn >> 16) & 0xf;
6508                rd = (insn >> 12) & 0xf;
6509                addr = load_reg(s, rn);
6510                if (insn & (1 << 24))
6511                    gen_add_datah_offset(s, insn, 0, addr);
6512                address_offset = 0;
6513                if (insn & (1 << 20)) {
6514                    /* load */
6515                    switch(sh) {
6516                    case 1:
6517                        tmp = gen_ld16u(addr, IS_USER(s));
6518                        break;
6519                    case 2:
6520                        tmp = gen_ld8s(addr, IS_USER(s));
6521                        break;
6522                    default:
6523                    case 3:
6524                        tmp = gen_ld16s(addr, IS_USER(s));
6525                        break;
6526                    }
6527                    load = 1;
6528                } else if (sh & 2) {
6529                    /* doubleword */
6530                    if (sh & 1) {
6531                        /* store */
6532                        tmp = load_reg(s, rd);
6533                        gen_st32(tmp, addr, IS_USER(s));
6534                        tcg_gen_addi_i32(addr, addr, 4);
6535                        tmp = load_reg(s, rd + 1);
6536                        gen_st32(tmp, addr, IS_USER(s));
6537                        load = 0;
6538                    } else {
6539                        /* load */
6540                        tmp = gen_ld32(addr, IS_USER(s));
6541                        store_reg(s, rd, tmp);
6542                        tcg_gen_addi_i32(addr, addr, 4);
6543                        tmp = gen_ld32(addr, IS_USER(s));
6544                        rd++;
6545                        load = 1;
6546                    }
6547                    address_offset = -4;
6548                } else {
6549                    /* store */
6550                    tmp = load_reg(s, rd);
6551                    gen_st16(tmp, addr, IS_USER(s));
6552                    load = 0;
6553                }
6554                /* Perform base writeback before the loaded value to
6555                   ensure correct behavior with overlapping index registers.
6556                   ldrd with base writeback is undefined if the
6557                   destination and index registers overlap.  */
6558                if (!(insn & (1 << 24))) {
6559                    gen_add_datah_offset(s, insn, address_offset, addr);
6560                    store_reg(s, rn, addr);
6561                } else if (insn & (1 << 21)) {
6562                    if (address_offset)
6563                        tcg_gen_addi_i32(addr, addr, address_offset);
6564                    store_reg(s, rn, addr);
6565                } else {
6566                    dead_tmp(addr);
6567                }
6568                if (load) {
6569                    /* Complete the load.  */
6570                    store_reg(s, rd, tmp);
6571                }
6572            }
6573            break;
6574        case 0x4:
6575        case 0x5:
6576            goto do_ldst;
6577        case 0x6:
6578        case 0x7:
6579            if (insn & (1 << 4)) {
6580                ARCH(6);
6581                /* Armv6 Media instructions.  */
6582                rm = insn & 0xf;
6583                rn = (insn >> 16) & 0xf;
6584                rd = (insn >> 12) & 0xf;
6585                rs = (insn >> 8) & 0xf;
6586                switch ((insn >> 23) & 3) {
6587                case 0: /* Parallel add/subtract.  */
6588                    op1 = (insn >> 20) & 7;
6589                    tmp = load_reg(s, rn);
6590                    tmp2 = load_reg(s, rm);
6591                    sh = (insn >> 5) & 7;
6592                    if ((op1 & 3) == 0 || sh == 5 || sh == 6)
6593                        goto illegal_op;
6594                    gen_arm_parallel_addsub(op1, sh, tmp, tmp2);
6595                    dead_tmp(tmp2);
6596                    store_reg(s, rd, tmp);
6597                    break;
6598                case 1:
6599                    if ((insn & 0x00700020) == 0) {
6600                        /* Halfword pack.  */
6601                        tmp = load_reg(s, rn);
6602                        tmp2 = load_reg(s, rm);
6603                        shift = (insn >> 7) & 0x1f;
6604                        if (insn & (1 << 6)) {
6605                            /* pkhtb */
6606                            if (shift == 0)
6607                                shift = 31;
6608                            tcg_gen_sari_i32(tmp2, tmp2, shift);
6609                            tcg_gen_andi_i32(tmp, tmp, 0xffff0000);
6610                            tcg_gen_ext16u_i32(tmp2, tmp2);
6611                        } else {
6612                            /* pkhbt */
6613                            if (shift)
6614                                tcg_gen_shli_i32(tmp2, tmp2, shift);
6615                            tcg_gen_ext16u_i32(tmp, tmp);
6616                            tcg_gen_andi_i32(tmp2, tmp2, 0xffff0000);
6617                        }
6618                        tcg_gen_or_i32(tmp, tmp, tmp2);
6619                        dead_tmp(tmp2);
6620                        store_reg(s, rd, tmp);
6621                    } else if ((insn & 0x00200020) == 0x00200000) {
6622                        /* [us]sat */
6623                        tmp = load_reg(s, rm);
6624                        shift = (insn >> 7) & 0x1f;
6625                        if (insn & (1 << 6)) {
6626                            if (shift == 0)
6627                                shift = 31;
6628                            tcg_gen_sari_i32(tmp, tmp, shift);
6629                        } else {
6630                            tcg_gen_shli_i32(tmp, tmp, shift);
6631                        }
6632                        sh = (insn >> 16) & 0x1f;
6633                        if (sh != 0) {
6634                            if (insn & (1 << 22))
6635                                gen_helper_usat(tmp, tmp, tcg_const_i32(sh));
6636                            else
6637                                gen_helper_ssat(tmp, tmp, tcg_const_i32(sh));
6638                        }
6639                        store_reg(s, rd, tmp);
6640                    } else if ((insn & 0x00300fe0) == 0x00200f20) {
6641                        /* [us]sat16 */
6642                        tmp = load_reg(s, rm);
6643                        sh = (insn >> 16) & 0x1f;
6644                        if (sh != 0) {
6645                            if (insn & (1 << 22))
6646                                gen_helper_usat16(tmp, tmp, tcg_const_i32(sh));
6647                            else
6648                                gen_helper_ssat16(tmp, tmp, tcg_const_i32(sh));
6649                        }
6650                        store_reg(s, rd, tmp);
6651                    } else if ((insn & 0x00700fe0) == 0x00000fa0) {
6652                        /* Select bytes.  */
6653                        tmp = load_reg(s, rn);
6654                        tmp2 = load_reg(s, rm);
6655                        tmp3 = new_tmp();
6656                        tcg_gen_ld_i32(tmp3, cpu_env, offsetof(CPUState, GE));
6657                        gen_helper_sel_flags(tmp, tmp3, tmp, tmp2);
6658                        dead_tmp(tmp3);
6659                        dead_tmp(tmp2);
6660                        store_reg(s, rd, tmp);
6661                    } else if ((insn & 0x000003e0) == 0x00000060) {
6662                        tmp = load_reg(s, rm);
6663                        shift = (insn >> 10) & 3;
6664                        /* ??? In many cases it's not necessary to do a
6665                           rotate, a shift is sufficient.  */
6666                        if (shift != 0)
6667                            tcg_gen_rori_i32(tmp, tmp, shift * 8);
6668                        op1 = (insn >> 20) & 7;
6669                        switch (op1) {
6670                        case 0: gen_sxtb16(tmp);  break;
6671                        case 2: gen_sxtb(tmp);    break;
6672                        case 3: gen_sxth(tmp);    break;
6673                        case 4: gen_uxtb16(tmp);  break;
6674                        case 6: gen_uxtb(tmp);    break;
6675                        case 7: gen_uxth(tmp);    break;
6676                        default: goto illegal_op;
6677                        }
6678                        if (rn != 15) {
6679                            tmp2 = load_reg(s, rn);
6680                            if ((op1 & 3) == 0) {
6681                                gen_add16(tmp, tmp2);
6682                            } else {
6683                                tcg_gen_add_i32(tmp, tmp, tmp2);
6684                                dead_tmp(tmp2);
6685                            }
6686                        }
6687                        store_reg(s, rd, tmp);
6688                    } else if ((insn & 0x003f0f60) == 0x003f0f20) {
6689                        /* rev */
6690                        tmp = load_reg(s, rm);
6691                        if (insn & (1 << 22)) {
6692                            if (insn & (1 << 7)) {
6693                                gen_revsh(tmp);
6694                            } else {
6695                                ARCH(6T2);
6696                                gen_helper_rbit(tmp, tmp);
6697                            }
6698                        } else {
6699                            if (insn & (1 << 7))
6700                                gen_rev16(tmp);
6701                            else
6702                                tcg_gen_bswap32_i32(tmp, tmp);
6703                        }
6704                        store_reg(s, rd, tmp);
6705                    } else {
6706                        goto illegal_op;
6707                    }
6708                    break;
6709                case 2: /* Multiplies (Type 3).  */
6710                    tmp = load_reg(s, rm);
6711                    tmp2 = load_reg(s, rs);
6712                    if (insn & (1 << 20)) {
6713                        /* Signed multiply most significant [accumulate].  */
6714                        tmp64 = gen_muls_i64_i32(tmp, tmp2);
6715                        if (insn & (1 << 5))
6716                            tcg_gen_addi_i64(tmp64, tmp64, 0x80000000u);
6717                        tcg_gen_shri_i64(tmp64, tmp64, 32);
6718                        tmp = new_tmp();
6719                        tcg_gen_trunc_i64_i32(tmp, tmp64);
6720                        if (rd != 15) {
6721                            tmp2 = load_reg(s, rd);
6722                            if (insn & (1 << 6)) {
6723                                tcg_gen_sub_i32(tmp, tmp, tmp2);
6724                            } else {
6725                                tcg_gen_add_i32(tmp, tmp, tmp2);
6726                            }
6727                            dead_tmp(tmp2);
6728                        }
6729                        store_reg(s, rn, tmp);
6730                    } else {
6731                        if (insn & (1 << 5))
6732                            gen_swap_half(tmp2);
6733                        gen_smul_dual(tmp, tmp2);
6734                        /* This addition cannot overflow.  */
6735                        if (insn & (1 << 6)) {
6736                            tcg_gen_sub_i32(tmp, tmp, tmp2);
6737                        } else {
6738                            tcg_gen_add_i32(tmp, tmp, tmp2);
6739                        }
6740                        dead_tmp(tmp2);
6741                        if (insn & (1 << 22)) {
6742                            /* smlald, smlsld */
6743                            tmp64 = tcg_temp_new_i64();
6744                            tcg_gen_ext_i32_i64(tmp64, tmp);
6745                            dead_tmp(tmp);
6746                            gen_addq(s, tmp64, rd, rn);
6747                            gen_storeq_reg(s, rd, rn, tmp64);
6748                        } else {
6749                            /* smuad, smusd, smlad, smlsd */
6750                            if (rd != 15)
6751                              {
6752                                tmp2 = load_reg(s, rd);
6753                                gen_helper_add_setq(tmp, tmp, tmp2);
6754                                dead_tmp(tmp2);
6755                              }
6756                            store_reg(s, rn, tmp);
6757                        }
6758                    }
6759                    break;
6760                case 3:
6761                    op1 = ((insn >> 17) & 0x38) | ((insn >> 5) & 7);
6762                    switch (op1) {
6763                    case 0: /* Unsigned sum of absolute differences.  */
6764                        ARCH(6);
6765                        tmp = load_reg(s, rm);
6766                        tmp2 = load_reg(s, rs);
6767                        gen_helper_usad8(tmp, tmp, tmp2);
6768                        dead_tmp(tmp2);
6769                        if (rd != 15) {
6770                            tmp2 = load_reg(s, rd);
6771                            tcg_gen_add_i32(tmp, tmp, tmp2);
6772                            dead_tmp(tmp2);
6773                        }
6774                        store_reg(s, rn, tmp);
6775                        break;
6776                    case 0x20: case 0x24: case 0x28: case 0x2c:
6777                        /* Bitfield insert/clear.  */
6778                        ARCH(6T2);
6779                        shift = (insn >> 7) & 0x1f;
6780                        i = (insn >> 16) & 0x1f;
6781                        i = i + 1 - shift;
6782                        if (rm == 15) {
6783                            tmp = new_tmp();
6784                            tcg_gen_movi_i32(tmp, 0);
6785                        } else {
6786                            tmp = load_reg(s, rm);
6787                        }
6788                        if (i != 32) {
6789                            tmp2 = load_reg(s, rd);
6790                            gen_bfi(tmp, tmp2, tmp, shift, (1u << i) - 1);
6791                            dead_tmp(tmp2);
6792                        }
6793                        store_reg(s, rd, tmp);
6794                        break;
6795                    case 0x12: case 0x16: case 0x1a: case 0x1e: /* sbfx */
6796                    case 0x32: case 0x36: case 0x3a: case 0x3e: /* ubfx */
6797                        ARCH(6T2);
6798                        tmp = load_reg(s, rm);
6799                        shift = (insn >> 7) & 0x1f;
6800                        i = ((insn >> 16) & 0x1f) + 1;
6801                        if (shift + i > 32)
6802                            goto illegal_op;
6803                        if (i < 32) {
6804                            if (op1 & 0x20) {
6805                                gen_ubfx(tmp, shift, (1u << i) - 1);
6806                            } else {
6807                                gen_sbfx(tmp, shift, i);
6808                            }
6809                        }
6810                        store_reg(s, rd, tmp);
6811                        break;
6812                    default:
6813                        goto illegal_op;
6814                    }
6815                    break;
6816                }
6817                break;
6818            }
6819        do_ldst:
6820            /* Check for undefined extension instructions
6821             * per the ARM Bible IE:
6822             * xxxx 0111 1111 xxxx  xxxx xxxx 1111 xxxx
6823             */
6824            sh = (0xf << 20) | (0xf << 4);
6825            if (op1 == 0x7 && ((insn & sh) == sh))
6826            {
6827                goto illegal_op;
6828            }
6829            /* load/store byte/word */
6830            rn = (insn >> 16) & 0xf;
6831            rd = (insn >> 12) & 0xf;
6832            tmp2 = load_reg(s, rn);
6833            i = (IS_USER(s) || (insn & 0x01200000) == 0x00200000);
6834            if (insn & (1 << 24))
6835                gen_add_data_offset(s, insn, tmp2);
6836            if (insn & (1 << 20)) {
6837                /* load */
6838                if (insn & (1 << 22)) {
6839                    tmp = gen_ld8u(tmp2, i);
6840                } else {
6841                    tmp = gen_ld32(tmp2, i);
6842                }
6843            } else {
6844                /* store */
6845                tmp = load_reg(s, rd);
6846                if (insn & (1 << 22))
6847                    gen_st8(tmp, tmp2, i);
6848                else
6849                    gen_st32(tmp, tmp2, i);
6850            }
6851            if (!(insn & (1 << 24))) {
6852                gen_add_data_offset(s, insn, tmp2);
6853                store_reg(s, rn, tmp2);
6854            } else if (insn & (1 << 21)) {
6855                store_reg(s, rn, tmp2);
6856            } else {
6857                dead_tmp(tmp2);
6858            }
6859            if (insn & (1 << 20)) {
6860                /* Complete the load.  */
6861                if (rd == 15)
6862                    gen_bx(s, tmp);
6863                else
6864                    store_reg(s, rd, tmp);
6865            }
6866            break;
6867        case 0x08:
6868        case 0x09:
6869            {
6870                int j, n, user, loaded_base;
6871                TCGv loaded_var;
6872                /* load/store multiple words */
6873                /* XXX: store correct base if write back */
6874                user = 0;
6875                if (insn & (1 << 22)) {
6876                    if (IS_USER(s))
6877                        goto illegal_op; /* only usable in supervisor mode */
6878
6879                    if ((insn & (1 << 15)) == 0)
6880                        user = 1;
6881                }
6882                rn = (insn >> 16) & 0xf;
6883                addr = load_reg(s, rn);
6884
6885                /* compute total size */
6886                loaded_base = 0;
6887                TCGV_UNUSED(loaded_var);
6888                n = 0;
6889                for(i=0;i<16;i++) {
6890                    if (insn & (1 << i))
6891                        n++;
6892                }
6893                /* XXX: test invalid n == 0 case ? */
6894                if (insn & (1 << 23)) {
6895                    if (insn & (1 << 24)) {
6896                        /* pre increment */
6897                        tcg_gen_addi_i32(addr, addr, 4);
6898                    } else {
6899                        /* post increment */
6900                    }
6901                } else {
6902                    if (insn & (1 << 24)) {
6903                        /* pre decrement */
6904                        tcg_gen_addi_i32(addr, addr, -(n * 4));
6905                    } else {
6906                        /* post decrement */
6907                        if (n != 1)
6908                        tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
6909                    }
6910                }
6911                j = 0;
6912                for(i=0;i<16;i++) {
6913                    if (insn & (1 << i)) {
6914                        if (insn & (1 << 20)) {
6915                            /* load */
6916                            tmp = gen_ld32(addr, IS_USER(s));
6917                            if (i == 15) {
6918                                gen_bx(s, tmp);
6919                            } else if (user) {
6920                                gen_helper_set_user_reg(tcg_const_i32(i), tmp);
6921                                dead_tmp(tmp);
6922                            } else if (i == rn) {
6923                                loaded_var = tmp;
6924                                loaded_base = 1;
6925                            } else {
6926                                store_reg(s, i, tmp);
6927                            }
6928                        } else {
6929                            /* store */
6930                            if (i == 15) {
6931                                /* special case: r15 = PC + 8 */
6932                                val = (long)s->pc + 4;
6933                                tmp = new_tmp();
6934                                tcg_gen_movi_i32(tmp, val);
6935                            } else if (user) {
6936                                tmp = new_tmp();
6937                                gen_helper_get_user_reg(tmp, tcg_const_i32(i));
6938                            } else {
6939                                tmp = load_reg(s, i);
6940                            }
6941                            gen_st32(tmp, addr, IS_USER(s));
6942                        }
6943                        j++;
6944                        /* no need to add after the last transfer */
6945                        if (j != n)
6946                            tcg_gen_addi_i32(addr, addr, 4);
6947                    }
6948                }
6949                if (insn & (1 << 21)) {
6950                    /* write back */
6951                    if (insn & (1 << 23)) {
6952                        if (insn & (1 << 24)) {
6953                            /* pre increment */
6954                        } else {
6955                            /* post increment */
6956                            tcg_gen_addi_i32(addr, addr, 4);
6957                        }
6958                    } else {
6959                        if (insn & (1 << 24)) {
6960                            /* pre decrement */
6961                            if (n != 1)
6962                                tcg_gen_addi_i32(addr, addr, -((n - 1) * 4));
6963                        } else {
6964                            /* post decrement */
6965                            tcg_gen_addi_i32(addr, addr, -(n * 4));
6966                        }
6967                    }
6968                    store_reg(s, rn, addr);
6969                } else {
6970                    dead_tmp(addr);
6971                }
6972                if (loaded_base) {
6973                    store_reg(s, rn, loaded_var);
6974                }
6975                if ((insn & (1 << 22)) && !user) {
6976                    /* Restore CPSR from SPSR.  */
6977                    tmp = load_cpu_field(spsr);
6978                    gen_set_cpsr(tmp, 0xffffffff);
6979                    dead_tmp(tmp);
6980                    s->is_jmp = DISAS_UPDATE;
6981                }
6982            }
6983            break;
6984        case 0xa:
6985        case 0xb:
6986            {
6987                int32_t offset;
6988
6989                /* branch (and link) */
6990                val = (int32_t)s->pc;
6991                if (insn & (1 << 24)) {
6992                    tmp = new_tmp();
6993                    tcg_gen_movi_i32(tmp, val);
6994                    store_reg(s, 14, tmp);
6995                }
6996                offset = (((int32_t)insn << 8) >> 8);
6997                val += (offset << 2) + 4;
6998                gen_jmp(s, val);
6999            }
7000            break;
7001        case 0xc:
7002        case 0xd:
7003        case 0xe:
7004            /* Coprocessor.  */
7005            if (disas_coproc_insn(env, s, insn))
7006                goto illegal_op;
7007            break;
7008        case 0xf:
7009            /* swi */
7010            gen_set_pc_im(s->pc);
7011            s->is_jmp = DISAS_SWI;
7012            break;
7013        default:
7014        illegal_op:
7015            gen_set_condexec(s);
7016            gen_set_pc_im(s->pc - 4);
7017            gen_exception(EXCP_UDEF);
7018            s->is_jmp = DISAS_JUMP;
7019            break;
7020        }
7021    }
7022}
7023
/* Thumb-2 data-processing opcodes below 8 are the logical operations
   (and, bic, orr, orn, eor, ...); report whether OP is one of them.  */
static int
thumb2_logic_op(int op)
{
    return (op < 8) ? 1 : 0;
}
7030
7031/* Generate code for a Thumb-2 data processing operation.  If CONDS is nonzero
7032   then set condition code flags based on the result of the operation.
7033   If SHIFTER_OUT is nonzero then set the carry flag for logical operations
7034   to the high bit of T1.
7035   Returns zero if the opcode is valid.  */
7036
7037static int
7038gen_thumb2_data_op(DisasContext *s, int op, int conds, uint32_t shifter_out)
7039{
7040    int logic_cc;
7041
7042    logic_cc = 0;
7043    switch (op) {
7044    case 0: /* and */
7045        gen_op_andl_T0_T1();
7046        logic_cc = conds;
7047        break;
7048    case 1: /* bic */
7049        gen_op_bicl_T0_T1();
7050        logic_cc = conds;
7051        break;
7052    case 2: /* orr */
7053        gen_op_orl_T0_T1();
7054        logic_cc = conds;
7055        break;
7056    case 3: /* orn */
7057        gen_op_notl_T1();
7058        gen_op_orl_T0_T1();
7059        logic_cc = conds;
7060        break;
7061    case 4: /* eor */
7062        gen_op_xorl_T0_T1();
7063        logic_cc = conds;
7064        break;
7065    case 8: /* add */
7066        if (conds)
7067            gen_op_addl_T0_T1_cc();
7068        else
7069            gen_op_addl_T0_T1();
7070        break;
7071    case 10: /* adc */
7072        if (conds)
7073            gen_op_adcl_T0_T1_cc();
7074        else
7075            gen_adc_T0_T1();
7076        break;
7077    case 11: /* sbc */
7078        if (conds)
7079            gen_op_sbcl_T0_T1_cc();
7080        else
7081            gen_sbc_T0_T1();
7082        break;
7083    case 13: /* sub */
7084        if (conds)
7085            gen_op_subl_T0_T1_cc();
7086        else
7087            gen_op_subl_T0_T1();
7088        break;
7089    case 14: /* rsb */
7090        if (conds)
7091            gen_op_rsbl_T0_T1_cc();
7092        else
7093            gen_op_rsbl_T0_T1();
7094        break;
7095    default: /* 5, 6, 7, 9, 12, 15. */
7096        return 1;
7097    }
7098    if (logic_cc) {
7099        gen_op_logic_T0_cc();
7100        if (shifter_out)
7101            gen_set_CF_bit31(cpu_T[1]);
7102    }
7103    return 0;
7104}
7105
7106/* Translate a 32-bit thumb instruction.  Returns nonzero if the instruction
7107   is not legal.  */
7108static int disas_thumb2_insn(CPUState *env, DisasContext *s, uint16_t insn_hw1)
7109{
7110    uint32_t insn, imm, shift, offset;
7111    uint32_t rd, rn, rm, rs;
7112    TCGv tmp;
7113    TCGv tmp2;
7114    TCGv tmp3;
7115    TCGv addr;
7116    TCGv_i64 tmp64;
7117    int op;
7118    int shiftop;
7119    int conds;
7120    int logic_cc;
7121
7122    if (!(arm_feature(env, ARM_FEATURE_THUMB2)
7123          || arm_feature (env, ARM_FEATURE_M))) {
7124        /* Thumb-1 cores may need to treat bl and blx as a pair of
7125           16-bit instructions to get correct prefetch abort behavior.  */
7126        insn = insn_hw1;
7127        if ((insn & (1 << 12)) == 0) {
7128            /* Second half of blx.  */
7129            offset = ((insn & 0x7ff) << 1);
7130            tmp = load_reg(s, 14);
7131            tcg_gen_addi_i32(tmp, tmp, offset);
7132            tcg_gen_andi_i32(tmp, tmp, 0xfffffffc);
7133
7134            tmp2 = new_tmp();
7135            tcg_gen_movi_i32(tmp2, s->pc | 1);
7136            store_reg(s, 14, tmp2);
7137            gen_bx(s, tmp);
7138            return 0;
7139        }
7140        if (insn & (1 << 11)) {
7141            /* Second half of bl.  */
7142            offset = ((insn & 0x7ff) << 1) | 1;
7143            tmp = load_reg(s, 14);
7144            tcg_gen_addi_i32(tmp, tmp, offset);
7145
7146            tmp2 = new_tmp();
7147            tcg_gen_movi_i32(tmp2, s->pc | 1);
7148            store_reg(s, 14, tmp2);
7149            gen_bx(s, tmp);
7150            return 0;
7151        }
7152        if ((s->pc & ~TARGET_PAGE_MASK) == 0) {
7153            /* Instruction spans a page boundary.  Implement it as two
7154               16-bit instructions in case the second half causes an
7155               prefetch abort.  */
7156            offset = ((int32_t)insn << 21) >> 9;
7157            gen_op_movl_T0_im(s->pc + 2 + offset);
7158            gen_movl_reg_T0(s, 14);
7159            return 0;
7160        }
7161        /* Fall through to 32-bit decode.  */
7162    }
7163
7164    insn = lduw_code(s->pc);
7165#ifdef CONFIG_TRACE
7166    if (tracing) {
7167        int  ticks = get_insn_ticks_thumb(insn);
7168        trace_add_insn( insn_wrap_thumb(insn), 1 );
7169        gen_traceInsn();
7170        gen_traceTicks(ticks);
7171    }
7172#endif
7173    s->pc += 2;
7174    insn |= (uint32_t)insn_hw1 << 16;
7175
7176    if ((insn & 0xf800e800) != 0xf000e800) {
7177        ARCH(6T2);
7178    }
7179
7180    rn = (insn >> 16) & 0xf;
7181    rs = (insn >> 12) & 0xf;
7182    rd = (insn >> 8) & 0xf;
7183    rm = insn & 0xf;
7184    switch ((insn >> 25) & 0xf) {
7185    case 0: case 1: case 2: case 3:
7186        /* 16-bit instructions.  Should never happen.  */
7187        abort();
7188    case 4:
7189        if (insn & (1 << 22)) {
7190            /* Other load/store, table branch.  */
7191            if (insn & 0x01200000) {
7192                /* Load/store doubleword.  */
7193                if (rn == 15) {
7194                    addr = new_tmp();
7195                    tcg_gen_movi_i32(addr, s->pc & ~3);
7196                } else {
7197                    addr = load_reg(s, rn);
7198                }
7199                offset = (insn & 0xff) * 4;
7200                if ((insn & (1 << 23)) == 0)
7201                    offset = -offset;
7202                if (insn & (1 << 24)) {
7203                    tcg_gen_addi_i32(addr, addr, offset);
7204                    offset = 0;
7205                }
7206                if (insn & (1 << 20)) {
7207                    /* ldrd */
7208                    tmp = gen_ld32(addr, IS_USER(s));
7209                    store_reg(s, rs, tmp);
7210                    tcg_gen_addi_i32(addr, addr, 4);
7211                    tmp = gen_ld32(addr, IS_USER(s));
7212                    store_reg(s, rd, tmp);
7213                } else {
7214                    /* strd */
7215                    tmp = load_reg(s, rs);
7216                    gen_st32(tmp, addr, IS_USER(s));
7217                    tcg_gen_addi_i32(addr, addr, 4);
7218                    tmp = load_reg(s, rd);
7219                    gen_st32(tmp, addr, IS_USER(s));
7220                }
7221                if (insn & (1 << 21)) {
7222                    /* Base writeback.  */
7223                    if (rn == 15)
7224                        goto illegal_op;
7225                    tcg_gen_addi_i32(addr, addr, offset - 4);
7226                    store_reg(s, rn, addr);
7227                } else {
7228                    dead_tmp(addr);
7229                }
7230            } else if ((insn & (1 << 23)) == 0) {
7231                /* Load/store exclusive word.  */
7232                gen_movl_T1_reg(s, rn);
7233                addr = cpu_T[1];
7234                if (insn & (1 << 20)) {
7235                    gen_helper_mark_exclusive(cpu_env, cpu_T[1]);
7236                    tmp = gen_ld32(addr, IS_USER(s));
7237                    store_reg(s, rd, tmp);
7238                } else {
7239                    int label = gen_new_label();
7240                    gen_helper_test_exclusive(cpu_T[0], cpu_env, addr);
7241                    tcg_gen_brcondi_i32(TCG_COND_NE, cpu_T[0],
7242                                        0, label);
7243                    tmp = load_reg(s, rs);
7244                    gen_st32(tmp, cpu_T[1], IS_USER(s));
7245                    gen_set_label(label);
7246                    gen_movl_reg_T0(s, rd);
7247                }
7248            } else if ((insn & (1 << 6)) == 0) {
7249                /* Table Branch.  */
7250                if (rn == 15) {
7251                    addr = new_tmp();
7252                    tcg_gen_movi_i32(addr, s->pc);
7253                } else {
7254                    addr = load_reg(s, rn);
7255                }
7256                tmp = load_reg(s, rm);
7257                tcg_gen_add_i32(addr, addr, tmp);
7258                if (insn & (1 << 4)) {
7259                    /* tbh */
7260                    tcg_gen_add_i32(addr, addr, tmp);
7261                    dead_tmp(tmp);
7262                    tmp = gen_ld16u(addr, IS_USER(s));
7263                } else { /* tbb */
7264                    dead_tmp(tmp);
7265                    tmp = gen_ld8u(addr, IS_USER(s));
7266                }
7267                dead_tmp(addr);
7268                tcg_gen_shli_i32(tmp, tmp, 1);
7269                tcg_gen_addi_i32(tmp, tmp, s->pc);
7270                store_reg(s, 15, tmp);
7271            } else {
7272                /* Load/store exclusive byte/halfword/doubleword.  */
7273                /* ??? These are not really atomic.  However we know
7274                   we never have multiple CPUs running in parallel,
7275                   so it is good enough.  */
7276                op = (insn >> 4) & 0x3;
7277                /* Must use a global reg for the address because we have
7278                   a conditional branch in the store instruction.  */
7279                gen_movl_T1_reg(s, rn);
7280                addr = cpu_T[1];
7281                if (insn & (1 << 20)) {
7282                    gen_helper_mark_exclusive(cpu_env, addr);
7283                    switch (op) {
7284                    case 0:
7285                        tmp = gen_ld8u(addr, IS_USER(s));
7286                        break;
7287                    case 1:
7288                        tmp = gen_ld16u(addr, IS_USER(s));
7289                        break;
7290                    case 3:
7291                        tmp = gen_ld32(addr, IS_USER(s));
7292                        tcg_gen_addi_i32(addr, addr, 4);
7293                        tmp2 = gen_ld32(addr, IS_USER(s));
7294                        store_reg(s, rd, tmp2);
7295                        break;
7296                    default:
7297                        goto illegal_op;
7298                    }
7299                    store_reg(s, rs, tmp);
7300                } else {
7301                    int label = gen_new_label();
7302                    /* Must use a global that is not killed by the branch.  */
7303                    gen_helper_test_exclusive(cpu_T[0], cpu_env, addr);
7304                    tcg_gen_brcondi_i32(TCG_COND_NE, cpu_T[0], 0, label);
7305                    tmp = load_reg(s, rs);
7306                    switch (op) {
7307                    case 0:
7308                        gen_st8(tmp, addr, IS_USER(s));
7309                        break;
7310                    case 1:
7311                        gen_st16(tmp, addr, IS_USER(s));
7312                        break;
7313                    case 3:
7314                        gen_st32(tmp, addr, IS_USER(s));
7315                        tcg_gen_addi_i32(addr, addr, 4);
7316                        tmp = load_reg(s, rd);
7317                        gen_st32(tmp, addr, IS_USER(s));
7318                        break;
7319                    default:
7320                        goto illegal_op;
7321                    }
7322                    gen_set_label(label);
7323                    gen_movl_reg_T0(s, rm);
7324                }
7325            }
7326        } else {
7327            /* Load/store multiple, RFE, SRS.  */
7328            if (((insn >> 23) & 1) == ((insn >> 24) & 1)) {
7329                /* Not available in user mode.  */
7330                if (IS_USER(s))
7331                    goto illegal_op;
7332                if (insn & (1 << 20)) {
7333                    /* rfe */
7334                    addr = load_reg(s, rn);
7335                    if ((insn & (1 << 24)) == 0)
7336                        tcg_gen_addi_i32(addr, addr, -8);
7337                    /* Load PC into tmp and CPSR into tmp2.  */
7338                    tmp = gen_ld32(addr, 0);
7339                    tcg_gen_addi_i32(addr, addr, 4);
7340                    tmp2 = gen_ld32(addr, 0);
7341                    if (insn & (1 << 21)) {
7342                        /* Base writeback.  */
7343                        if (insn & (1 << 24)) {
7344                            tcg_gen_addi_i32(addr, addr, 4);
7345                        } else {
7346                            tcg_gen_addi_i32(addr, addr, -4);
7347                        }
7348                        store_reg(s, rn, addr);
7349                    } else {
7350                        dead_tmp(addr);
7351                    }
7352                    gen_rfe(s, tmp, tmp2);
7353                } else {
7354                    /* srs */
7355                    op = (insn & 0x1f);
7356                    if (op == (env->uncached_cpsr & CPSR_M)) {
7357                        addr = load_reg(s, 13);
7358                    } else {
7359                        addr = new_tmp();
7360                        gen_helper_get_r13_banked(addr, cpu_env, tcg_const_i32(op));
7361                    }
7362                    if ((insn & (1 << 24)) == 0) {
7363                        tcg_gen_addi_i32(addr, addr, -8);
7364                    }
7365                    tmp = load_reg(s, 14);
7366                    gen_st32(tmp, addr, 0);
7367                    tcg_gen_addi_i32(addr, addr, 4);
7368                    tmp = new_tmp();
7369                    gen_helper_cpsr_read(tmp);
7370                    gen_st32(tmp, addr, 0);
7371                    if (insn & (1 << 21)) {
7372                        if ((insn & (1 << 24)) == 0) {
7373                            tcg_gen_addi_i32(addr, addr, -4);
7374                        } else {
7375                            tcg_gen_addi_i32(addr, addr, 4);
7376                        }
7377                        if (op == (env->uncached_cpsr & CPSR_M)) {
7378                            store_reg(s, 13, addr);
7379                        } else {
7380                            gen_helper_set_r13_banked(cpu_env,
7381                                tcg_const_i32(op), addr);
7382                        }
7383                    } else {
7384                        dead_tmp(addr);
7385                    }
7386                }
7387            } else {
7388                int i;
7389                /* Load/store multiple.  */
7390                addr = load_reg(s, rn);
7391                offset = 0;
7392                for (i = 0; i < 16; i++) {
7393                    if (insn & (1 << i))
7394                        offset += 4;
7395                }
7396                if (insn & (1 << 24)) {
7397                    tcg_gen_addi_i32(addr, addr, -offset);
7398                }
7399
7400                for (i = 0; i < 16; i++) {
7401                    if ((insn & (1 << i)) == 0)
7402                        continue;
7403                    if (insn & (1 << 20)) {
7404                        /* Load.  */
7405                        tmp = gen_ld32(addr, IS_USER(s));
7406                        if (i == 15) {
7407                            gen_bx(s, tmp);
7408                        } else {
7409                            store_reg(s, i, tmp);
7410                        }
7411                    } else {
7412                        /* Store.  */
7413                        tmp = load_reg(s, i);
7414                        gen_st32(tmp, addr, IS_USER(s));
7415                    }
7416                    tcg_gen_addi_i32(addr, addr, 4);
7417                }
7418                if (insn & (1 << 21)) {
7419                    /* Base register writeback.  */
7420                    if (insn & (1 << 24)) {
7421                        tcg_gen_addi_i32(addr, addr, -offset);
7422                    }
7423                    /* Fault if writeback register is in register list.  */
7424                    if (insn & (1 << rn))
7425                        goto illegal_op;
7426                    store_reg(s, rn, addr);
7427                } else {
7428                    dead_tmp(addr);
7429                }
7430            }
7431        }
7432        break;
7433    case 5: /* Data processing register constant shift.  */
7434        if (rn == 15)
7435            gen_op_movl_T0_im(0);
7436        else
7437            gen_movl_T0_reg(s, rn);
7438        gen_movl_T1_reg(s, rm);
7439        op = (insn >> 21) & 0xf;
7440        shiftop = (insn >> 4) & 3;
7441        shift = ((insn >> 6) & 3) | ((insn >> 10) & 0x1c);
7442        conds = (insn & (1 << 20)) != 0;
7443        logic_cc = (conds && thumb2_logic_op(op));
7444        gen_arm_shift_im(cpu_T[1], shiftop, shift, logic_cc);
7445        if (gen_thumb2_data_op(s, op, conds, 0))
7446            goto illegal_op;
7447        if (rd != 15)
7448            gen_movl_reg_T0(s, rd);
7449        break;
7450    case 13: /* Misc data processing.  */
7451        op = ((insn >> 22) & 6) | ((insn >> 7) & 1);
7452        if (op < 4 && (insn & 0xf000) != 0xf000)
7453            goto illegal_op;
7454        switch (op) {
7455        case 0: /* Register controlled shift.  */
7456            tmp = load_reg(s, rn);
7457            tmp2 = load_reg(s, rm);
7458            if ((insn & 0x70) != 0)
7459                goto illegal_op;
7460            op = (insn >> 21) & 3;
7461            logic_cc = (insn & (1 << 20)) != 0;
7462            gen_arm_shift_reg(tmp, op, tmp2, logic_cc);
7463            if (logic_cc)
7464                gen_logic_CC(tmp);
7465            store_reg_bx(env, s, rd, tmp);
7466            break;
7467        case 1: /* Sign/zero extend.  */
7468            tmp = load_reg(s, rm);
7469            shift = (insn >> 4) & 3;
7470            /* ??? In many cases it's not neccessary to do a
7471               rotate, a shift is sufficient.  */
7472            if (shift != 0)
7473                tcg_gen_rori_i32(tmp, tmp, shift * 8);
7474            op = (insn >> 20) & 7;
7475            switch (op) {
7476            case 0: gen_sxth(tmp);   break;
7477            case 1: gen_uxth(tmp);   break;
7478            case 2: gen_sxtb16(tmp); break;
7479            case 3: gen_uxtb16(tmp); break;
7480            case 4: gen_sxtb(tmp);   break;
7481            case 5: gen_uxtb(tmp);   break;
7482            default: goto illegal_op;
7483            }
7484            if (rn != 15) {
7485                tmp2 = load_reg(s, rn);
7486                if ((op >> 1) == 1) {
7487                    gen_add16(tmp, tmp2);
7488                } else {
7489                    tcg_gen_add_i32(tmp, tmp, tmp2);
7490                    dead_tmp(tmp2);
7491                }
7492            }
7493            store_reg(s, rd, tmp);
7494            break;
7495        case 2: /* SIMD add/subtract.  */
7496            op = (insn >> 20) & 7;
7497            shift = (insn >> 4) & 7;
7498            if ((op & 3) == 3 || (shift & 3) == 3)
7499                goto illegal_op;
7500            tmp = load_reg(s, rn);
7501            tmp2 = load_reg(s, rm);
7502            gen_thumb2_parallel_addsub(op, shift, tmp, tmp2);
7503            dead_tmp(tmp2);
7504            store_reg(s, rd, tmp);
7505            break;
7506        case 3: /* Other data processing.  */
7507            op = ((insn >> 17) & 0x38) | ((insn >> 4) & 7);
7508            if (op < 4) {
7509                /* Saturating add/subtract.  */
7510                tmp = load_reg(s, rn);
7511                tmp2 = load_reg(s, rm);
7512                if (op & 2)
7513                    gen_helper_double_saturate(tmp, tmp);
7514                if (op & 1)
7515                    gen_helper_sub_saturate(tmp, tmp2, tmp);
7516                else
7517                    gen_helper_add_saturate(tmp, tmp, tmp2);
7518                dead_tmp(tmp2);
7519            } else {
7520                tmp = load_reg(s, rn);
7521                switch (op) {
7522                case 0x0a: /* rbit */
7523                    gen_helper_rbit(tmp, tmp);
7524                    break;
7525                case 0x08: /* rev */
7526                    tcg_gen_bswap32_i32(tmp, tmp);
7527                    break;
7528                case 0x09: /* rev16 */
7529                    gen_rev16(tmp);
7530                    break;
7531                case 0x0b: /* revsh */
7532                    gen_revsh(tmp);
7533                    break;
7534                case 0x10: /* sel */
7535                    tmp2 = load_reg(s, rm);
7536                    tmp3 = new_tmp();
7537                    tcg_gen_ld_i32(tmp3, cpu_env, offsetof(CPUState, GE));
7538                    gen_helper_sel_flags(tmp, tmp3, tmp, tmp2);
7539                    dead_tmp(tmp3);
7540                    dead_tmp(tmp2);
7541                    break;
7542                case 0x18: /* clz */
7543                    gen_helper_clz(tmp, tmp);
7544                    break;
7545                default:
7546                    goto illegal_op;
7547                }
7548            }
7549            store_reg(s, rd, tmp);
7550            break;
7551        case 4: case 5: /* 32-bit multiply.  Sum of absolute differences.  */
7552            op = (insn >> 4) & 0xf;
7553            tmp = load_reg(s, rn);
7554            tmp2 = load_reg(s, rm);
7555            switch ((insn >> 20) & 7) {
7556            case 0: /* 32 x 32 -> 32 */
7557                tcg_gen_mul_i32(tmp, tmp, tmp2);
7558                dead_tmp(tmp2);
7559                if (rs != 15) {
7560                    tmp2 = load_reg(s, rs);
7561                    if (op)
7562                        tcg_gen_sub_i32(tmp, tmp2, tmp);
7563                    else
7564                        tcg_gen_add_i32(tmp, tmp, tmp2);
7565                    dead_tmp(tmp2);
7566                }
7567                break;
7568            case 1: /* 16 x 16 -> 32 */
7569                gen_mulxy(tmp, tmp2, op & 2, op & 1);
7570                dead_tmp(tmp2);
7571                if (rs != 15) {
7572                    tmp2 = load_reg(s, rs);
7573                    gen_helper_add_setq(tmp, tmp, tmp2);
7574                    dead_tmp(tmp2);
7575                }
7576                break;
7577            case 2: /* Dual multiply add.  */
7578            case 4: /* Dual multiply subtract.  */
7579                if (op)
7580                    gen_swap_half(tmp2);
7581                gen_smul_dual(tmp, tmp2);
7582                /* This addition cannot overflow.  */
7583                if (insn & (1 << 22)) {
7584                    tcg_gen_sub_i32(tmp, tmp, tmp2);
7585                } else {
7586                    tcg_gen_add_i32(tmp, tmp, tmp2);
7587                }
7588                dead_tmp(tmp2);
7589                if (rs != 15)
7590                  {
7591                    tmp2 = load_reg(s, rs);
7592                    gen_helper_add_setq(tmp, tmp, tmp2);
7593                    dead_tmp(tmp2);
7594                  }
7595                break;
7596            case 3: /* 32 * 16 -> 32msb */
7597                if (op)
7598                    tcg_gen_sari_i32(tmp2, tmp2, 16);
7599                else
7600                    gen_sxth(tmp2);
7601                tmp64 = gen_muls_i64_i32(tmp, tmp2);
7602                tcg_gen_shri_i64(tmp64, tmp64, 16);
7603                tmp = new_tmp();
7604                tcg_gen_trunc_i64_i32(tmp, tmp64);
7605                if (rs != 15)
7606                  {
7607                    tmp2 = load_reg(s, rs);
7608                    gen_helper_add_setq(tmp, tmp, tmp2);
7609                    dead_tmp(tmp2);
7610                  }
7611                break;
7612            case 5: case 6: /* 32 * 32 -> 32msb */
7613                gen_imull(tmp, tmp2);
7614                if (insn & (1 << 5)) {
7615                    gen_roundqd(tmp, tmp2);
7616                    dead_tmp(tmp2);
7617                } else {
7618                    dead_tmp(tmp);
7619                    tmp = tmp2;
7620                }
7621                if (rs != 15) {
7622                    tmp2 = load_reg(s, rs);
7623                    if (insn & (1 << 21)) {
7624                        tcg_gen_add_i32(tmp, tmp, tmp2);
7625                    } else {
7626                        tcg_gen_sub_i32(tmp, tmp2, tmp);
7627                    }
7628                    dead_tmp(tmp2);
7629                }
7630                break;
7631            case 7: /* Unsigned sum of absolute differences.  */
7632                gen_helper_usad8(tmp, tmp, tmp2);
7633                dead_tmp(tmp2);
7634                if (rs != 15) {
7635                    tmp2 = load_reg(s, rs);
7636                    tcg_gen_add_i32(tmp, tmp, tmp2);
7637                    dead_tmp(tmp2);
7638                }
7639                break;
7640            }
7641            store_reg(s, rd, tmp);
7642            break;
7643        case 6: case 7: /* 64-bit multiply, Divide.  */
7644            op = ((insn >> 4) & 0xf) | ((insn >> 16) & 0x70);
7645            tmp = load_reg(s, rn);
7646            tmp2 = load_reg(s, rm);
7647            if ((op & 0x50) == 0x10) {
7648                /* sdiv, udiv */
7649                if (!arm_feature(env, ARM_FEATURE_DIV))
7650                    goto illegal_op;
7651                if (op & 0x20)
7652                    gen_helper_udiv(tmp, tmp, tmp2);
7653                else
7654                    gen_helper_sdiv(tmp, tmp, tmp2);
7655                dead_tmp(tmp2);
7656                store_reg(s, rd, tmp);
7657            } else if ((op & 0xe) == 0xc) {
7658                /* Dual multiply accumulate long.  */
7659                if (op & 1)
7660                    gen_swap_half(tmp2);
7661                gen_smul_dual(tmp, tmp2);
7662                if (op & 0x10) {
7663                    tcg_gen_sub_i32(tmp, tmp, tmp2);
7664                } else {
7665                    tcg_gen_add_i32(tmp, tmp, tmp2);
7666                }
7667                dead_tmp(tmp2);
7668                /* BUGFIX */
7669                tmp64 = tcg_temp_new_i64();
7670                tcg_gen_ext_i32_i64(tmp64, tmp);
7671                dead_tmp(tmp);
7672                gen_addq(s, tmp64, rs, rd);
7673                gen_storeq_reg(s, rs, rd, tmp64);
7674            } else {
7675                if (op & 0x20) {
7676                    /* Unsigned 64-bit multiply  */
7677                    tmp64 = gen_mulu_i64_i32(tmp, tmp2);
7678                } else {
7679                    if (op & 8) {
7680                        /* smlalxy */
7681                        gen_mulxy(tmp, tmp2, op & 2, op & 1);
7682                        dead_tmp(tmp2);
7683                        tmp64 = tcg_temp_new_i64();
7684                        tcg_gen_ext_i32_i64(tmp64, tmp);
7685                        dead_tmp(tmp);
7686                    } else {
7687                        /* Signed 64-bit multiply  */
7688                        tmp64 = gen_muls_i64_i32(tmp, tmp2);
7689                    }
7690                }
7691                if (op & 4) {
7692                    /* umaal */
7693                    gen_addq_lo(s, tmp64, rs);
7694                    gen_addq_lo(s, tmp64, rd);
7695                } else if (op & 0x40) {
7696                    /* 64-bit accumulate.  */
7697                    gen_addq(s, tmp64, rs, rd);
7698                }
7699                gen_storeq_reg(s, rs, rd, tmp64);
7700            }
7701            break;
7702        }
7703        break;
7704    case 6: case 7: case 14: case 15:
7705        /* Coprocessor.  */
7706        if (((insn >> 24) & 3) == 3) {
7707            /* Translate into the equivalent ARM encoding.  */
7708            insn = (insn & 0xe2ffffff) | ((insn & (1 << 28)) >> 4);
7709            if (disas_neon_data_insn(env, s, insn))
7710                goto illegal_op;
7711        } else {
7712            if (insn & (1 << 28))
7713                goto illegal_op;
7714            if (disas_coproc_insn (env, s, insn))
7715                goto illegal_op;
7716        }
7717        break;
7718    case 8: case 9: case 10: case 11:
7719        if (insn & (1 << 15)) {
7720            /* Branches, misc control.  */
7721            if (insn & 0x5000) {
7722                /* Unconditional branch.  */
7723                /* signextend(hw1[10:0]) -> offset[:12].  */
7724                offset = ((int32_t)insn << 5) >> 9 & ~(int32_t)0xfff;
7725                /* hw1[10:0] -> offset[11:1].  */
7726                offset |= (insn & 0x7ff) << 1;
7727                /* (~hw2[13, 11] ^ offset[24]) -> offset[23,22]
7728                   offset[24:22] already have the same value because of the
7729                   sign extension above.  */
7730                offset ^= ((~insn) & (1 << 13)) << 10;
7731                offset ^= ((~insn) & (1 << 11)) << 11;
7732
7733                if (insn & (1 << 14)) {
7734                    /* Branch and link.  */
7735                    gen_op_movl_T1_im(s->pc | 1);
7736                    gen_movl_reg_T1(s, 14);
7737                }
7738
7739                offset += s->pc;
7740                if (insn & (1 << 12)) {
7741                    /* b/bl */
7742                    gen_jmp(s, offset);
7743                } else {
7744                    /* blx */
7745                    offset &= ~(uint32_t)2;
7746                    gen_bx_im(s, offset);
7747                }
7748            } else if (((insn >> 23) & 7) == 7) {
7749                /* Misc control */
7750                if (insn & (1 << 13))
7751                    goto illegal_op;
7752
7753                if (insn & (1 << 26)) {
7754                    /* Secure monitor call (v6Z) */
7755                    goto illegal_op; /* not implemented.  */
7756                } else {
7757                    op = (insn >> 20) & 7;
7758                    switch (op) {
7759                    case 0: /* msr cpsr.  */
7760                        if (IS_M(env)) {
7761                            tmp = load_reg(s, rn);
7762                            addr = tcg_const_i32(insn & 0xff);
7763                            gen_helper_v7m_msr(cpu_env, addr, tmp);
7764                            gen_lookup_tb(s);
7765                            break;
7766                        }
7767                        /* fall through */
7768                    case 1: /* msr spsr.  */
7769                        if (IS_M(env))
7770                            goto illegal_op;
7771                        gen_movl_T0_reg(s, rn);
7772                        if (gen_set_psr_T0(s,
7773                              msr_mask(env, s, (insn >> 8) & 0xf, op == 1),
7774                              op == 1))
7775                            goto illegal_op;
7776                        break;
7777                    case 2: /* cps, nop-hint.  */
7778                        if (((insn >> 8) & 7) == 0) {
7779                            gen_nop_hint(s, insn & 0xff);
7780                        }
7781                        /* Implemented as NOP in user mode.  */
7782                        if (IS_USER(s))
7783                            break;
7784                        offset = 0;
7785                        imm = 0;
7786                        if (insn & (1 << 10)) {
7787                            if (insn & (1 << 7))
7788                                offset |= CPSR_A;
7789                            if (insn & (1 << 6))
7790                                offset |= CPSR_I;
7791                            if (insn & (1 << 5))
7792                                offset |= CPSR_F;
7793                            if (insn & (1 << 9))
7794                                imm = CPSR_A | CPSR_I | CPSR_F;
7795                        }
7796                        if (insn & (1 << 8)) {
7797                            offset |= 0x1f;
7798                            imm |= (insn & 0x1f);
7799                        }
7800                        if (offset) {
7801                            gen_op_movl_T0_im(imm);
7802                            gen_set_psr_T0(s, offset, 0);
7803                        }
7804                        break;
7805                    case 3: /* Special control operations.  */
7806                        op = (insn >> 4) & 0xf;
7807                        switch (op) {
7808                        case 2: /* clrex */
7809                            gen_helper_clrex(cpu_env);
7810                            break;
7811                        case 4: /* dsb */
7812                        case 5: /* dmb */
7813                        case 6: /* isb */
7814                            /* These execute as NOPs.  */
7815                            ARCH(7);
7816                            break;
7817                        default:
7818                            goto illegal_op;
7819                        }
7820                        break;
7821                    case 4: /* bxj */
7822                        /* Trivial implementation equivalent to bx.  */
7823                        tmp = load_reg(s, rn);
7824                        gen_bx(s, tmp);
7825                        break;
7826                    case 5: /* Exception return.  */
7827                        /* Unpredictable in user mode.  */
7828                        goto illegal_op;
7829                    case 6: /* mrs cpsr.  */
7830                        tmp = new_tmp();
7831                        if (IS_M(env)) {
7832                            addr = tcg_const_i32(insn & 0xff);
7833                            gen_helper_v7m_mrs(tmp, cpu_env, addr);
7834                        } else {
7835                            gen_helper_cpsr_read(tmp);
7836                        }
7837                        store_reg(s, rd, tmp);
7838                        break;
7839                    case 7: /* mrs spsr.  */
7840                        /* Not accessible in user mode.  */
7841                        if (IS_USER(s) || IS_M(env))
7842                            goto illegal_op;
7843                        tmp = load_cpu_field(spsr);
7844                        store_reg(s, rd, tmp);
7845                        break;
7846                    }
7847                }
7848            } else {
7849                /* Conditional branch.  */
7850                op = (insn >> 22) & 0xf;
7851                /* Generate a conditional jump to next instruction.  */
7852                s->condlabel = gen_new_label();
7853                gen_test_cc(op ^ 1, s->condlabel);
7854                s->condjmp = 1;
7855
7856                /* offset[11:1] = insn[10:0] */
7857                offset = (insn & 0x7ff) << 1;
7858                /* offset[17:12] = insn[21:16].  */
7859                offset |= (insn & 0x003f0000) >> 4;
7860                /* offset[31:20] = insn[26].  */
7861                offset |= ((int32_t)((insn << 5) & 0x80000000)) >> 11;
7862                /* offset[18] = insn[13].  */
7863                offset |= (insn & (1 << 13)) << 5;
7864                /* offset[19] = insn[11].  */
7865                offset |= (insn & (1 << 11)) << 8;
7866
7867                /* jump to the offset */
7868                gen_jmp(s, s->pc + offset);
7869            }
7870        } else {
7871            /* Data processing immediate.  */
7872            if (insn & (1 << 25)) {
7873                if (insn & (1 << 24)) {
7874                    if (insn & (1 << 20))
7875                        goto illegal_op;
7876                    /* Bitfield/Saturate.  */
7877                    op = (insn >> 21) & 7;
7878                    imm = insn & 0x1f;
7879                    shift = ((insn >> 6) & 3) | ((insn >> 10) & 0x1c);
7880                    if (rn == 15) {
7881                        tmp = new_tmp();
7882                        tcg_gen_movi_i32(tmp, 0);
7883                    } else {
7884                        tmp = load_reg(s, rn);
7885                    }
7886                    switch (op) {
7887                    case 2: /* Signed bitfield extract.  */
7888                        imm++;
7889                        if (shift + imm > 32)
7890                            goto illegal_op;
7891                        if (imm < 32)
7892                            gen_sbfx(tmp, shift, imm);
7893                        break;
7894                    case 6: /* Unsigned bitfield extract.  */
7895                        imm++;
7896                        if (shift + imm > 32)
7897                            goto illegal_op;
7898                        if (imm < 32)
7899                            gen_ubfx(tmp, shift, (1u << imm) - 1);
7900                        break;
7901                    case 3: /* Bitfield insert/clear.  */
7902                        if (imm < shift)
7903                            goto illegal_op;
7904                        imm = imm + 1 - shift;
7905                        if (imm != 32) {
7906                            tmp2 = load_reg(s, rd);
7907                            gen_bfi(tmp, tmp2, tmp, shift, (1u << imm) - 1);
7908                            dead_tmp(tmp2);
7909                        }
7910                        break;
7911                    case 7:
7912                        goto illegal_op;
7913                    default: /* Saturate.  */
7914                        if (shift) {
7915                            if (op & 1)
7916                                tcg_gen_sari_i32(tmp, tmp, shift);
7917                            else
7918                                tcg_gen_shli_i32(tmp, tmp, shift);
7919                        }
7920                        tmp2 = tcg_const_i32(imm);
7921                        if (op & 4) {
7922                            /* Unsigned.  */
7923                            if ((op & 1) && shift == 0)
7924                                gen_helper_usat16(tmp, tmp, tmp2);
7925                            else
7926                                gen_helper_usat(tmp, tmp, tmp2);
7927                        } else {
7928                            /* Signed.  */
7929                            if ((op & 1) && shift == 0)
7930                                gen_helper_ssat16(tmp, tmp, tmp2);
7931                            else
7932                                gen_helper_ssat(tmp, tmp, tmp2);
7933                        }
7934                        break;
7935                    }
7936                    store_reg(s, rd, tmp);
7937                } else {
7938                    imm = ((insn & 0x04000000) >> 15)
7939                          | ((insn & 0x7000) >> 4) | (insn & 0xff);
7940                    if (insn & (1 << 22)) {
7941                        /* 16-bit immediate.  */
7942                        imm |= (insn >> 4) & 0xf000;
7943                        if (insn & (1 << 23)) {
7944                            /* movt */
7945                            tmp = load_reg(s, rd);
7946                            tcg_gen_ext16u_i32(tmp, tmp);
7947                            tcg_gen_ori_i32(tmp, tmp, imm << 16);
7948                        } else {
7949                            /* movw */
7950                            tmp = new_tmp();
7951                            tcg_gen_movi_i32(tmp, imm);
7952                        }
7953                    } else {
7954                        /* Add/sub 12-bit immediate.  */
7955                        if (rn == 15) {
7956                            offset = s->pc & ~(uint32_t)3;
7957                            if (insn & (1 << 23))
7958                                offset -= imm;
7959                            else
7960                                offset += imm;
7961                            tmp = new_tmp();
7962                            tcg_gen_movi_i32(tmp, offset);
7963                        } else {
7964                            tmp = load_reg(s, rn);
7965                            if (insn & (1 << 23))
7966                                tcg_gen_subi_i32(tmp, tmp, imm);
7967                            else
7968                                tcg_gen_addi_i32(tmp, tmp, imm);
7969                        }
7970                    }
7971                    store_reg(s, rd, tmp);
7972                }
7973            } else {
7974                int shifter_out = 0;
7975                /* modified 12-bit immediate.  */
7976                shift = ((insn & 0x04000000) >> 23) | ((insn & 0x7000) >> 12);
7977                imm = (insn & 0xff);
7978                switch (shift) {
7979                case 0: /* XY */
7980                    /* Nothing to do.  */
7981                    break;
7982                case 1: /* 00XY00XY */
7983                    imm |= imm << 16;
7984                    break;
7985                case 2: /* XY00XY00 */
7986                    imm |= imm << 16;
7987                    imm <<= 8;
7988                    break;
7989                case 3: /* XYXYXYXY */
7990                    imm |= imm << 16;
7991                    imm |= imm << 8;
7992                    break;
7993                default: /* Rotated constant.  */
7994                    shift = (shift << 1) | (imm >> 7);
7995                    imm |= 0x80;
7996                    imm = imm << (32 - shift);
7997                    shifter_out = 1;
7998                    break;
7999                }
8000                gen_op_movl_T1_im(imm);
8001                rn = (insn >> 16) & 0xf;
8002                if (rn == 15)
8003                    gen_op_movl_T0_im(0);
8004                else
8005                    gen_movl_T0_reg(s, rn);
8006                op = (insn >> 21) & 0xf;
8007                if (gen_thumb2_data_op(s, op, (insn & (1 << 20)) != 0,
8008                                       shifter_out))
8009                    goto illegal_op;
8010                rd = (insn >> 8) & 0xf;
8011                if (rd != 15) {
8012                    gen_movl_reg_T0(s, rd);
8013                }
8014            }
8015        }
8016        break;
8017    case 12: /* Load/store single data item.  */
8018        {
8019        int postinc = 0;
8020        int writeback = 0;
8021        int user;
8022        if ((insn & 0x01100000) == 0x01000000) {
8023            if (disas_neon_ls_insn(env, s, insn))
8024                goto illegal_op;
8025            break;
8026        }
8027        user = IS_USER(s);
8028        if (rn == 15) {
8029            addr = new_tmp();
8030            /* PC relative.  */
8031            /* s->pc has already been incremented by 4.  */
8032            imm = s->pc & 0xfffffffc;
8033            if (insn & (1 << 23))
8034                imm += insn & 0xfff;
8035            else
8036                imm -= insn & 0xfff;
8037            tcg_gen_movi_i32(addr, imm);
8038        } else {
8039            addr = load_reg(s, rn);
8040            if (insn & (1 << 23)) {
8041                /* Positive offset.  */
8042                imm = insn & 0xfff;
8043                tcg_gen_addi_i32(addr, addr, imm);
8044            } else {
8045                op = (insn >> 8) & 7;
8046                imm = insn & 0xff;
8047                switch (op) {
8048                case 0: case 8: /* Shifted Register.  */
8049                    shift = (insn >> 4) & 0xf;
8050                    if (shift > 3)
8051                        goto illegal_op;
8052                    tmp = load_reg(s, rm);
8053                    if (shift)
8054                        tcg_gen_shli_i32(tmp, tmp, shift);
8055                    tcg_gen_add_i32(addr, addr, tmp);
8056                    dead_tmp(tmp);
8057                    break;
8058                case 4: /* Negative offset.  */
8059                    tcg_gen_addi_i32(addr, addr, -imm);
8060                    break;
8061                case 6: /* User privilege.  */
8062                    tcg_gen_addi_i32(addr, addr, imm);
8063                    user = 1;
8064                    break;
8065                case 1: /* Post-decrement.  */
8066                    imm = -imm;
8067                    /* Fall through.  */
8068                case 3: /* Post-increment.  */
8069                    postinc = 1;
8070                    writeback = 1;
8071                    break;
8072                case 5: /* Pre-decrement.  */
8073                    imm = -imm;
8074                    /* Fall through.  */
8075                case 7: /* Pre-increment.  */
8076                    tcg_gen_addi_i32(addr, addr, imm);
8077                    writeback = 1;
8078                    break;
8079                default:
8080                    goto illegal_op;
8081                }
8082            }
8083        }
8084        op = ((insn >> 21) & 3) | ((insn >> 22) & 4);
8085        if (insn & (1 << 20)) {
8086            /* Load.  */
8087            if (rs == 15 && op != 2) {
8088                if (op & 2)
8089                    goto illegal_op;
8090                /* Memory hint.  Implemented as NOP.  */
8091            } else {
8092                switch (op) {
8093                case 0: tmp = gen_ld8u(addr, user); break;
8094                case 4: tmp = gen_ld8s(addr, user); break;
8095                case 1: tmp = gen_ld16u(addr, user); break;
8096                case 5: tmp = gen_ld16s(addr, user); break;
8097                case 2: tmp = gen_ld32(addr, user); break;
8098                default: goto illegal_op;
8099                }
8100                if (rs == 15) {
8101                    gen_bx(s, tmp);
8102                } else {
8103                    store_reg(s, rs, tmp);
8104                }
8105            }
8106        } else {
8107            /* Store.  */
8108            if (rs == 15)
8109                goto illegal_op;
8110            tmp = load_reg(s, rs);
8111            switch (op) {
8112            case 0: gen_st8(tmp, addr, user); break;
8113            case 1: gen_st16(tmp, addr, user); break;
8114            case 2: gen_st32(tmp, addr, user); break;
8115            default: goto illegal_op;
8116            }
8117        }
8118        if (postinc)
8119            tcg_gen_addi_i32(addr, addr, imm);
8120        if (writeback) {
8121            store_reg(s, rn, addr);
8122        } else {
8123            dead_tmp(addr);
8124        }
8125        }
8126        break;
8127    default:
8128        goto illegal_op;
8129    }
8130    return 0;
8131illegal_op:
8132    return 1;
8133}
8134
8135static void disas_thumb_insn(CPUState *env, DisasContext *s)
8136{
8137    uint32_t val, insn, op, rm, rn, rd, shift, cond;
8138    int32_t offset;
8139    int i;
8140    TCGv tmp;
8141    TCGv tmp2;
8142    TCGv addr;
8143
8144    if (s->condexec_mask) {
8145        cond = s->condexec_cond;
8146        s->condlabel = gen_new_label();
8147        gen_test_cc(cond ^ 1, s->condlabel);
8148        s->condjmp = 1;
8149    }
8150
8151    insn = lduw_code(s->pc);
8152#ifdef CONFIG_TRACE
8153    if (tracing) {
8154        int  ticks = get_insn_ticks_thumb(insn);
8155        trace_add_insn( insn_wrap_thumb(insn), 1 );
8156        gen_traceInsn();
8157        gen_traceTicks(ticks);
8158    }
8159#endif
8160    s->pc += 2;
8161
8162    switch (insn >> 12) {
8163    case 0: case 1:
8164        rd = insn & 7;
8165        op = (insn >> 11) & 3;
8166        if (op == 3) {
8167            /* add/subtract */
8168            rn = (insn >> 3) & 7;
8169            gen_movl_T0_reg(s, rn);
8170            if (insn & (1 << 10)) {
8171                /* immediate */
8172                gen_op_movl_T1_im((insn >> 6) & 7);
8173            } else {
8174                /* reg */
8175                rm = (insn >> 6) & 7;
8176                gen_movl_T1_reg(s, rm);
8177            }
8178            if (insn & (1 << 9)) {
8179                if (s->condexec_mask)
8180                    gen_op_subl_T0_T1();
8181                else
8182                    gen_op_subl_T0_T1_cc();
8183            } else {
8184                if (s->condexec_mask)
8185                    gen_op_addl_T0_T1();
8186                else
8187                    gen_op_addl_T0_T1_cc();
8188            }
8189            gen_movl_reg_T0(s, rd);
8190        } else {
8191            /* shift immediate */
8192            rm = (insn >> 3) & 7;
8193            shift = (insn >> 6) & 0x1f;
8194            tmp = load_reg(s, rm);
8195            gen_arm_shift_im(tmp, op, shift, s->condexec_mask == 0);
8196            if (!s->condexec_mask)
8197                gen_logic_CC(tmp);
8198            store_reg(s, rd, tmp);
8199        }
8200        break;
8201    case 2: case 3:
8202        /* arithmetic large immediate */
8203        op = (insn >> 11) & 3;
8204        rd = (insn >> 8) & 0x7;
8205        if (op == 0) {
8206            gen_op_movl_T0_im(insn & 0xff);
8207        } else {
8208            gen_movl_T0_reg(s, rd);
8209            gen_op_movl_T1_im(insn & 0xff);
8210        }
8211        switch (op) {
8212        case 0: /* mov */
8213            if (!s->condexec_mask)
8214                gen_op_logic_T0_cc();
8215            break;
8216        case 1: /* cmp */
8217            gen_op_subl_T0_T1_cc();
8218            break;
8219        case 2: /* add */
8220            if (s->condexec_mask)
8221                gen_op_addl_T0_T1();
8222            else
8223                gen_op_addl_T0_T1_cc();
8224            break;
8225        case 3: /* sub */
8226            if (s->condexec_mask)
8227                gen_op_subl_T0_T1();
8228            else
8229                gen_op_subl_T0_T1_cc();
8230            break;
8231        }
8232        if (op != 1)
8233            gen_movl_reg_T0(s, rd);
8234        break;
8235    case 4:
8236        if (insn & (1 << 11)) {
8237            rd = (insn >> 8) & 7;
8238            /* load pc-relative.  Bit 1 of PC is ignored.  */
8239            val = s->pc + 2 + ((insn & 0xff) * 4);
8240            val &= ~(uint32_t)2;
8241            addr = new_tmp();
8242            tcg_gen_movi_i32(addr, val);
8243            tmp = gen_ld32(addr, IS_USER(s));
8244            dead_tmp(addr);
8245            store_reg(s, rd, tmp);
8246            break;
8247        }
8248        if (insn & (1 << 10)) {
8249            /* data processing extended or blx */
8250            rd = (insn & 7) | ((insn >> 4) & 8);
8251            rm = (insn >> 3) & 0xf;
8252            op = (insn >> 8) & 3;
8253            switch (op) {
8254            case 0: /* add */
8255                gen_movl_T0_reg(s, rd);
8256                gen_movl_T1_reg(s, rm);
8257                gen_op_addl_T0_T1();
8258                gen_movl_reg_T0(s, rd);
8259                break;
8260            case 1: /* cmp */
8261                gen_movl_T0_reg(s, rd);
8262                gen_movl_T1_reg(s, rm);
8263                gen_op_subl_T0_T1_cc();
8264                break;
8265            case 2: /* mov/cpy */
8266                gen_movl_T0_reg(s, rm);
8267                gen_movl_reg_T0(s, rd);
8268                break;
8269            case 3:/* branch [and link] exchange thumb register */
8270                tmp = load_reg(s, rm);
8271                if (insn & (1 << 7)) {
8272                    val = (uint32_t)s->pc | 1;
8273                    tmp2 = new_tmp();
8274                    tcg_gen_movi_i32(tmp2, val);
8275                    store_reg(s, 14, tmp2);
8276                }
8277                gen_bx(s, tmp);
8278                break;
8279            }
8280            break;
8281        }
8282
8283        /* data processing register */
8284        rd = insn & 7;
8285        rm = (insn >> 3) & 7;
8286        op = (insn >> 6) & 0xf;
8287        if (op == 2 || op == 3 || op == 4 || op == 7) {
8288            /* the shift/rotate ops want the operands backwards */
8289            val = rm;
8290            rm = rd;
8291            rd = val;
8292            val = 1;
8293        } else {
8294            val = 0;
8295        }
8296
8297        if (op == 9) /* neg */
8298            gen_op_movl_T0_im(0);
8299        else if (op != 0xf) /* mvn doesn't read its first operand */
8300            gen_movl_T0_reg(s, rd);
8301
8302        gen_movl_T1_reg(s, rm);
8303        switch (op) {
8304        case 0x0: /* and */
8305            gen_op_andl_T0_T1();
8306            if (!s->condexec_mask)
8307                gen_op_logic_T0_cc();
8308            break;
8309        case 0x1: /* eor */
8310            gen_op_xorl_T0_T1();
8311            if (!s->condexec_mask)
8312                gen_op_logic_T0_cc();
8313            break;
8314        case 0x2: /* lsl */
8315            if (s->condexec_mask) {
8316                gen_helper_shl(cpu_T[1], cpu_T[1], cpu_T[0]);
8317            } else {
8318                gen_helper_shl_cc(cpu_T[1], cpu_T[1], cpu_T[0]);
8319                gen_op_logic_T1_cc();
8320            }
8321            break;
8322        case 0x3: /* lsr */
8323            if (s->condexec_mask) {
8324                gen_helper_shr(cpu_T[1], cpu_T[1], cpu_T[0]);
8325            } else {
8326                gen_helper_shr_cc(cpu_T[1], cpu_T[1], cpu_T[0]);
8327                gen_op_logic_T1_cc();
8328            }
8329            break;
8330        case 0x4: /* asr */
8331            if (s->condexec_mask) {
8332                gen_helper_sar(cpu_T[1], cpu_T[1], cpu_T[0]);
8333            } else {
8334                gen_helper_sar_cc(cpu_T[1], cpu_T[1], cpu_T[0]);
8335                gen_op_logic_T1_cc();
8336            }
8337            break;
8338        case 0x5: /* adc */
8339            if (s->condexec_mask)
8340                gen_adc_T0_T1();
8341            else
8342                gen_op_adcl_T0_T1_cc();
8343            break;
8344        case 0x6: /* sbc */
8345            if (s->condexec_mask)
8346                gen_sbc_T0_T1();
8347            else
8348                gen_op_sbcl_T0_T1_cc();
8349            break;
8350        case 0x7: /* ror */
8351            if (s->condexec_mask) {
8352                gen_helper_ror(cpu_T[1], cpu_T[1], cpu_T[0]);
8353            } else {
8354                gen_helper_ror_cc(cpu_T[1], cpu_T[1], cpu_T[0]);
8355                gen_op_logic_T1_cc();
8356            }
8357            break;
8358        case 0x8: /* tst */
8359            gen_op_andl_T0_T1();
8360            gen_op_logic_T0_cc();
8361            rd = 16;
8362            break;
8363        case 0x9: /* neg */
8364            if (s->condexec_mask)
8365                tcg_gen_neg_i32(cpu_T[0], cpu_T[1]);
8366            else
8367                gen_op_subl_T0_T1_cc();
8368            break;
8369        case 0xa: /* cmp */
8370            gen_op_subl_T0_T1_cc();
8371            rd = 16;
8372            break;
8373        case 0xb: /* cmn */
8374            gen_op_addl_T0_T1_cc();
8375            rd = 16;
8376            break;
8377        case 0xc: /* orr */
8378            gen_op_orl_T0_T1();
8379            if (!s->condexec_mask)
8380                gen_op_logic_T0_cc();
8381            break;
8382        case 0xd: /* mul */
8383            gen_op_mull_T0_T1();
8384            if (!s->condexec_mask)
8385                gen_op_logic_T0_cc();
8386            break;
8387        case 0xe: /* bic */
8388            gen_op_bicl_T0_T1();
8389            if (!s->condexec_mask)
8390                gen_op_logic_T0_cc();
8391            break;
8392        case 0xf: /* mvn */
8393            gen_op_notl_T1();
8394            if (!s->condexec_mask)
8395                gen_op_logic_T1_cc();
8396            val = 1;
8397            rm = rd;
8398            break;
8399        }
8400        if (rd != 16) {
8401            if (val)
8402                gen_movl_reg_T1(s, rm);
8403            else
8404                gen_movl_reg_T0(s, rd);
8405        }
8406        break;
8407
8408    case 5:
8409        /* load/store register offset.  */
8410        rd = insn & 7;
8411        rn = (insn >> 3) & 7;
8412        rm = (insn >> 6) & 7;
8413        op = (insn >> 9) & 7;
8414        addr = load_reg(s, rn);
8415        tmp = load_reg(s, rm);
8416        tcg_gen_add_i32(addr, addr, tmp);
8417        dead_tmp(tmp);
8418
8419        if (op < 3) /* store */
8420            tmp = load_reg(s, rd);
8421
8422        switch (op) {
8423        case 0: /* str */
8424            gen_st32(tmp, addr, IS_USER(s));
8425            break;
8426        case 1: /* strh */
8427            gen_st16(tmp, addr, IS_USER(s));
8428            break;
8429        case 2: /* strb */
8430            gen_st8(tmp, addr, IS_USER(s));
8431            break;
8432        case 3: /* ldrsb */
8433            tmp = gen_ld8s(addr, IS_USER(s));
8434            break;
8435        case 4: /* ldr */
8436            tmp = gen_ld32(addr, IS_USER(s));
8437            break;
8438        case 5: /* ldrh */
8439            tmp = gen_ld16u(addr, IS_USER(s));
8440            break;
8441        case 6: /* ldrb */
8442            tmp = gen_ld8u(addr, IS_USER(s));
8443            break;
8444        case 7: /* ldrsh */
8445            tmp = gen_ld16s(addr, IS_USER(s));
8446            break;
8447        }
8448        if (op >= 3) /* load */
8449            store_reg(s, rd, tmp);
8450        dead_tmp(addr);
8451        break;
8452
8453    case 6:
8454        /* load/store word immediate offset */
8455        rd = insn & 7;
8456        rn = (insn >> 3) & 7;
8457        addr = load_reg(s, rn);
8458        val = (insn >> 4) & 0x7c;
8459        tcg_gen_addi_i32(addr, addr, val);
8460
8461        if (insn & (1 << 11)) {
8462            /* load */
8463            tmp = gen_ld32(addr, IS_USER(s));
8464            store_reg(s, rd, tmp);
8465        } else {
8466            /* store */
8467            tmp = load_reg(s, rd);
8468            gen_st32(tmp, addr, IS_USER(s));
8469        }
8470        dead_tmp(addr);
8471        break;
8472
8473    case 7:
8474        /* load/store byte immediate offset */
8475        rd = insn & 7;
8476        rn = (insn >> 3) & 7;
8477        addr = load_reg(s, rn);
8478        val = (insn >> 6) & 0x1f;
8479        tcg_gen_addi_i32(addr, addr, val);
8480
8481        if (insn & (1 << 11)) {
8482            /* load */
8483            tmp = gen_ld8u(addr, IS_USER(s));
8484            store_reg(s, rd, tmp);
8485        } else {
8486            /* store */
8487            tmp = load_reg(s, rd);
8488            gen_st8(tmp, addr, IS_USER(s));
8489        }
8490        dead_tmp(addr);
8491        break;
8492
8493    case 8:
8494        /* load/store halfword immediate offset */
8495        rd = insn & 7;
8496        rn = (insn >> 3) & 7;
8497        addr = load_reg(s, rn);
8498        val = (insn >> 5) & 0x3e;
8499        tcg_gen_addi_i32(addr, addr, val);
8500
8501        if (insn & (1 << 11)) {
8502            /* load */
8503            tmp = gen_ld16u(addr, IS_USER(s));
8504            store_reg(s, rd, tmp);
8505        } else {
8506            /* store */
8507            tmp = load_reg(s, rd);
8508            gen_st16(tmp, addr, IS_USER(s));
8509        }
8510        dead_tmp(addr);
8511        break;
8512
8513    case 9:
8514        /* load/store from stack */
8515        rd = (insn >> 8) & 7;
8516        addr = load_reg(s, 13);
8517        val = (insn & 0xff) * 4;
8518        tcg_gen_addi_i32(addr, addr, val);
8519
8520        if (insn & (1 << 11)) {
8521            /* load */
8522            tmp = gen_ld32(addr, IS_USER(s));
8523            store_reg(s, rd, tmp);
8524        } else {
8525            /* store */
8526            tmp = load_reg(s, rd);
8527            gen_st32(tmp, addr, IS_USER(s));
8528        }
8529        dead_tmp(addr);
8530        break;
8531
8532    case 10:
8533        /* add to high reg */
8534        rd = (insn >> 8) & 7;
8535        if (insn & (1 << 11)) {
8536            /* SP */
8537            tmp = load_reg(s, 13);
8538        } else {
8539            /* PC. bit 1 is ignored.  */
8540            tmp = new_tmp();
8541            tcg_gen_movi_i32(tmp, (s->pc + 2) & ~(uint32_t)2);
8542        }
8543        val = (insn & 0xff) * 4;
8544        tcg_gen_addi_i32(tmp, tmp, val);
8545        store_reg(s, rd, tmp);
8546        break;
8547
8548    case 11:
8549        /* misc */
8550        op = (insn >> 8) & 0xf;
8551        switch (op) {
8552        case 0:
8553            /* adjust stack pointer */
8554            tmp = load_reg(s, 13);
8555            val = (insn & 0x7f) * 4;
8556            if (insn & (1 << 7))
8557                val = -(int32_t)val;
8558            tcg_gen_addi_i32(tmp, tmp, val);
8559            store_reg(s, 13, tmp);
8560            break;
8561
8562        case 2: /* sign/zero extend.  */
8563            ARCH(6);
8564            rd = insn & 7;
8565            rm = (insn >> 3) & 7;
8566            tmp = load_reg(s, rm);
8567            switch ((insn >> 6) & 3) {
8568            case 0: gen_sxth(tmp); break;
8569            case 1: gen_sxtb(tmp); break;
8570            case 2: gen_uxth(tmp); break;
8571            case 3: gen_uxtb(tmp); break;
8572            }
8573            store_reg(s, rd, tmp);
8574            break;
8575        case 4: case 5: case 0xc: case 0xd:
8576            /* push/pop */
8577            addr = load_reg(s, 13);
8578            if (insn & (1 << 8))
8579                offset = 4;
8580            else
8581                offset = 0;
8582            for (i = 0; i < 8; i++) {
8583                if (insn & (1 << i))
8584                    offset += 4;
8585            }
8586            if ((insn & (1 << 11)) == 0) {
8587                tcg_gen_addi_i32(addr, addr, -offset);
8588            }
8589            for (i = 0; i < 8; i++) {
8590                if (insn & (1 << i)) {
8591                    if (insn & (1 << 11)) {
8592                        /* pop */
8593                        tmp = gen_ld32(addr, IS_USER(s));
8594                        store_reg(s, i, tmp);
8595                    } else {
8596                        /* push */
8597                        tmp = load_reg(s, i);
8598                        gen_st32(tmp, addr, IS_USER(s));
8599                    }
8600                    /* advance to the next address.  */
8601                    tcg_gen_addi_i32(addr, addr, 4);
8602                }
8603            }
8604            TCGV_UNUSED(tmp);
8605            if (insn & (1 << 8)) {
8606                if (insn & (1 << 11)) {
8607                    /* pop pc */
8608                    tmp = gen_ld32(addr, IS_USER(s));
8609                    /* don't set the pc until the rest of the instruction
8610                       has completed */
8611                } else {
8612                    /* push lr */
8613                    tmp = load_reg(s, 14);
8614                    gen_st32(tmp, addr, IS_USER(s));
8615                }
8616                tcg_gen_addi_i32(addr, addr, 4);
8617            }
8618            if ((insn & (1 << 11)) == 0) {
8619                tcg_gen_addi_i32(addr, addr, -offset);
8620            }
8621            /* write back the new stack pointer */
8622            store_reg(s, 13, addr);
8623            /* set the new PC value */
8624            if ((insn & 0x0900) == 0x0900)
8625                gen_bx(s, tmp);
8626            break;
8627
8628        case 1: case 3: case 9: case 11: /* czb */
8629            rm = insn & 7;
8630            tmp = load_reg(s, rm);
8631            s->condlabel = gen_new_label();
8632            s->condjmp = 1;
8633            if (insn & (1 << 11))
8634                tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, s->condlabel);
8635            else
8636                tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, s->condlabel);
8637            dead_tmp(tmp);
8638            offset = ((insn & 0xf8) >> 2) | (insn & 0x200) >> 3;
8639            val = (uint32_t)s->pc + 2;
8640            val += offset;
8641            gen_jmp(s, val);
8642            break;
8643
8644        case 15: /* IT, nop-hint.  */
8645            if ((insn & 0xf) == 0) {
8646                gen_nop_hint(s, (insn >> 4) & 0xf);
8647                break;
8648            }
8649            /* If Then.  */
8650            s->condexec_cond = (insn >> 4) & 0xe;
8651            s->condexec_mask = insn & 0x1f;
8652            /* No actual code generated for this insn, just setup state.  */
8653            break;
8654
8655        case 0xe: /* bkpt */
8656            gen_set_condexec(s);
8657            gen_set_pc_im(s->pc - 2);
8658            gen_exception(EXCP_BKPT);
8659            s->is_jmp = DISAS_JUMP;
8660            break;
8661
8662        case 0xa: /* rev */
8663            ARCH(6);
8664            rn = (insn >> 3) & 0x7;
8665            rd = insn & 0x7;
8666            tmp = load_reg(s, rn);
8667            switch ((insn >> 6) & 3) {
8668            case 0: tcg_gen_bswap32_i32(tmp, tmp); break;
8669            case 1: gen_rev16(tmp); break;
8670            case 3: gen_revsh(tmp); break;
8671            default: goto illegal_op;
8672            }
8673            store_reg(s, rd, tmp);
8674            break;
8675
8676        case 6: /* cps */
8677            ARCH(6);
8678            if (IS_USER(s))
8679                break;
8680            if (IS_M(env)) {
8681                tmp = tcg_const_i32((insn & (1 << 4)) != 0);
8682                /* PRIMASK */
8683                if (insn & 1) {
8684                    addr = tcg_const_i32(16);
8685                    gen_helper_v7m_msr(cpu_env, addr, tmp);
8686                }
8687                /* FAULTMASK */
8688                if (insn & 2) {
8689                    addr = tcg_const_i32(17);
8690                    gen_helper_v7m_msr(cpu_env, addr, tmp);
8691                }
8692                gen_lookup_tb(s);
8693            } else {
8694                if (insn & (1 << 4))
8695                    shift = CPSR_A | CPSR_I | CPSR_F;
8696                else
8697                    shift = 0;
8698
8699                val = ((insn & 7) << 6) & shift;
8700                gen_op_movl_T0_im(val);
8701                gen_set_psr_T0(s, shift, 0);
8702            }
8703            break;
8704
8705        default:
8706            goto undef;
8707        }
8708        break;
8709
8710    case 12:
8711        /* load/store multiple */
8712        rn = (insn >> 8) & 0x7;
8713        addr = load_reg(s, rn);
8714        for (i = 0; i < 8; i++) {
8715            if (insn & (1 << i)) {
8716                if (insn & (1 << 11)) {
8717                    /* load */
8718                    tmp = gen_ld32(addr, IS_USER(s));
8719                    store_reg(s, i, tmp);
8720                } else {
8721                    /* store */
8722                    tmp = load_reg(s, i);
8723                    gen_st32(tmp, addr, IS_USER(s));
8724                }
8725                /* advance to the next address */
8726                tcg_gen_addi_i32(addr, addr, 4);
8727            }
8728        }
8729        /* Base register writeback.  */
8730        if ((insn & (1 << rn)) == 0) {
8731            store_reg(s, rn, addr);
8732        } else {
8733            dead_tmp(addr);
8734        }
8735        break;
8736
8737    case 13:
8738        /* conditional branch or swi */
8739        cond = (insn >> 8) & 0xf;
8740        if (cond == 0xe)
8741            goto undef;
8742
8743        if (cond == 0xf) {
8744            /* swi */
8745            gen_set_condexec(s);
8746            gen_set_pc_im(s->pc);
8747            s->is_jmp = DISAS_SWI;
8748            break;
8749        }
8750        /* generate a conditional jump to next instruction */
8751        s->condlabel = gen_new_label();
8752        gen_test_cc(cond ^ 1, s->condlabel);
8753        s->condjmp = 1;
8754        gen_movl_T1_reg(s, 15);
8755
8756        /* jump to the offset */
8757        val = (uint32_t)s->pc + 2;
8758        offset = ((int32_t)insn << 24) >> 24;
8759        val += offset << 1;
8760        gen_jmp(s, val);
8761        break;
8762
8763    case 14:
8764        if (insn & (1 << 11)) {
8765            if (disas_thumb2_insn(env, s, insn))
8766              goto undef32;
8767            break;
8768        }
8769        /* unconditional branch */
8770        val = (uint32_t)s->pc;
8771        offset = ((int32_t)insn << 21) >> 21;
8772        val += (offset << 1) + 2;
8773        gen_jmp(s, val);
8774        break;
8775
8776    case 15:
8777        if (disas_thumb2_insn(env, s, insn))
8778            goto undef32;
8779        break;
8780    }
8781    return;
8782undef32:
8783    gen_set_condexec(s);
8784    gen_set_pc_im(s->pc - 4);
8785    gen_exception(EXCP_UDEF);
8786    s->is_jmp = DISAS_JUMP;
8787    return;
8788illegal_op:
8789undef:
8790    gen_set_condexec(s);
8791    gen_set_pc_im(s->pc - 2);
8792    gen_exception(EXCP_UDEF);
8793    s->is_jmp = DISAS_JUMP;
8794}
8795
8796/* generate intermediate code in gen_opc_buf and gen_opparam_buf for
8797   basic block 'tb'. If search_pc is TRUE, also generate PC
8798   information for each intermediate instruction. */
static inline void gen_intermediate_code_internal(CPUState *env,
                                                  TranslationBlock *tb,
                                                  int search_pc)
{
    DisasContext dc1, *dc = &dc1;
    CPUBreakpoint *bp;
    uint16_t *gen_opc_end;
    int j, lj;                  /* lj: last gen_opc_* slot filled (search_pc) */
    target_ulong pc_start;
    uint32_t next_page_start;   /* translation must stop at this page boundary */
    int num_insns;
    int max_insns;              /* icount budget for this TB */

    /* generate intermediate code */
    num_temps = 0;
    memset(temps, 0, sizeof(temps));

    pc_start = tb->pc;

    dc->tb = tb;

    gen_opc_end = gen_opc_buf + OPC_MAX_SIZE;

    dc->is_jmp = DISAS_NEXT;
    dc->pc = pc_start;
    dc->singlestep_enabled = env->singlestep_enabled;
    dc->condjmp = 0;
    dc->thumb = env->thumb;
    /* Unpack the Thumb-2 IT-block state: low nibble of condexec_bits is the
       mask (pre-shifted left by one here), high bits are the condition.  */
    dc->condexec_mask = (env->condexec_bits & 0xf) << 1;
    dc->condexec_mask_prev = dc->condexec_mask;
    dc->condexec_cond = env->condexec_bits >> 4;
#if !defined(CONFIG_USER_ONLY)
    if (IS_M(env)) {
        /* M profile: unprivileged when no exception active and CONTROL.nPRIV set. */
        dc->user = ((env->v7m.exception == 0) && (env->v7m.control & 1));
    } else {
        dc->user = (env->uncached_cpsr & 0x1f) == ARM_CPU_MODE_USR;
    }
#endif
    /* Allocate the per-TB temporaries used by the VFP/Neon/iwMMXt helpers.  */
    cpu_F0s = tcg_temp_new_i32();
    cpu_F1s = tcg_temp_new_i32();
    cpu_F0d = tcg_temp_new_i64();
    cpu_F1d = tcg_temp_new_i64();
    cpu_V0 = cpu_F0d;
    cpu_V1 = cpu_F1d;
    /* FIXME: cpu_M0 can probably be the same as cpu_V0.  */
    cpu_M0 = tcg_temp_new_i64();
    next_page_start = (pc_start & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
    lj = -1;
    num_insns = 0;
    max_insns = tb->cflags & CF_COUNT_MASK;
    if (max_insns == 0)
        max_insns = CF_COUNT_MASK;

    gen_icount_start();
#ifdef CONFIG_TRACE
    if (tracing) {
        gen_traceBB(trace_static.bb_num, (target_phys_addr_t)tb );
        trace_bb_start(dc->pc);
    }
#endif

    do {
#ifdef CONFIG_USER_ONLY
        /* Intercept jump to the magic kernel page.  */
        if (dc->pc >= 0xffff0000) {
            /* We always get here via a jump, so know we are not in a
               conditional execution block.  */
            gen_exception(EXCP_KERNEL_TRAP);
            dc->is_jmp = DISAS_UPDATE;
            break;
        }
#else
        if (dc->pc >= 0xfffffff0 && IS_M(env)) {
            /* We always get here via a jump, so know we are not in a
               conditional execution block.  */
            gen_exception(EXCP_EXCEPTION_EXIT);
            dc->is_jmp = DISAS_UPDATE;
            break;
        }
#endif

        /* Stop translation on a breakpoint and raise EXCP_DEBUG instead.  */
        if (unlikely(!TAILQ_EMPTY(&env->breakpoints))) {
            TAILQ_FOREACH(bp, &env->breakpoints, entry) {
                if (bp->pc == dc->pc) {
                    gen_set_condexec(dc);
                    gen_set_pc_im(dc->pc);
                    gen_exception(EXCP_DEBUG);
                    dc->is_jmp = DISAS_JUMP;
                    /* Advance PC so that clearing the breakpoint will
                       invalidate this TB.  */
                    dc->pc += 2;
                    goto done_generating;
                    break;
                }
            }
        }
        /* Record a guest-PC <-> opcode-index mapping entry for each insn so
           the exact guest PC can be recovered after a fault mid-TB.  */
        if (search_pc) {
            j = gen_opc_ptr - gen_opc_buf;
            if (lj < j) {
                lj++;
                while (lj < j)
                    gen_opc_instr_start[lj++] = 0;
            }
            gen_opc_pc[lj] = dc->pc;
            gen_opc_instr_start[lj] = 1;
            gen_opc_icount[lj] = num_insns;
        }

        if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO))
            gen_io_start();

        if (env->thumb) {
            disas_thumb_insn(env, dc);
            /* Advance the IT-block state machine after each Thumb insn.  */
            dc->condexec_mask_prev = dc->condexec_mask;
            if (dc->condexec_mask) {
                dc->condexec_cond = (dc->condexec_cond & 0xe)
                                   | ((dc->condexec_mask >> 4) & 1);
                dc->condexec_mask = (dc->condexec_mask << 1) & 0x1f;
                if (dc->condexec_mask == 0) {
                    dc->condexec_cond = 0;
                }
            }
        } else {
            disas_arm_insn(env, dc);
        }
        /* Sanity check: every TCG temp allocated by the insn must be freed.  */
        if (num_temps) {
            fprintf(stderr, "Internal resource leak before %08x (%d temps)\n", dc->pc, num_temps);
            tcg_dump_ops(&tcg_ctx, stderr);
            num_temps = 0;
        }

        if (dc->condjmp && !dc->is_jmp) {
            gen_set_label(dc->condlabel);
            dc->condjmp = 0;
        }
        /* Translation stops when a conditional branch is encountered.
         * Otherwise the subsequent code could get translated several times.
         * Also stop translation when a page boundary is reached.  This
         * ensures prefetch aborts occur at the right place.  */
        num_insns ++;
    } while (!dc->is_jmp && gen_opc_ptr < gen_opc_end &&
             !env->singlestep_enabled &&
             !singlestep &&
             dc->pc < next_page_start &&
             num_insns < max_insns);

#ifdef CONFIG_TRACE
    if (tracing) {
        trace_bb_end();
    }
#endif

    if (tb->cflags & CF_LAST_IO) {
        if (dc->condjmp) {
            /* FIXME:  This can theoretically happen with self-modifying
               code.  */
            cpu_abort(env, "IO on conditional branch instruction");
        }
        gen_io_end();
    }

    /* At this stage dc->condjmp will only be set when the skipped
       instruction was a conditional branch or trap, and the PC has
       already been written.  */
    if (unlikely(env->singlestep_enabled)) {
        /* Make sure the pc is updated, and raise a debug exception.  */
        if (dc->condjmp) {
            gen_set_condexec(dc);
            if (dc->is_jmp == DISAS_SWI) {
                gen_exception(EXCP_SWI);
            } else {
                gen_exception(EXCP_DEBUG);
            }
            gen_set_label(dc->condlabel);
        }
        if (dc->condjmp || !dc->is_jmp) {
            gen_set_pc_im(dc->pc);
            dc->condjmp = 0;
        }
        gen_set_condexec(dc);
        if (dc->is_jmp == DISAS_SWI && !dc->condjmp) {
            gen_exception(EXCP_SWI);
        } else {
            /* FIXME: Single stepping a WFI insn will not halt
               the CPU.  */
            gen_exception(EXCP_DEBUG);
        }
    } else {
        /* While branches must always occur at the end of an IT block,
           there are a few other things that can cause us to terminate
           the TB in the middle of an IT block:
            - Exception generating instructions (bkpt, swi, undefined).
            - Page boundaries.
            - Hardware watchpoints.
           Hardware breakpoints have already been handled and skip this code.
         */
        gen_set_condexec(dc);
        switch(dc->is_jmp) {
        case DISAS_NEXT:
            gen_goto_tb(dc, 1, dc->pc);
            break;
        default:
        case DISAS_JUMP:
        case DISAS_UPDATE:
            /* indicate that the hash table must be used to find the next TB */
            tcg_gen_exit_tb(0);
            break;
        case DISAS_TB_JUMP:
            /* nothing more to generate */
            break;
        case DISAS_WFI:
            gen_helper_wfi();
            break;
        case DISAS_SWI:
            gen_exception(EXCP_SWI);
            break;
        }
        /* The not-taken path of a trailing conditional insn falls through
           to the next sequential TB.  */
        if (dc->condjmp) {
            gen_set_label(dc->condlabel);
            gen_set_condexec(dc);
            gen_goto_tb(dc, 1, dc->pc);
            dc->condjmp = 0;
        }
    }

done_generating:
    gen_icount_end(tb, num_insns);
    *gen_opc_ptr = INDEX_op_end;

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
        qemu_log("----------------\n");
        qemu_log("IN: %s\n", lookup_symbol(pc_start));
        log_target_disas(pc_start, dc->pc - pc_start, env->thumb);
        qemu_log("\n");
    }
#endif
    if (search_pc) {
        /* Zero-fill the mapping entries past the last translated insn.  */
        j = gen_opc_ptr - gen_opc_buf;
        lj++;
        while (lj <= j)
            gen_opc_instr_start[lj++] = 0;
    } else {
        tb->size = dc->pc - pc_start;
        tb->icount = num_insns;
    }
}
9046
/* Translate a TB without recording PC-mapping information (normal path).  */
void gen_intermediate_code(CPUState *env, TranslationBlock *tb)
{
    gen_intermediate_code_internal(env, tb, 0);
}
9051
/* Retranslate a TB recording the guest PC of each insn, so a fault address
   can be mapped back to a precise guest PC (used by cpu_restore_state).  */
void gen_intermediate_code_pc(CPUState *env, TranslationBlock *tb)
{
    gen_intermediate_code_internal(env, tb, 1);
}
9056
/* Printable names for the CPSR mode field (low 4 bits of the 5-bit mode
   value); "???" marks encodings that are not valid ARM processor modes.  */
static const char *cpu_mode_names[16] = {
  "usr", "fiq", "irq", "svc", "???", "???", "???", "abt",
  "???", "???", "???", "und", "???", "???", "???", "sys"
};
9061
9062void cpu_dump_state(CPUState *env, FILE *f,
9063                    int (*cpu_fprintf)(FILE *f, const char *fmt, ...),
9064                    int flags)
9065{
9066    int i;
9067#if 0
9068    union {
9069        uint32_t i;
9070        float s;
9071    } s0, s1;
9072    CPU_DoubleU d;
9073    /* ??? This assumes float64 and double have the same layout.
9074       Oh well, it's only debug dumps.  */
9075    union {
9076        float64 f64;
9077        double d;
9078    } d0;
9079#endif
9080    uint32_t psr;
9081
9082    for(i=0;i<16;i++) {
9083        cpu_fprintf(f, "R%02d=%08x", i, env->regs[i]);
9084        if ((i % 4) == 3)
9085            cpu_fprintf(f, "\n");
9086        else
9087            cpu_fprintf(f, " ");
9088    }
9089    psr = cpsr_read(env);
9090    cpu_fprintf(f, "PSR=%08x %c%c%c%c %c %s%d\n",
9091                psr,
9092                psr & (1 << 31) ? 'N' : '-',
9093                psr & (1 << 30) ? 'Z' : '-',
9094                psr & (1 << 29) ? 'C' : '-',
9095                psr & (1 << 28) ? 'V' : '-',
9096                psr & CPSR_T ? 'T' : 'A',
9097                cpu_mode_names[psr & 0xf], (psr & 0x10) ? 32 : 26);
9098
9099#if 0
9100    for (i = 0; i < 16; i++) {
9101        d.d = env->vfp.regs[i];
9102        s0.i = d.l.lower;
9103        s1.i = d.l.upper;
9104        d0.f64 = d.d;
9105        cpu_fprintf(f, "s%02d=%08x(%8g) s%02d=%08x(%8g) d%02d=%08x%08x(%8g)\n",
9106                    i * 2, (int)s0.i, s0.s,
9107                    i * 2 + 1, (int)s1.i, s1.s,
9108                    i, (int)(uint32_t)d.l.upper, (int)(uint32_t)d.l.lower,
9109                    d0.d);
9110    }
9111    cpu_fprintf(f, "FPSCR: %08x\n", (int)env->vfp.xregs[ARM_VFP_FPSCR]);
9112#endif
9113}
9114
/* Restore the guest PC (R15) from the opcode-index -> PC mapping recorded
   by gen_intermediate_code_pc; PC_POS is the opcode index of the faulting
   insn.  SEARCHED_PC and PUC are unused on ARM.  */
void gen_pc_load(CPUState *env, TranslationBlock *tb,
                unsigned long searched_pc, int pc_pos, void *puc)
{
    env->regs[15] = gen_opc_pc[pc_pos];
}
9120